diff options
-rw-r--r-- | extras/vcl-ldpreload/README.md | 52 | ||||
-rw-r--r-- | extras/vcl-ldpreload/env.sh | 5 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/.gitignore | 17 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/Makefile.am | 62 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/configure.ac | 151 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload.am | 31 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c | 2980 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.h | 193 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h | 349 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c | 2944 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h | 457 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c | 853 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h | 224 | ||||
-rw-r--r-- | extras/vcl-ldpreload/src/vcl-ldpreload.mk | 9 |
14 files changed, 8327 insertions, 0 deletions
diff --git a/extras/vcl-ldpreload/README.md b/extras/vcl-ldpreload/README.md new file mode 100644 index 00000000000..0b5378e9b48 --- /dev/null +++ b/extras/vcl-ldpreload/README.md @@ -0,0 +1,52 @@ +# vcl-ldpreload: an LD_PRELOAD library that uses the VPP Communications Library (VCL). + +Users can LD_PRELOAD any application that uses the POSIX socket API. +This library internally uses the libvppcom.so library from the VPP project. + + +## HowTo + +If VPP is not installed, but rather built in a separate directory, you can use the VPP_DIR 'configure' argument. +```bash +# 1. Set environment variables for source +cd vpp/extras/vcl-ldpreload +source ./env.sh + +# 2. Change to VPP source directory and build +- Change directory and modify uri.am to enable the socket_test program + +cd $VPP_DIR +perl -pi -e 's/noinst_PROGRAMS/bin_PROGRAMS/g' $VPP_DIR/src/uri.am + +- Build VPP release + +make install-dep wipe-release bootstrap dpdk-install-dev build-release + +# 3. Build LD_PRELOAD library against VPP build above +## This does not install the LD_PRELOAD library in your system. +## Instead it will be referenced from the build directory set in VCL_LDPRELOAD_LIB_DIR + +cd $LDP_DIR/vcl-ldpreload/src +autoreconf -i -f +./configure VPP_DIR=$VPP_DIR +make +``` + + +# 4. Running the demo +## Run the test script without parameters to see the help menu: + +cd $VPP_DIR/test/scripts +./socket_test.sh + +# 5. Docker iPerf examples. +## These launch xterms. To quit, close the xterms and run the following docker kill command (WARNING: This will kill all docker containers!) 'docker kill $(docker ps -q)' + + +## Docker iPerf using default Linux Bridge + +./socket_test.sh -bi docker-kernel + +## Docker iPerf using VPP +./socket_test.sh -bi docker-preload + diff --git a/extras/vcl-ldpreload/env.sh b/extras/vcl-ldpreload/env.sh new file mode 100644 index 00000000000..ebd3cc0d614 --- /dev/null +++ b/extras/vcl-ldpreload/env.sh @@ -0,0 +1,5 @@ +export VCL_BASE=$PWD/../../..
+export VPP_DIR=$VCL_BASE/vpp +export LDP_DIR=$VCL_BASE/vpp/extras +export WS_ROOT=$VCL_BASE/vpp +export VCL_LDPRELOAD_LIB_DIR=$LDP_DIR/vcl-ldpreload/src/.libs diff --git a/extras/vcl-ldpreload/src/.gitignore b/extras/vcl-ldpreload/src/.gitignore new file mode 100644 index 00000000000..95781235ad5 --- /dev/null +++ b/extras/vcl-ldpreload/src/.gitignore @@ -0,0 +1,17 @@ +*.m4 +autom4te.cache +compile +config.* +configure +depcomp +install-sh +ltmain.sh +missing +Makefile.in +Makefile +*.o +*.l[oa]* +*.Plo +*.dirstamp +*.so* +libtool diff --git a/extras/vcl-ldpreload/src/Makefile.am b/extras/vcl-ldpreload/src/Makefile.am new file mode 100644 index 00000000000..a1317696678 --- /dev/null +++ b/extras/vcl-ldpreload/src/Makefile.am @@ -0,0 +1,62 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +############################################################################### +# Global Defines +############################################################################### + +AUTOMAKE_OPTIONS = foreign subdir-objects +ACLOCAL_AMFLAGS = -I m4 +AM_LIBTOOLFLAGS = --quiet + +AM_CFLAGS = -Wall -fstack-protector -fPIC -Werror -g -DFORTIFY_SOURCE=2 +AM_LDFLAGS = -shared + +if VPP_DEBUG +AM_CFLAGS += -O0 -DCLIB_DEBUG +vpp_build = vpp_debug-native +else +AM_CFLAGS += -O2 +vpp_build = vpp-native +endif + +if VCL_LDPRELOAD_DEBUG +AM_CFLAGS += -ggdb '-DVCOM_DEBUG=1' +else +AM_CFLAGS += -Wall '-DVCOM_DEBUG=0' +endif + +if VPP_DIR_SET +vpp_install_dir = @VPP_DIR@/build-root/install-$(vpp_build)/vpp +AM_CFLAGS += -I$(vpp_install_dir)/include/ +AM_LDFLAGS += -L$(vpp_install_dir)/lib64/ +endif + + +AM_LDFLAGS += -lvppcom -lvppinfra + +SUBDIRS = . +noinst_HEADERS = +dist_bin_SCRIPTS = +lib_LTLIBRARIES = +BUILT_SOURCES = +CLEANFILES = + + +############################################################################### +# Components +############################################################################### + +include libvcl-ldpreload.am + + diff --git a/extras/vcl-ldpreload/src/configure.ac b/extras/vcl-ldpreload/src/configure.ac new file mode 100644 index 00000000000..cee7c95add0 --- /dev/null +++ b/extras/vcl-ldpreload/src/configure.ac @@ -0,0 +1,151 @@ +AC_INIT([vcl-ldpreload], [1.0], [vpp-dev@fd.io]) +LT_INIT +AC_CONFIG_AUX_DIR([.]) +AM_INIT_AUTOMAKE([subdir-objects]) +AM_SILENT_RULES([yes]) +AC_CONFIG_FILES([Makefile]) +AC_CONFIG_MACRO_DIR([m4]) + +AC_PROG_CC +AM_PROG_AS +AM_PROG_LIBTOOL +AC_PROG_YACC +AM_PATH_PYTHON + +AC_ARG_VAR(VPP_DIR,[ vpp build directory ]) +AM_CONDITIONAL([VPP_DIR_SET], [test ! 
-z "$VPP_DIR"]) + +############################################################################### +# Macros +############################################################################### + +AC_DEFUN([ENABLE_ARG], +[ + AC_ARG_ENABLE($1, + AC_HELP_STRING(patsubst([--enable-$1],[_],[-]), $2), + [enable_$1=yes n_enable_$1=1], + [enable_$1=no n_enable_$1=0]) + AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes") + m4_append([list_of_enabled], [$1], [, ]) +]) + +AC_DEFUN([DISABLE_ARG], +[ + AC_ARG_ENABLE($1, + AC_HELP_STRING(patsubst([--disable-$1],[_],[-]), $2), + [enable_$1=no n_enable_$1=0], + [enable_$1=yes n_enable_$1=1]) + AM_CONDITIONAL(m4_toupper(ENABLE_$1), test "$enable_$1" = "yes") + m4_append([list_of_enabled], [$1], [, ]) +]) + +AC_DEFUN([WITH_ARG], +[ + AC_ARG_WITH($1, + AC_HELP_STRING(patsubst([--with-$1],[_],[-]), $2), + [with_$1=yes n_with_$1=1], + [with_$1=no n_with_$1=0]) + AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes") + m4_append([list_of_with], [$1], [, ]) +]) + +AC_DEFUN([WITHOUT_ARG], +[ + AC_ARG_WITH($1, + AC_HELP_STRING(patsubst([--without-$1],[_],[-]), $2), + [with_$1=no n_with_$1=0], + [with_$1=yes n_with_$1=1]) + AM_CONDITIONAL(m4_toupper(WITH_$1), test "$with_$1" = "yes") + m4_append([list_of_with], [$1], [, ]) +]) + + +AC_DEFUN([PRINT_VAL], [ AC_MSG_RESULT(AC_HELP_STRING($1,$2)) ]) + + +############################################################################### +# configure arguments +############################################################################### + +# --enable-X + +AC_ARG_ENABLE([vpp-debug], +[ --enable-vpp-debug Use vpp debug native build libraries], + [if test x$enableval = xyes; then + AC_DEFINE(VPP_DEBUG, 1, [Define this to use vpp debug native build libraries.]) + vpp_debug=true + fi], [vpp_debug=false]) +AM_CONDITIONAL([VPP_DEBUG], [test x$vpp_debug = xtrue]) + +AC_ARG_ENABLE([vcl-ldpreload-debug], +[ --enable-vcl-ldpreload-debug Turn on vcl-ldpreload debugging], + [if test x$enableval 
= xyes; then + AC_DEFINE(VCL_LDPRELOAD_DEBUG, 1, [Define this to enable vcl-ldpreload debug.]) + vcl_ldpreload_debug=true + fi], [vcl_ldpreload_debug=false]) +AM_CONDITIONAL([VCL_LDPRELOAD_DEBUG], [test x$vcl_ldpreload_debug = xtrue]) + +# --disable-X + +# --with-X + +# --without-X + +AC_ARG_WITH(unix, + AC_HELP_STRING([--with-unix],[Compile unix version of clib]), + [], + [case $host_os in + darwin* | linux*) with_unix=yes;; + *) with_unix=no;; + esac]) + +AM_CONDITIONAL(WITH_UNIX, test "$with_unix" = "yes") + + +############################################################################### +# Substitutions and defines +############################################################################### + + +# Silence following noise: +# ar: `u' modifier ignored since `D' is the default (see `U') +AR_FLAGS=cr +AC_SUBST(AR_FLAGS) + + +############################################################################### +# Dependency checks +############################################################################### + +############################################################################### +# Output +############################################################################### + +AC_OUTPUT + +AC_MSG_RESULT([==============================================================================]) +PRINT_VAL([version], $PACKAGE $VERSION) +PRINT_VAL([prefix], ${prefix}) +PRINT_VAL([exec_prefix], ${exec_prefix}) +PRINT_VAL([libdir], ${libdir}) +PRINT_VAL([includedir], ${includedir}) +PRINT_VAL([CFLAGS], ${CFLAGS}) +PRINT_VAL([CPPFLAGS], ${CPPFLAGS}) +PRINT_VAL([LDFLAGS], ${LDFLAGS}) + +AC_MSG_RESULT([]) +AC_MSG_RESULT([with:]) +m4_foreach([x], m4_dquote(list_of_with), [ + AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${with_], x, [}]))) +]) + +AC_MSG_RESULT([]) +AC_MSG_RESULT([enabled:]) +m4_foreach([x], m4_dquote(list_of_enabled), [ + AC_MSG_RESULT(AC_HELP_STRING(x, m4_join([], [${enable_], x, [}]))) +]) + +AC_MSG_RESULT([]) 
+AC_MSG_RESULT([==============================================================================]) + + diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload.am b/extras/vcl-ldpreload/src/libvcl-ldpreload.am new file mode 100644 index 00000000000..0d2fcb589b1 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload.am @@ -0,0 +1,31 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +lib_LTLIBRARIES += libvcl_ldpreload.la + +libvcl_ldpreload_la_SOURCES = +nobase_include_HEADERS = + +libvcl_ldpreload_la_LIBADD = -lvppinfra -lvppcom -lpthread + +libvcl_ldpreload_la_SOURCES += \ + libvcl-ldpreload/vcom_socket_wrapper.c \ + libvcl-ldpreload/vcom.c \ + libvcl-ldpreload/vcom_socket.c + +nobase_include_HEADERS += \ + libvcl-ldpreload/vcom_socket_wrapper.h \ + libvcl-ldpreload/vcom_glibc_socket.h \ + libvcl-ldpreload/vcom.h \ + libvcl-ldpreload/vcom_socket.h + diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c new file mode 100644 index 00000000000..6245c5d2487 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.c @@ -0,0 +1,2980 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <dlfcn.h> +#include <pthread.h> +#include <time.h> +#include <stdarg.h> + +#include <libvcl-ldpreload/vcom_socket_wrapper.h> +#include <libvcl-ldpreload/vcom.h> +#include <sys/time.h> + +#include <uri/vppcom.h> +#include <libvcl-ldpreload/vcom_socket.h> + +/* GCC have printf type attribute check. */ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) \ + __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE \ + __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE \ + __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \ + __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +#define VCOM_SOCKET_FD_MAX 0x10000 + +static char vcom_app_name[MAX_VCOM_APP_NAME]; + +/* + * RETURN: 0 on success or -1 on error. + * */ +int +vcom_set_app_name (char *__app_name) +{ + return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d", + __app_name, getpid ()) < 0 ? 
-1 : 0; +} + +static char * +vcom_get_app_name () +{ + if (vcom_app_name[0] == '\0') + { + snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ()); + } + return vcom_app_name; +} + +/* + * 1 if init, 0 otherwise + */ +static int is_vcom_init; + +/* + * TBD: Make it thread safe + */ + +/* + * constructor function called before main is called + * RETURN: 0 on success -1 on failure + * */ +static inline int +vcom_init () +{ + pid_t pid = getpid (); + + if (!is_vcom_init) + { + if (vppcom_app_create (vcom_get_app_name ()) != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vppcom_app_create failed!\n", pid); + return -1; + } + if (vcom_socket_main_init () != 0) + { + printf ("\n[%d] vcom_init...failed!\n", pid); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] vcom_init: vcom_socket_main_init failed!\n", pid); + return -1; + } + + is_vcom_init = 1; + printf ("\n[%d] vcom_init...done!\n", pid); + } + return 0; +} + +static inline void +vcom_destroy (void) +{ + pid_t pid = getpid (); + + if (is_vcom_init) + { + vcom_socket_main_destroy (); + vppcom_app_destroy (); + is_vcom_init = 0; + fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid); + } +} + +static inline int +is_vcom_socket_fd (int fd) +{ + return vcom_socket_is_vcom_fd (fd); +} + +static inline int +is_vcom_epfd (int epfd) +{ + return vcom_socket_is_vcom_epfd (epfd); +} + + +/* + * + * Generic glibc fd api + * + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore + not marked with __THROW. */ +/* + * PRE: is_vcom_socket_fd(__fd) == 1 + * RETURN: 0 on success and -1 for errors. 
+ * */ +int +vcom_close (int __fd) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (vcom_socket_close (__fd) != 0) + { + return -1; + } + + return 0; +} + +/* + * RETURN: 0 on success, or -1 on error + */ +int +close (int __fd) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_close (__fd); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] close: " "'%04d'='%04d'\n", pid, rv, __fd); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_close (__fd); +} + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_read (int __fd, void *__buf, size_t __nbytes) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_read (__fd, __buf, __nbytes); +} + +ssize_t +read (int __fd, void *__buf, size_t __nbytes) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + size = vcom_read (__fd, __buf, __nbytes); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] read:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __nbytes); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_read (__fd, __buf, __nbytes); +} + +ssize_t +vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_readv (__fd, __iov, __iovcnt); +} + +ssize_t +readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t 
size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_readv (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_readv (__fd, __iov, __iovcnt); +} + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore + not marked with __THROW. */ +ssize_t +vcom_write (int __fd, const void *__buf, size_t __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_write (__fd, (void *) __buf, __n); +} + +ssize_t +write (int __fd, const void *__buf, size_t __n) +{ + ssize_t size = 0; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:1 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + size = vcom_write (__fd, __buf, __n); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] write:2 " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + (int) size, __fd, __buf, (int) __n); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_write (__fd, __buf, __n); +} + +ssize_t +vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_writev (__fd, __iov, __iovcnt); +} + +ssize_t +writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + ssize_t size = 0; + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_writev (__fd, __iov, __iovcnt); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + else + return libc_writev (__fd, __iov, __iovcnt); +} + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore + not marked with __THROW. 
*/ +int +vcom_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_fcntl_va (__fd, __cmd, __ap); +} + +int +vcom_fcntl (int __fd, int __cmd, ...) +{ + int rv = -1; + va_list ap; + + if (is_vcom_socket_fd (__fd)) + { + va_start (ap, __cmd); + rv = vcom_fcntl_va (__fd, __cmd, ap); + va_end (ap); + } + return rv; +} + +int +fcntl (int __fd, int __cmd, ...) +{ + int rv; + va_list ap; + pid_t pid = getpid (); + + va_start (ap, __cmd); + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_fcntl_va (__fd, __cmd, ap); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] fcntl: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd); + if (rv < 0) + { + errno = -rv; + rv = -1; + } + goto out; + } + rv = libc_vfcntl (__fd, __cmd, ap); + +out: + va_end (ap); + return rv; +} + +/* + * Check the first NFDS descriptors each in READFDS (if not NULL) for + * read readiness, in WRITEFDS (if not NULL) for write readiness, + * and in EXCEPTFDS (if not NULL) for exceptional conditions. + * If TIMEOUT is not NULL, time out after waiting the interval + * specified therein. Returns the number of ready descriptors, + * or -1 for errors. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ + +/* + * clear all vcom FDs from fd_sets __readfds, __writefds and + * __exceptfds and update the new nfds + * + * new nfds is the highest-numbered file descriptor + * in any of the three sets, plus 1 + * + * Return the number of file descriptors contained in the + * three descriptor sets. ie. 
the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_clear (int __nfds, + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* clear all vcom fd from the sets */ + for (fd = 0; fd < __nfds; fd++) + { + + /* clear vcom fd from set */ + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_CLR (fd, (F)); \ + } \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? max_fd + 1 : 0; + return nfd; +} + +/* + * Return the number of file descriptors contained in the + * three descriptor sets. ie. 
the total number of the bits + * that are set in __readfds, __writefds and __exceptfds + */ +static inline int +vcom_fd_set (int __nfds, + /* dest */ + int *__new_nfds, + fd_set * __restrict __readfds, + fd_set * __restrict __writefds, fd_set * __restrict __exceptfds, + /* src */ + fd_set * __restrict __saved_readfds, + fd_set * __restrict __saved_writefds, + fd_set * __restrict __saved_exceptfds) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + for (fd = 0; fd < __nfds; fd++) + { + /* + * F fd set + * S saved fd set + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + FD_SET (fd, (F)); \ + } \ + } + + + _(__saved_readfds, __readfds); + _(__saved_writefds, __writefds); +#undef _ + } + + + /* + * compute nfd and __new_nfds + */ + for (fd = 0; fd < __nfds; fd++) + { + + /* + * F fd set + */ +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + } + + *__new_nfds = max_fd != -1 ? 
max_fd + 1 : 0; + return nfd; +} + +/* + * split select sets(src) into + * vcom sets(dest1) and libc sets(dest2) + */ +static inline void +vcom_fd_set_split ( + /* src, select sets */ + int nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, + /* dest1, vcom sets */ + int *vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int *vcom_nfd, + /* dest2, libc sets */ + int *libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int *libc_nfd) +{ + int fd; + + /* vcom */ + /* invalid max_fd is -1 */ + int vcom_max_fd = -1; + int vcom_nfd2 = 0; + + /* libc */ + /* invalid max_fd is -1 */ + int libc_max_fd = -1; + int libc_nfd2 = 0; + + + for (fd = 0; fd < nfds; fd++) + { + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S) && FD_ISSET (fd, (S))) \ + { \ + if (is_vcom_socket_fd (fd)) \ + { \ + if ((V)) \ + { \ + FD_SET(fd, (V)); \ + if (fd > vcom_max_fd) \ + { \ + vcom_max_fd = fd; \ + } \ + ++vcom_nfd2; \ + } \ + } \ + else \ + { \ + if ((L)) \ + { \ + FD_SET(fd, (L)); \ + if (fd > libc_max_fd) \ + { \ + libc_max_fd = fd; \ + } \ + ++libc_nfd2; \ + } \ + } \ + } + + + _(readfds, vcom_readfds, libc_readfds); + _(writefds, vcom_writefds, libc_writefds); + _(exceptfds, vcom_exceptfds, libc_exceptfds); +#undef _ + } + + if (vcom_nfds) + *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0; + if (vcom_nfd) + *vcom_nfd = vcom_nfd2; + if (libc_nfds) + *libc_nfds = libc_max_fd != -1 ? 
libc_max_fd + 1 : 0; + if (libc_nfd) + *libc_nfd = libc_nfd2; +} + +/* + * merge vcom sets(src1) and libc sets(src2) + * into select sets(dest) + */ +static inline void +vcom_fd_set_merge ( + /* dest, select sets */ + int *nfds, + fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, int *nfd, + /* src1, vcom sets */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, int vcom_nfd, + /* src2, libc sets */ + int libc_nfds, + fd_set * __restrict libc_readfds, + fd_set * __restrict libc_writefds, + fd_set * __restrict libc_exceptfds, int libc_nfd) +{ + int fd; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd2 = 0; + + + /* FD_BIT_OR + * + * dest |= src at current bit index + * update MAX and NFD of dest fd set + * + * + * FS source fd set + * FD dest fd set + * BI bit index + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + * N nfds of source fd set + */ +#define FD_BIT_OR(FD,FS,BI, \ + MAX,NFD) \ + if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \ + { \ + FD_SET ((BI), (FD)); \ + if ((BI) > (MAX)) \ + { \ + (MAX) = (BI); \ + } \ + ++(NFD); \ + } + + + /* FD_RWE_SET_OR */ + /* + * SR,SW,SE source RWE fd sets + * DR,DW,DE dest RWE fd sets + * BI bit index + * NFDS nfds of source fd sets + * MAX current max_fd of dest fd sets + * NFD current nfd of dest fd sets + */ +#define FD_RWE_SETS_OR(DR,DW,DE, \ + SR,SW,SE, \ + BI,NFDS, \ + MAX,NFD) \ + do \ + { \ + for ((BI) = 0; (BI) < (NFDS); (BI)++) \ + { \ + FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \ + FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \ + } \ + } \ + while (0); + + + /* source(vcom) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, exceptfds, + vcom_readfds, vcom_writefds, vcom_exceptfds, + fd, vcom_nfds, max_fd, nfd2); + + /* source(libc) to dest(select) rwe fd sets */ + FD_RWE_SETS_OR (readfds, writefds, 
exceptfds, + libc_readfds, libc_writefds, libc_exceptfds, + fd, libc_nfds, max_fd, nfd2); + +#undef FD_RWE_SETS_OR +#undef FD_BIT_OR + + if (nfds) + *nfds = max_fd != -1 ? max_fd + 1 : 0; + if (nfd) + *nfd = nfd2; +} + +/* + * RETURN 1 if fds is NULL or empty. 0 otherwise + */ +static inline int +fd_set_iszero (fd_set * __restrict fds) +{ + int fd; + + /* NULL fds */ + if (!fds) + return 1; + + for (fd = 0; fd < FD_SETSIZE; fd++) + { + if (FD_ISSET (fd, fds)) + { + /* non-empty fds */ + return 0; + } + } + /* empty fds */ + return 1; +} + + +/* + * ################ + * kernel time64.h + * ################ + * */ +typedef long int s64; +typedef unsigned long int u64; + +typedef long long int __s64; +typedef unsigned long long int __u64; + +typedef __s64 time64_t; +typedef __u64 timeu64_t; + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + + +/* + * ################ + * kernel time.h + * ################ + * */ + + +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + +static inline int +timespec_equal (const struct timespec *a, const struct timespec *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int +timespec_compare (const struct timespec *lhs, const struct timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +static inline int +timeval_compare (const struct timeval *lhs, const struct timeval *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_usec - rhs->tv_usec; +} + +extern void set_normalized_timespec (struct timespec *ts, 
time_t sec, + s64 nsec); + + +static inline struct timespec +timespec_add (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec +timespec_sub (struct timespec lhs, struct timespec rhs) +{ + struct timespec ts_delta; + set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * ################ + * kernel time.c + * ################ + * */ + + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void +set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec) +{ + while (nsec >= NSEC_PER_SEC) + { + /* + * The following asm() prevents the compiler from + * optimising this loop into a modulo operation. See + * also __iter_div_u64_rem() in include/linux/time.h + */ + asm ("":"+rm" (nsec)); + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) + { + asm ("":"+rm" (nsec)); + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +#define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0)) + +/* Macros for converting between `struct timeval' and `struct timespec'. 
*/ +#define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \ + (ts)->tv_sec = (tv)->tv_sec; \ + (ts)->tv_nsec = (tv)->tv_usec * 1000; \ +} +#define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \ + (tv)->tv_sec = (ts)->tv_sec; \ + (tv)->tv_usec = (ts)->tv_nsec / 1000; \ +} + +static inline int +vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + return vcom_socket_select (vcom_nfds, vcom_readfds, + vcom_writefds, vcom_exceptfds, timeout); +} + +int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int rv2 = 0; + pid_t pid = getpid (); + + int timedout = 0; + /* block indefinitely */ + int no_timeout = 0; + int first_clock_gettime_failed = 0; + /* timeout value in units of timespec */ + struct timespec timeout_ts; + struct timespec start_time, now, end_time; + + /* select sets attributes - after merge */ + int new_nfds = 0; + int new_nfd = -1; + + /* vcom */ + int vcom_nfds = 0; + fd_set vcom_readfds; + fd_set vcom_writefds; + fd_set vcom_exceptfds; + int vcom_nfd = -1; + + /* libc */ + int libc_nfds = 0; + fd_set libc_readfds; + fd_set libc_writefds; + fd_set libc_exceptfds; + int libc_nfd = -1; + + /* for polling */ + struct timeval tv = {.tv_sec = 0,.tv_usec = 0 }; + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (!vcom_timerisvalid (__timeout)) + { + rv = -EINVAL; + goto select_done; + } + + /* validate tv_usec */ + /* TBD: */ + /* init timeout_ts */ + VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts); + set_normalized_timespec (&timeout_ts, + timeout_ts.tv_sec, timeout_ts.tv_nsec); + } + + rv = clock_gettime (CLOCK_MONOTONIC, &start_time); + if (rv == -1) + { + rv = -errno; + first_clock_gettime_failed = 1; + goto select_done; + } + + /* init end_time */ + if (__timeout) + { + if 
(timerisset (__timeout)) + { + end_time = timespec_add (start_time, timeout_ts); + } + else + { + /* + * if both fields of the timeout structure are zero, + * then select returns immediately + * */ + end_time = start_time; + } + } + else + { + /* block indefinitely */ + no_timeout = 1; + } + + + + if (vcom_init () != 0) + { + rv = -1; + goto select_done; + } + + /* validate __nfds */ + if (__nfds < 0 || __nfds > FD_SETSIZE) + { + rv = -EINVAL; + goto select_done; + } + + + /* + * usleep(3) emulation + * */ + + /* call libc_select() with a finite timeout and + * no file descriptors or empty fd sets and + * zero nfds */ + if (__nfds == 0 && + (!__readfds || fd_set_iszero (__readfds)) && + (!__writefds || fd_set_iszero (__writefds)) && + (!__exceptfds || fd_set_iszero (__exceptfds))) + { + if (__timeout) + { + rv = libc_select (__nfds, + __readfds, __writefds, __exceptfds, __timeout); + if (rv == -1) + rv = -errno; + } + else + { + /* TBD: block indefinitely or return -EINVAL */ + rv = -EINVAL; + } + goto select_done; + } + + /* init once before the polling loop */ + + /* zero vcom and libc fd sets */ + /* + * S select fd set + * V vcom fd set + * L libc fd set + */ +#define _(S,V,L) \ + if ((S)) \ + { \ + FD_ZERO ((V)); \ + FD_ZERO ((L)); \ + } + + + _(__readfds, &vcom_readfds, &libc_readfds); + _(__writefds, &vcom_writefds, &libc_writefds); + _(__exceptfds, &vcom_exceptfds, &libc_exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + vcom_nfds = 0; + vcom_nfd = -1; + libc_nfds = 0; + libc_nfd = -1; + + vcom_fd_set_split ( + /* src, select sets */ + __nfds, __readfds, __writefds, __exceptfds, + /* dest1, vcom sets */ + __readfds || __writefds || __exceptfds ? + &vcom_nfds : NULL, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &vcom_nfd : NULL, + /* dest2, libc sets */ + __readfds || __writefds || __exceptfds ? + &libc_nfds : NULL, + __readfds ? 
&libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, + __readfds || __writefds || __exceptfds ? + &libc_nfd : NULL); + + /* + * polling loop + * */ + do + { + new_nfd = -1; + vcom_nfd = -1; + libc_nfd = -1; + + /* + * if both fields of timeval structure are zero, + * vcom_select_impl and libc_select returns immediately. + * useful for polling and ensure fairness among + * file descriptors watched. + */ + + /* for polling */ + tv.tv_sec = 0; + tv.tv_usec = 0; + + /* select on vcom fds */ + if (vcom_nfds) + { + vcom_nfd = vcom_select_impl (vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, + &tv); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] select vcom: " + "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds); + + if (vcom_nfd < 0) + { + rv = vcom_nfd; + goto select_done; + } + } + /* select on libc fds */ + if (libc_nfds) + { + libc_nfd = libc_select (libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, &tv); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] select libc: " + "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds); + + if (libc_nfd < 0) + { + /* tv becomes undefined */ + libc_nfd = errno; + rv = libc_nfd; + goto select_done; + } + } + + /* check if any file descriptors changed status */ + if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0)) + { + /* zero the sets before merge and exit */ + + /* + * F fd set + */ +#define _(F) \ + if ((F)) \ + { \ + FD_ZERO ((F)); \ + } + + + _(__readfds); + _(__writefds); + _(__exceptfds); +#undef _ + new_nfds = 0; + new_nfd = -1; + + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + vcom_fd_set_merge ( + /* dest, select sets */ + __readfds || __writefds || __exceptfds ? 
+ &new_nfds : NULL, + __readfds, + __writefds, + __exceptfds, + __readfds || __writefds || __exceptfds ? + &new_nfd : NULL, + /* src1, vcom sets */ + vcom_nfds, + __readfds ? &vcom_readfds : NULL, + __writefds ? &vcom_writefds : NULL, + __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd, + /* src2, libc sets */ + libc_nfds, + __readfds ? &libc_readfds : NULL, + __writefds ? &libc_writefds : NULL, + __exceptfds ? &libc_exceptfds : NULL, libc_nfd); + /* + * return the number of file descriptors contained in the + * three returned sets + * */ + rv = 0; + /* + * for documentation + * + * if(vcom_nfd > 0) + * rv += vcom_nfd; + * if(libc_nfd > 0) + * rv += libc_nfd; + */ + + rv = new_nfd == -1 ? 0 : new_nfd; + goto select_done; + } + + rv = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv == -1) + { + rv = -errno; + goto select_done; + } + } + while (no_timeout || timespec_compare (&now, &end_time) < 0); + + /* timeout expired before anything interesting happened */ + timedout = 1; + rv = 0; + +select_done: + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds); + /* + * modify timeout parameter to reflect the amount of time not slept + * */ + if (__timeout) + { + if (vcom_timerisvalid (__timeout)) + { + /* timeout expired */ + if (timedout) + { + timerclear (__timeout); + } + else if (!first_clock_gettime_failed) + { + rv2 = clock_gettime (CLOCK_MONOTONIC, &now); + if (rv2 == -1) + { + rv = -errno; + } + else + { + struct timespec ts_delta; + ts_delta = timespec_sub (end_time, now); + VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta); + } + } + } + } + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds); + + return rv; +} + +int +vcom_select_internal (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set 
saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved_x fds */ + if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + /* have kernel fds */ + if (new_nfds) + rv = libc_select (new_nfds, __readfds, + __writefds, __exceptfds, __timeout); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} + +int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout) +{ + int rv = 0; + pid_t pid 
= getpid (); + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds); + rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +#ifdef __USE_XOPEN2K +/* + * Same as above only that the TIMEOUT value is given with higher + * resolution and a sigmask which is been set temporarily. This + * version should be used. + * + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int fd; + int vcom_nfds = 0; + + for (fd = 0; fd < __nfds; fd++) + { + if (__readfds && FD_ISSET (fd, __readfds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + + if (__writefds && FD_ISSET (fd, __writefds)) + { + if (is_vcom_socket_fd (fd)) + { + vcom_nfds++; + } + } + if (__exceptfds && FD_ISSET (fd, __exceptfds)) + { + if (is_vcom_socket_fd (fd)) + { + FD_CLR (fd, __exceptfds); + } + } + } + return vcom_nfds; +} + +int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + int rv; + int new_nfds = 0; + int nfd = 0; + pid_t pid = getpid (); + + fd_set saved_readfds; + fd_set saved_writefds; + fd_set saved_exceptfds; + + /* validate __nfds */ + if (__nfds < 0) + { + errno = EINVAL; + return -1; + } + + /* validate __timeout */ + if (__timeout) + { + /* validate tv_sec */ + /* bogus */ + if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0) + { + errno = EINVAL; + return -1; + } + + /* validate tv_usec */ + /* TBD: */ + } + + /* init saved fds */ + 
if (__readfds) + { + saved_readfds = *__readfds; + /* + memcpy (&saved_readfds, __readfds, sizeof (*__readfds)); + */ + } + else + { + FD_ZERO (&saved_readfds); + } + + if (__writefds) + { + saved_writefds = *__writefds; + /* + memcpy (&saved_writefds, __writefds, sizeof (*__writefds)); + */ + + } + else + { + FD_ZERO (&saved_writefds); + } + + if (__exceptfds) + { + saved_exceptfds = *__exceptfds; + /* + memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds)); + */ + + } + else + { + FD_ZERO (&saved_exceptfds); + } + + /* clear vcom fds */ + nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds); + + /* set to an invalid value */ + rv = -2; + if (new_nfds) + rv = libc_pselect (new_nfds, + __readfds, + __writefds, __exceptfds, __timeout, __sigmask); + + if (new_nfds && rv == -1) + { + /* on error, the file descriptor sets are unmodified */ + if (__readfds) + *__readfds = saved_readfds; + if (__writefds) + *__writefds = saved_writefds; + if (__exceptfds) + *__exceptfds = saved_exceptfds; + return rv; + } + else if ((new_nfds && rv != -1) || (rv == -2)) + { + /* restore vcom fds */ + nfd = vcom_fd_set (__nfds, + &new_nfds, + __readfds, + __writefds, + __exceptfds, + &saved_readfds, &saved_writefds, &saved_exceptfds); + rv = nfd; + } + + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds); + return rv; +} +#endif + +/* + * + * Socket specific glibc api + * + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + * protocol PROTOCOL. If PROTOCOL is zero, one is chosen + * automatically. Returns a file descriptor for the new socket, + * or -1 for errors. + * RETURN: a valid file descriptor for the new socket, + * or -1 for errors. 
+ * */ + +int +vcom_socket (int __domain, int __type, int __protocol) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socket (__domain, __type, __protocol); +} + +int +socket (int __domain, int __type, int __protocol) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_socket (__domain, __type, __protocol); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] socket: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __domain, __type, __protocol); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socket (__domain, __type, __protocol); +} + +/* + * Create two new sockets, of type TYPE in domain DOMAIN and using + * protocol PROTOCOL, which are connected to each other, and put file + * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + * one will be chosen automatically. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_socketpair (__domain, __type, __protocol, __fds); +} + +int +socketpair (int __domain, int __type, int __protocol, int __fds[2]) +{ + int rv; + pid_t pid = getpid (); + + /* handle domains implemented by vpp */ + switch (__domain) + { + case AF_INET: + case AF_INET6: + /* handle types implemented by vpp */ + switch (__type) + { + case SOCK_STREAM: + case SOCK_DGRAM: + rv = vcom_socketpair (__domain, __type, __protocol, __fds); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] socketpair: " + "'%04d'= D='%04d', T='%04d', P='%04d'\n", + pid, rv, __domain, __type, __protocol); + if (rv < 0) + { + errno = -rv; + return -1; + } + return 0; + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + + break; + + default: + goto CALL_GLIBC_SOCKET_API; + break; + } + +CALL_GLIBC_SOCKET_API: + return libc_socketpair (__domain, __type, __protocol, __fds); +} + +/* + * Give the socket FD the local address ADDR + * (which is LEN bytes long). 
+ * */ +int +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + return -EINVAL; + break; + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + return -EINVAL; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_bind (__fd, __addr, __len); + return rv; + break; + + default: + return -1; + break; + } + + return -1; +} + +int +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + + rv = vcom_bind (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] bind: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_bind (__fd, __addr, __len); +} + +/* + * Put the local address of FD into *ADDR and its length in *LEN. + * */ +int +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockname (__fd, __addr, __len); +} + +int +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockname (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getsockname: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockname (__fd, __addr, __len); +} + +/* + * Open a connection on socket FD to peer at ADDR + * (which LEN bytes long). 
For connectionless socket types, just set + * the default address to send to and the only address from which to + * accept transmissions. Return 0 on success, -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + + if (vcom_init () != 0) + { + return -1; + } + + /* validate __len */ + switch (__addr->sa_family) + { + case AF_INET: + if (__len != INET_ADDRSTRLEN) + return -1; + break; + case AF_INET6: + if (__len != INET6_ADDRSTRLEN) + return -1; + break; + + default: + return -1; + break; + } + + /* handle domains implemented by vpp */ + switch (__addr->sa_family) + { + case AF_INET: + case AF_INET6: + rv = vcom_socket_connect (__fd, __addr, __len); + break; + + default: + return -1; + break; + } + + return rv; +} + +int +connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_connect (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] connect: " + "'%04d'='%04d', '%p', '%04d'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + + return libc_connect (__fd, __addr, __len); +} + +/* + * Put the address of the peer connected to socket FD into *ADDR + * (which is *LEN bytes long), and its actual length into *LEN. 
+ * */ +int +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getpeername (__fd, __addr, __len); +} + +int +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getpeername (__fd, __addr, __len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getpeername: " + "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getpeername (__fd, __addr, __len); +} + +/* + * Send N bytes of BUF to socket FD. Returns the number sent or -1. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags); +} + +ssize_t +send (int __fd, const void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_send (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] send: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_send (__fd, __buf, __n, __flags); +} + +/* + * Read N bytes into BUF from socket FD. + * Returns the number read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recv (__fd, __buf, __n, __flags); +} + +ssize_t +recv (int __fd, void *__buf, size_t __n, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recv (__fd, __buf, __n, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recv: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recv (__fd, __buf, __n, __flags); +} + +/* + * Send N bytes of BUF on socket FD to peer at address ADDR (which is + * ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +sendto (int __fd, const void *__buf, size_t __n, int __flags, + __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendto: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%04d'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Read N bytes into BUF through socket FD. + * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the + * address of the sender, and store the actual size of the address + * in *ADDR_LEN. 
+ * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +ssize_t +recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvfrom: " + "'%04d'='%04d', '%p', '%04d', '%04x', " + "'%p', '%p'\n", + pid, (int) size, __fd, __buf, (int) __n, __flags, + __addr, __addr_len); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len); +} + +/* + * Send a message described MESSAGE on socket FD. + * Returns the number of bytes sent, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmsg (__fd, __message, __flags); +} + +ssize_t +sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Send a VLEN messages as described by VMESSAGES to socket FD. + * Returns the number of datagrams successfully written + * or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_sendmmsg (__fd, __message, __vlen, __flags); +} + +int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_sendmmsg (__fd, __message, __vlen, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] sendmmsg: " + "'%04d'='%04d', '%p', '%04d', '%04x'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_sendmmsg (__fd, __message, __vlen, __flags); +} + +#endif + +/* + * Receive a message as described by MESSAGE from socket FD. + * Returns the number of bytes read or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. 
+ * */ +ssize_t +vcom_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmsg (__fd, __message, __flags); +} + +ssize_t +recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmsg (__fd, __message, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmsg: " + "'%04d'='%04d', '%p', '%04x'\n", + pid, (int) size, __fd, __message, __flags); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmsg (__fd, __message, __flags); +} + +#ifdef __USE_GNU +/* + * Receive up to VLEN messages as described by VMESSAGES from socket FD. + * Returns the number of messages received or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo) +{ + ssize_t size; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] recvmmsg: " + "'%04d'='%04d', '%p', " + "'%04d', '%04x', '%p'\n", + pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo); + if (size < 0) + { + errno = -size; + return -1; + } + return size; + } + return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo); +} + +#endif + +/* + * Put the current value for socket FD's option OPTNAME + * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long), + * and set *OPTLEN to the value's actual length. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_getsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] getsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%p'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_getsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Set socket FD's option OPTNAME at protocol level LEVEL + * to *OPTVAL (which is OPTLEN bytes long). + * Returns 0 on success, -1 for errors. + * */ +int +vcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_setsockopt (__fd, __level, __optname, + __optval, __optlen); +} + +int +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] setsockopt: " + "'%04d'='%04d', '%04d', '%04d', " + "'%p', '%04d'\n", + pid, rv, __fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_setsockopt (__fd, __level, __optname, __optval, __optlen); +} + +/* + * Prepare to accept connections on socket FD. + * N connection requests will be queued before further + * requests are refused. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_listen (int __fd, int __n) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_listen (__fd, __n); +} + +int +listen (int __fd, int __n) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_listen (__fd, __n); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] listen: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_listen (__fd, __n); +} + +/* + * Await a connection on socket FD. + * When a connection arrives, open a new socket to communicate + * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address + * of the connecting peer and *ADDR_LEN to the address's actual + * length, and return the new socket's descriptor, or -1 for errors. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_accept (__fd, __addr, __addr_len); +} + +int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len) +{ + int rv = -1; + pid_t pid = getpid (); + pthread_t tid = pthread_self (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept1: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + rv = vcom_accept (__fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d][%lu (0x%lx)] accept2: " + "'%04d'='%04d', '%p', '%p'\n", + pid, (unsigned long) tid, (unsigned long) tid, + rv, __fd, __addr, __addr_len); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept (__fd, __addr, __addr_len); +} + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes 
an additional parameter to specify + * flags. + * This function is a cancellation point and therefore not marked + * with __THROW. + * */ +int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + + if (vcom_init () != 0) + { + return -1; + } + + return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags); +} + +int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + rv = vcom_accept4 (__fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] accept4: " + "'%04d'='%04d', '%p', '%p', '%04x'\n", + pid, rv, __fd, __addr, __addr_len, __flags); + if (VCOM_DEBUG > 0) + vcom_socket_main_show (); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + return libc_accept4 (__fd, __addr, __addr_len, __flags); +} + +#endif + +/* + * Shut down all or part of the connection open on socket FD. + * HOW determines what to shut down: + * SHUT_RD = No more receptions; + * SHUT_WR = No more transmissions; + * SHUT_RDWR = No more receptions or transmissions. + * Returns 0 on success, -1 for errors. 
+ * */ +int +vcom_shutdown (int __fd, int __how) +{ + if (vcom_init () != 0) + { + return -1; + } + return vcom_socket_shutdown (__fd, __how); +} + +int +shutdown (int __fd, int __how) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_shutdown (__fd, __how); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] shutdown: " + "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + return libc_shutdown (__fd, __how); +} + +int +vcom_epoll_create (int __size) +{ + + if (vcom_init () != 0) + { + return -1; + } + + if (__size <= 0) + { + return -EINVAL; + } + + /* __size argument is ignored "thereafter" */ + return vcom_epoll_create1 (0); +} + +/* + * __size argument is ignored, but must be greater than zero + */ +int +epoll_create (int __size) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create (__size); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +int +vcom_epoll_create1 (int __flags) +{ + if (vcom_init () != 0) + { + return -1; + } + + if (__flags < 0) + { + return -EINVAL; + } + if (__flags & ~EPOLL_CLOEXEC) + { + return -EINVAL; + } + /* __flags can be either zero or EPOLL_CLOEXEC */ + /* implementation */ + return vcom_socket_epoll_create1 (__flags); +} + +/* + * __flags can be either zero or EPOLL_CLOEXEC + * */ +int +epoll_create1 (int __flags) +{ + int rv = 0; + pid_t pid = getpid (); + + rv = vcom_epoll_create1 (__flags); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_create: " "'%04d'='%08x'\n", pid, rv, __flags); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; +} + +static inline int +ep_op_has_event (int op) +{ + return op != EPOLL_CTL_DEL; +} + +int +vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* + * the requested 
operation __op is not supported + * by this interface */ + if (!((__op == EPOLL_CTL_ADD) || + (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL))) + { + return -EINVAL; + } + + /* op is ADD or MOD but event parameter is NULL */ + if ((ep_op_has_event (__op) && !__event)) + { + return -EFAULT; + } + + /* fd is same as epfd */ + /* do not permit adding an epoll file descriptor inside itself */ + if (__epfd == __fd) + { + return -EINVAL; + } + + /* implementation */ + return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event); +} + +/* + * implement the controller interface for epoll + * that enables the insertion/removal/change of + * file descriptors inside the interest set. + */ +int +epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + int rv; + pid_t pid = getpid (); + + if (is_vcom_epfd (__epfd)) + { + /* TBD: currently limiting epoll to support only vcom fds */ + if (is_vcom_socket_fd (__fd)) + { + rv = vcom_epoll_ctl (__epfd, __op, __fd, __event); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_ctl: " + "'%04d'='%04d', '%04d', '%04d'\n", + pid, rv, __epfd, __op, __fd); + if (rv != 0) + { + errno = -rv; + return -1; + } + return 0; + } + else + { + /* + * TBD: currently epoll does not support kernel fds + * or epoll fds */ + errno = EBADF; + return -1; + } + } + else + { + /* epfd is not an epoll file descriptor */ + errno = EINVAL; + return -1; + } + return 0; +} + +int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + if (vcom_init () != 0) + { + return -1; + } + + return vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL); +} + +int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_wait (__epfd, __events, __maxevents, __timeout); + if 
(VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_wait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d'\n", + pid, rv, __epfd, __events, __maxevents, __timeout); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + return 0; +} + + +int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + /* implementation */ + return vcom_socket_epoll_pwait (__epfd, __events, + __maxevents, __timeout, __ss); +} + +int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + int rv; + pid_t pid = getpid (); + + if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS) + { + errno = EINVAL; + return -1; + } + + if (is_vcom_epfd (__epfd)) + { + rv = vcom_epoll_pwait (__epfd, __events, __maxevents, __timeout, __ss); + if (VCOM_DEBUG > 0) + fprintf (stderr, + "[%d] epoll_pwait: " + "'%04d'='%04d', '%p', " + "'%04d', '%04d', " + "'%p'\n", + pid, rv, __epfd, __events, __maxevents, __timeout, __ss); + if (rv < 0) + { + errno = -rv; + return -1; + } + return rv; + } + else + { + errno = EINVAL; + return -1; + } + + return 0; +} + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. 
*/ +int +vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + if (vcom_init () != 0) + { + return -1; + } + + return -EOPNOTSUPP; +} + +int +poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) +{ + int rv = 0; + + errno = EOPNOTSUPP; + rv = -1; + return rv; +} + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. */ +int +vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + if (vcom_init () != 0) + { + return -1; + } + + return -EOPNOTSUPP; +} + +int +ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss) +{ + int rv = 0; + + errno = EOPNOTSUPP; + rv = -1; + return rv; +} +#endif + +void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void); + +void DESTRUCTOR_ATTRIBUTE vcom_destructor (void); + +void +vcom_constructor (void) +{ + pid_t pid = getpid (); + + swrap_constructor (); + if (vcom_init () != 0) + { + printf ("\n[%d] vcom_constructor...failed!\n", pid); + } + else + { + printf ("\n[%d] vcom_constructor...done!\n", pid); + } +} + +/* + * This function is called when the library is unloaded + */ +void +vcom_destructor (void) +{ + pid_t pid = getpid (); + + vcom_destroy (); + swrap_destructor (); + printf ("\n[%d] vcom_destructor...done!\n", pid); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.h b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.h new file mode 100644 index 00000000000..f6ee8a2bab0 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_h +#define included_vcom_h + +/* VCOM DEBUG flag. Setting this to 0 (the default) turns off + ASSERT & other debugging code; a value greater than 0 turns it on. */ +#ifndef VCOM_DEBUG +#define VCOM_DEBUG 0 +#endif + +#include <libvcl-ldpreload/vcom_glibc_socket.h> + +#define MAX_VCOM_APP_NAME 256 + +/* Returns 0 on success or -1 on error. */ +extern int vcom_set_app_name (char *__app_name); + +/* + * + * File descriptor based APIs + * + */ + +/* + * vpp implementation of glibc APIs from <unistd.h> + */ +extern int vcom_close (int __fd); + +extern ssize_t __wur vcom_read (int __fd, void *__buf, size_t __nbytes); + +extern ssize_t __wur vcom_write (int __fd, const void *__buf, size_t __n); + +extern ssize_t __wur vcom_readv (int __fd, const struct iovec *__iov, + int __iovcnt); + +extern ssize_t __wur vcom_writev (int __fd, const struct iovec *__iov, + int __iovcnt); + +/* + * vpp implementation of glibc APIs from <fcntl.h> + */ +extern int vcom_fcntl (int __fd, int __cmd, ...); + +/* + * vpp implementation of glibc APIs from <sys/select.h> + */ +extern int +vcom_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +extern int +vcom_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + 
const __sigset_t * __restrict __sigmask); +#endif + +/* + * vpp implementation of glibc APIs from <sys/socket.h> + */ +extern int __THROW vcom_socket (int __domain, int __type, int __protocol); + +/* On Linux, the only supported domain for this call is AF_UNIX +* (or synonymously, AF_LOCAL). Most implementations have the +* same restriction. +* vpp does not implement AF_UNIX domain in this release. +* */ +extern int __THROW +vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +extern int __THROW +vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern int +vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +extern int __THROW +vcom_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +extern ssize_t +vcom_send (int __fd, const void *__buf, size_t __n, int __flags); + +extern ssize_t vcom_recv (int __fd, void *__buf, size_t __n, int __flags); + +extern ssize_t +vcom_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +extern ssize_t +vcom_recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +extern ssize_t +vcom_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +extern ssize_t vcom_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +extern int +vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + +extern int __THROW +vcom_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +extern int __THROW +vcom_setsockopt (int __fd, 
int __level, int __optname, + const void *__optval, socklen_t __optlen); + +extern int __THROW vcom_listen (int __fd, int __n); + +extern int +vcom_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* + * Similar to 'accept' but takes an additional parameter to specify + * flags. + * */ +/* TBD: implemented later */ +extern int +vcom_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +extern int __THROW vcom_shutdown (int __fd, int __how); + +extern int __THROW vcom_epoll_create (int __size); + +extern int __THROW vcom_epoll_create1 (int __flags); + +extern int __THROW +vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event); + +extern int +vcom_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +extern int +vcom_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss); + +extern int vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +extern int +vcom_ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss); +#endif + + +#endif /* included_vcom_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h new file mode 100644 index 00000000000..0f1b1743faa --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_glibc_socket.h @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_glibc_socket_h +#define included_vcom_glibc_socket_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <arpa/inet.h> +#include <fcntl.h> + +#include <sys/epoll.h> +#include <poll.h> + +/* + * + * Generic glibc fd api + * + */ +/* + * glibc APIs from <unistd.h> + */ + +/* Close the file descriptor FD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int close (int __fd); + +/* Read NBYTES into BUF from FD. Return the + number read, -1 for errors or 0 for EOF. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur read (int __fd, void *__buf, size_t __nbytes); + +/* Write N bytes of BUF to FD. Return the number written, or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t __wur write (int __fd, const void *__buf, size_t __n); + + +/* + * glibc APIs from <fcntl.h> + */ + +/* Do the file control operation described by CMD on FD. + The remaining arguments are interpreted depending on CMD. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int fcntl (int __fd, int __cmd, ...); + + +/* + * glibc APIs from <sys/select.h> + */ + +/* Check the first NFDS descriptors each in READFDS (if not NULL) for read + readiness, in WRITEFDS (if not NULL) for write readiness, and in EXCEPTFDS + (if not NULL) for exceptional conditions. 
If TIMEOUT is not NULL, time out + after waiting the interval specified therein. Returns the number of ready + descriptors, or -1 for errors. + + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +/* Same as above only that the TIMEOUT value is given with higher + resolution and a sigmask which is been set temporarily. This version + should be used. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + + +/* + * + * Socket specific glibc api + * + */ + +/* + * glibc APIs from <sys/socket.h> + */ + +/* Create a new socket of type TYPE in domain DOMAIN, using + protocol PROTOCOL. If PROTOCOL is zero, one is chosen automatically. + Returns a file descriptor for the new socket, or -1 for errors. */ +extern int __THROW socket (int __domain, int __type, int __protocol); + +/* Create two new sockets, of type TYPE in domain DOMAIN and using + protocol PROTOCOL, which are connected to each other, and put file + descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + one will be chosen automatically. Returns 0 on success, -1 for errors. */ +extern int __THROW +socketpair (int __domain, int __type, int __protocol, int __fds[2]); + +/* Give the socket FD the local address ADDR (which is LEN bytes long). */ +extern int __THROW +bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the local address of FD into *ADDR and its length in *LEN. 
*/ +extern int __THROW +getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Open a connection on socket FD to peer at ADDR (which is LEN bytes long). + For connectionless socket types, just set the default address to send to + and the only address from which to accept transmissions. + Return 0 on success, -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +/* Put the address of the peer connected to socket FD into *ADDR + (which is *LEN bytes long), and its actual length into *LEN. */ +extern int __THROW +getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len); + +/* Send N bytes of BUF to socket FD. Returns the number sent or -1. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t send (int __fd, const void *__buf, size_t __n, int __flags); + +/* Read N bytes into BUF from socket FD. + Returns the number read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recv (int __fd, void *__buf, size_t __n, int __flags); + +/* Send N bytes of BUF on socket FD to peer at address ADDR (which is + ADDR_LEN bytes long). Returns the number sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len); + +/* Read N bytes into BUF through socket FD. + If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the address of + the sender, and store the actual size of the address in *ADDR_LEN. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW.
*/ +extern ssize_t +recvfrom (int __fd, void *__restrict __buf, + size_t __n, int __flags, + __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +/* Send a message described by MESSAGE on socket FD. + Returns the number of bytes sent, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t +sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Send VLEN messages as described by VMESSAGES to socket FD. + Returns the number of datagrams successfully written or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +/* Receive a message as described by MESSAGE from socket FD. + Returns the number of bytes read or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern ssize_t recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +/* Receive up to VLEN messages as described by VMESSAGES from socket FD. + Returns the number of messages received or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, struct timespec *__tmo); +#endif + + +/* Put the current value for socket FD's option OPTNAME at protocol level LEVEL + into OPTVAL (which is *OPTLEN bytes long), and set *OPTLEN to the value's + actual length. Returns 0 on success, -1 for errors. */ +extern int __THROW +getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen); + +/* Set socket FD's option OPTNAME at protocol level LEVEL + to *OPTVAL (which is OPTLEN bytes long). + Returns 0 on success, -1 for errors.
*/ +extern int __THROW +setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +/* Prepare to accept connections on socket FD. + N connection requests will be queued before further requests are refused. + Returns 0 on success, -1 for errors. */ +extern int __THROW listen (int __fd, int __n); + +/* Await a connection on socket FD. + When a connection arrives, open a new socket to communicate with it, + set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting + peer and *ADDR_LEN to the address's actual length, and return the + new socket's descriptor, or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +/* Similar to 'accept' but takes an additional parameter to specify flags. + + This function is a cancellation point and therefore not marked with + __THROW. */ + /* TBD: implemented later */ +extern int +accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +/* Shut down all or part of the connection open on socket FD. + HOW determines what to shut down: + SHUT_RD = No more receptions; + SHUT_WR = No more transmissions; + SHUT_RDWR = No more receptions or transmissions. + Returns 0 on success, -1 for errors. */ +extern int __THROW shutdown (int __fd, int __how); + + +/* + * glibc APIs from <sys/epoll.h> + */ + +/* Creates an epoll instance. Returns an fd for the new instance. + The "size" parameter is a hint specifying the number of file + descriptors to be associated with the new instance. The fd + returned by epoll_create() should be closed with close(). */ +extern int __THROW epoll_create (int __size); + +/* Same as epoll_create but with an FLAGS parameter. The unused SIZE + parameter has been dropped. */ +extern int __THROW epoll_create1 (int __flags); + +/* Manipulate an epoll instance "epfd". 
Returns 0 in case of success, + -1 in case of error ( the "errno" variable will contain the + specific error code ) The "op" parameter is one of the EPOLL_CTL_* + constants defined above. The "fd" parameter is the target of the + operation. The "event" parameter describes which events the caller + is interested in and any associated user data. */ +extern int __THROW +epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event); + +#define EP_INT_MAX ((int)(~0U>>1)) +#define EP_MAX_EVENTS (EP_INT_MAX / sizeof(struct epoll_event)) + +/* Wait for events on an epoll instance "epfd". Returns the number of + triggered events returned in "events" buffer. Or -1 in case of + error with the "errno" variable set to the specific error code. The + "events" parameter is a buffer that will contain triggered + events. The "maxevents" is the maximum number of events to be + returned ( usually size of "events" ). The "timeout" parameter + specifies the maximum wait time in milliseconds (-1 == infinite). + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +/* Same as epoll_wait, but the thread's signal mask is temporarily + and atomically replaced with the one provided as parameter. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int +epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss); + +/* Poll the file descriptors described by the NFDS structures starting at + FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for + an event to occur; if TIMEOUT is -1, block until an event occurs. + Returns the number of file descriptors with events, zero if timed out, + or -1 for errors. + + This function is a cancellation point and therefore not marked with + __THROW. 
*/ +extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout); + +#ifdef __USE_GNU +/* Like poll, but before waiting the threads signal mask is replaced + with that specified in the fourth parameter. For better usability, + the timeout value is specified using a TIMESPEC object. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int ppoll (struct pollfd *__fds, nfds_t __nfds, + const struct timespec *__timeout, const __sigset_t * __ss); +#endif + + +#endif /* included_vcom_glibc_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c new file mode 100644 index 00000000000..86b923c1457 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.c @@ -0,0 +1,2944 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <unistd.h> +#include <stdio.h> +#include <sys/uio.h> +#include <limits.h> +#define __need_IOV_MAX +#include <bits/stdio_lim.h> + +#include <vppinfra/types.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> + +#include <libvcl-ldpreload/vcom_socket.h> +#include <libvcl-ldpreload/vcom_socket_wrapper.h> +#include <libvcl-ldpreload/vcom.h> + +#include <uri/vppcom.h> + + +/* + * VCOM_SOCKET Private definitions and functions. + */ + +typedef struct vcom_socket_main_t_ +{ + u8 init; + + /* vcom_socket pool */ + vcom_socket_t *vsockets; + + /* Hash table - key:fd, value:index into the vsockets pool */ + uword *sockidx_by_fd; + + /* vcom_epoll pool */ + vcom_epoll_t *vepolls; + + /* Hash table - key:epfd, value:index into the vepolls pool */ + uword *epollidx_by_epfd; + + + /* common epitem pool for all epfd */ + /* TBD: epitem pool per epfd */ + /* vcom_epitem pool */ + vcom_epitem_t *vepitems; + + /* Hash table for epitemidx to epfdfd mapping */ + uword *epitemidx_by_epfdfd; + + /* Hash table - key:epfd, value:vec of epitemidx */ + uword *epitemidxs_by_epfd; + /* Hash table - key:fd, value:vec of epitemidx */ + uword *epitemidxs_by_fd; + +} vcom_socket_main_t; + +vcom_socket_main_t vcom_socket_main; + + +static int +vcom_socket_open_socket (int domain, int type, int protocol) +{ + int rv = -1; + + /* handle domains implemented by vpp */ + switch (domain) + { + case AF_INET: + case AF_INET6: + /* get socket type and + * handle the socket types supported by vpp */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_DGRAM: + /* the type argument serves a second purpose, + * in addition to specifying a socket type, + * it may include the bitwise OR of any of + * SOCK_NONBLOCK and SOCK_CLOEXEC, to modify + * the behavior of socket. 
*/ + rv = libc_socket (domain, type, protocol); + if (rv == -1) + rv = -errno; + break; + + default: + break; + } + + break; + + default: + break; + } + + return rv; +} + +static int +vcom_socket_open_epoll (int flags) +{ + int rv = -1; + + if (flags < 0) + { + return -EINVAL; + } + if (flags && (flags & ~EPOLL_CLOEXEC)) + { + return -EINVAL; + } + + /* flags can be either zero or EPOLL_CLOEXEC */ + rv = libc_epoll_create1 (flags); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_socket (int fd) +{ + int rv; + + rv = libc_close (fd); + if (rv == -1) + rv = -errno; + + return rv; +} + +static int +vcom_socket_close_epoll (int epfd) +{ + int rv; + + rv = libc_close (epfd); + if (rv == -1) + rv = -errno; + + return rv; +} + +/* + * Public API functions + */ + + +int +vcom_socket_is_vcom_fd (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +int +vcom_socket_is_vcom_epfd (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return 1; + } + return 0; +} + +static inline int +vcom_socket_get_sid (int fd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + return vsock->sid; + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx (int epfd) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, 
epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + return vepoll->vep_idx; + } + return INVALID_VEP_IDX; +} + +static inline int +vcom_socket_get_sid_and_vsock (int fd, vcom_socket_t ** vsockp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + *vsockp = vsock; + return vsock->sid; + } + } + return INVALID_SESSION_ID; +} + +static inline int +vcom_socket_get_vep_idx_and_vepoll (int epfd, vcom_epoll_t ** vepollp) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + + if (p) + { + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (vepoll && vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + *vepollp = vepoll; + return vepoll->vep_idx; + } + } + return INVALID_VEP_IDX; +} + + +static int +vcom_socket_close_vepoll (int epfd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_epoll_t *vepoll; + + p = hash_get (vsm->epollidx_by_epfd, epfd); + if (!p) + return -EBADF; + + vepoll = pool_elt_at_index (vsm->vepolls, p[0]); + if (!vepoll) + return -EBADF; + + if (vepoll->type != EPOLL_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (vepoll->count) + { + if (!vepoll->close) + { + vepoll->close = 1; + return 0; + } + else + { + return -EBADF; + } + } + + /* count is zero */ + rv = vppcom_session_close (vepoll->vep_idx); + rv = vcom_socket_close_epoll (vepoll->epfd); + + vepoll_init (vepoll); + hash_unset (vsm->epollidx_by_epfd, epfd); + pool_put (vsm->vepolls, vepoll); + + return rv; +} + +static int +vcom_socket_close_vsock (int fd) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vcom_epitem_t *vepitem; + + i32 *vepitemidxs = 0; 
+ i32 *vepitemidxs_var = 0; + + p = hash_get (vsm->sockidx_by_fd, fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + rv = vppcom_session_close (vsock->sid); + rv = vcom_socket_close_socket (vsock->fd); + + vsocket_init (vsock); + hash_unset (vsm->sockidx_by_fd, fd); + pool_put (vsm->vsockets, vsock); + + /* + * NOTE: + * Before calling close(), user should remove + * this fd from the epoll-set of all epoll instances, + * otherwise resource(epitems) leaks ensues. + */ + + /* + * 00. close all epoll instances that are marked as "close" + * of which this fd is the "last" remaining member. + * 01. epitems associated with this fd are intentionally + * not removed, see NOTE: above. + * */ + + /* does this fd participate in epoll */ + p = hash_get (vsm->epitemidxs_by_fd, fd); + if (p) + { + vepitemidxs = *(i32 **) p; + vec_foreach (vepitemidxs_var, vepitemidxs) + { + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidxs_var[0]); + if (vepitem && vepitem->fd == fd && + vepitem->type == FD_TYPE_VCOM_SOCKET) + { + i32 vep_idx; + vcom_epoll_t *vepoll; + if ((vep_idx = + vcom_socket_get_vep_idx_and_vepoll (vepitem->epfd, + &vepoll)) != + INVALID_VEP_IDX) + { + if (vepoll->close) + { + if (vepoll->count == 1) + { + /* + * force count to zero and + * close this epoll instance + * */ + vepoll->count = 0; + vcom_socket_close_vepoll (vepoll->epfd); + } + else + { + vepoll->count -= 1; + } + } + } + } + + } + } + + return rv; +} + +int +vcom_socket_close (int __fd) +{ + int rv; + + if (vcom_socket_is_vcom_fd (__fd)) + { + rv = vcom_socket_close_vsock (__fd); + } + else if (vcom_socket_is_vcom_epfd (__fd)) + { + rv = vcom_socket_close_vepoll (__fd); + } + else + { + rv = -EBADF; + } + + return rv; +} + +ssize_t +vcom_socket_read (int __fd, void *__buf, size_t __nbytes) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; 
+ vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __nbytes < 0) + { + return -EINVAL; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + } + while (rv == -EAGAIN || rv == -EWOULDBLOCK); + return rv; + } + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the read would + * block. + * */ + /* is non blocking */ + rv = vppcom_session_read (vsock->sid, __buf, __nbytes); + return rv; +} + +ssize_t +vcom_socket_readv (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + ssize_t total = 0, len = 0; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + /* Sanity check */ + for (int i = 0; i < __iovcnt; ++i) + { + if (SSIZE_MAX - len < __iov[i].iov_len) + return -EINVAL; + len += __iov[i].iov_len; + } + + rv = vcom_fcntl (__fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + do + { + for (int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + break; + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + } + while ((rv == -EAGAIN || rv == -EWOULDBLOCK) && total == 0); + return total; + } + + /* is non blocking */ + for 
(int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_read (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + { + errno = rv; + return rv; + } + } + else + { + total += rv; + if (rv < __iov[i].iov_len) + /* Read less than buffer provided, no point to continue */ + break; + } + } + return total; +} + +ssize_t +vcom_socket_write (int __fd, const void *__buf, size_t __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + rv = vppcom_session_write (vsock->sid, (void *) __buf, __n); + return rv; +} + +ssize_t +vcom_socket_writev (int __fd, const struct iovec * __iov, int __iovcnt) +{ + int rv = -1; + ssize_t total = 0; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (__iov == 0 || __iovcnt == 0 || __iovcnt > IOV_MAX) + return -EINVAL; + + for (int i = 0; i < __iovcnt; ++i) + { + rv = vppcom_session_write (vsock->sid, __iov[i].iov_base, + __iov[i].iov_len); + if (rv < 0) + { + if (total > 0) + break; + else + return rv; + } + else + total += rv; + } + return total; +} + +/* + * RETURN: 0 - invalid cmd + * 1 - cmd not handled by vcom and vppcom + * 2 - cmd handled by vcom socket resource + * 3 - cmd handled by vppcom + * */ +/* TBD: incomplete list of cmd */ +static int +vcom_socket_check_fcntl_cmd (int __cmd) +{ + switch (__cmd) + { + /*cmd not handled by vcom and vppcom */ + /* Fallthrough */ + case F_DUPFD: + case 
F_DUPFD_CLOEXEC: + return 1; + + /* cmd handled by vcom socket resource */ + /* Fallthrough */ + case F_GETFD: + case F_SETFD: + case F_GETFL: + case F_SETFL: + case F_GETLK: + case F_SETLK: + case F_SETLKW: + case F_GETOWN: + case F_SETOWN: + return 2; + +#if 0 + /* cmd handled by vppcom */ + case F_XXXXX: + return 3; +#endif + /* invalid cmd */ + default: + return 0; + } + return 0; +} + +/* TBD: move it to vppcom */ +static int +vppcom_session_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv; + + rv = -EINVAL; + + return rv; +} + +int +vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap) +{ + int rv = -EBADF; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + switch (vcom_socket_check_fcntl_cmd (__cmd)) + { + /* invalid cmd */ + case 0: + rv = -EBADF; + break; + /*cmd not handled by vcom and vppcom */ + case 1: + rv = -EBADF; + break; + /* cmd handled by vcom socket resource */ + case 2: + rv = libc_vfcntl (vsock->fd, __cmd, __ap); + break; + /* cmd handled by vppcom */ + case 3: + rv = vppcom_session_fcntl_va (vsock->sid, __cmd, __ap); + break; + + default: + rv = -EINVAL; + break; + } + + return rv; +} + +static inline int +vcom_socket_fds_2_sid_fds ( + /* dest */ + int *vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds, + /* src */ + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_sid is -1 */ + int max_sid = -1; + int nsid = 0; + + /* + * set sid in sid sets corresponding to fd's in fd sets + * compute nsid and vcom_nsid_fds from sid sets + */ + + for (fd = 0; fd < vcom_nfds; 
fd++) + { + /* + * F fd set, src + * S sid set, dest + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + FD_SET (sid, (S)); \ + if (sid > max_sid) \ + { \ + max_sid = sid; \ + } \ + ++nsid; \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + *vcom_nsid_fds = max_sid != -1 ? max_sid + 1 : 0; + rv = nsid; + +done: + return rv; +} + +/* + * PRE: 00. sid sets were derived from fd sets + * 01. sid sets were updated with sids that actually changed + * status + * 02. fd sets still has watched fds + * + * This function will modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ +static inline int +vcom_socket_sid_fds_2_fds ( + /* dest */ + int *new_vcom_nfds, + int vcom_nfds, + fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + /* src */ + int vcom_nsid_fds, + fd_set * __restrict vcom_rd_sid_fds, + fd_set * __restrict vcom_wr_sid_fds, + fd_set * __restrict vcom_ex_sid_fds) +{ + int rv = 0; + int fd; + int sid; + /* invalid max_fd is -1 */ + int max_fd = -1; + int nfd = 0; + + + /* + * modify in place fd sets to indicate which fd's + * actually changed status(inferred from sid sets) + */ + for (fd = 0; fd < vcom_nfds; fd++) + { + /* + * F fd set, dest + * S sid set, src + */ +#define _(S,F) \ + if ((F) && (S) && FD_ISSET (fd, (F))) \ + { \ + sid = vcom_socket_get_sid (fd); \ + if (sid != INVALID_SESSION_ID) \ + { \ + if (!FD_ISSET (sid, (S))) \ + { \ + FD_CLR(fd, (F)); \ + } \ + } \ + else \ + { \ + rv = -EBADFD; \ + goto done; \ + } \ + } + + + _(vcom_rd_sid_fds, vcom_readfds); + _(vcom_wr_sid_fds, vcom_writefds); + _(vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + } + + /* + * compute nfd and new_vcom_nfds from fd sets + */ + 
for (fd = 0; fd < vcom_nfds; fd++) + { + +#define _(F) \ + if ((F) && FD_ISSET (fd, (F))) \ + { \ + if (fd > max_fd) \ + { \ + max_fd = fd; \ + } \ + ++nfd; \ + } + + + _(vcom_readfds); + _(vcom_writefds); + _(vcom_exceptfds); +#undef _ + + } + + *new_vcom_nfds = max_fd != -1 ? max_fd + 1 : 0; + rv = nfd; + +done: + return rv; +} + +/* + * PRE: + * vom_socket_select is always called with + * timeout->tv_sec and timeout->tv_usec set to zero. + * hence vppcom_select return immediately. + */ +/* + * TBD: do{body;} while(timeout conditional); timeout loop + */ +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout) +{ + int rv = -EBADF; + pid_t pid = getpid (); + + int new_vcom_nfds = 0; + int new_vcom_nfd = 0; + + /* vcom sid fds */ + fd_set vcom_rd_sid_fds; + fd_set vcom_wr_sid_fds; + fd_set vcom_ex_sid_fds; + unsigned long vcom_nsid_fds = 0; + int vcom_nsid = 0; + + /* in seconds eg. 
3.123456789 seconds */ + double time_to_wait = (double) 0; + + /* validate inputs */ + if (vcom_nfds < 0) + { + return -EINVAL; + } + + /* convert timeval timeout to double time_to_wait */ + if (timeout) + { + if (timeout->tv_sec == 0 && timeout->tv_usec == 0) + { + /* polling: vppcom_select returns immediately */ + time_to_wait = (double) 0; + } + else + { + /*TBD: use timeval api */ + time_to_wait = (double) timeout->tv_sec + + (double) timeout->tv_usec / (double) 1000000 + + (double) (timeout->tv_usec % 1000000) / (double) 1000000; + } + } + else + { + /* + * no timeout: vppcom_select can block indefinitely + * waiting for a file descriptor to become ready + * */ + /* set to a phantom value */ + time_to_wait = ~0; + } + + /* zero the sid_sets */ + /* + * F fd set + * S sid set + */ +#define _(S,F) \ + if ((F)) \ + { \ + FD_ZERO ((S)); \ + } + + + _(&vcom_rd_sid_fds, vcom_readfds); + _(&vcom_wr_sid_fds, vcom_writefds); + _(&vcom_ex_sid_fds, vcom_exceptfds); +#undef _ + + /* populate read, write and except sid_sets */ + vcom_nsid = vcom_socket_fds_2_sid_fds ( + /* dest */ + vcom_readfds || vcom_writefds + || vcom_exceptfds ? (int *) + &vcom_nsid_fds : NULL, + vcom_readfds ? &vcom_rd_sid_fds : + NULL, + vcom_writefds ? &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? &vcom_ex_sid_fds : + NULL, + /* src */ + vcom_nfds, + vcom_readfds, + vcom_writefds, vcom_exceptfds); + if (vcom_nsid < 0) + { + return vcom_nsid; + } + if (vcom_nsid_fds < 0) + { + return -EINVAL; + } + + rv = vppcom_select (vcom_nsid_fds, + vcom_readfds ? (unsigned long *) &vcom_rd_sid_fds : + NULL, + vcom_writefds ? (unsigned long *) &vcom_wr_sid_fds : + NULL, + vcom_exceptfds ? 
(unsigned long *) &vcom_ex_sid_fds : + NULL, time_to_wait); + if (VCOM_DEBUG > 0) + fprintf (stderr, "[%d] vppcom_select: " + "'%04d'='%04d'\n", pid, rv, (int) vcom_nsid_fds); + + /* check if any file descriptors changed status */ + if (rv > 0) + { + /* + * on exit, sets are modified in place to indicate which + * file descriptors actually changed status + * */ + + /* + * comply with pre-condition + * do not clear vcom fd sets befor calling + * vcom_socket_sid_fds_2_fds + */ + new_vcom_nfd = vcom_socket_sid_fds_2_fds ( + /* dest */ + &new_vcom_nfds, + vcom_nfds, + vcom_readfds, + vcom_writefds, + vcom_exceptfds, + /* src */ + vcom_nsid_fds, + vcom_readfds ? + &vcom_rd_sid_fds : NULL, + vcom_writefds ? + &vcom_wr_sid_fds : NULL, + vcom_exceptfds ? + &vcom_ex_sid_fds : NULL); + if (new_vcom_nfd < 0) + { + return new_vcom_nfd; + } + if (new_vcom_nfds < 0) + { + return -EINVAL; + } + rv = new_vcom_nfd; + } + return rv; +} + + +int +vcom_socket_socket (int __domain, int __type, int __protocol) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sid; + i32 sockidx; + u8 is_nonblocking = __type & SOCK_NONBLOCK ? 1 : 0; + int type = __type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC); + + fd = vcom_socket_open_socket (__domain, __type, __protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + sid = vppcom_session_create (VPPCOM_VRF_DEFAULT, + (type == SOCK_DGRAM) ? 
+ VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP, + is_nonblocking); + if (sid < 0) + { + rv = sid; + goto out_close_socket; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out_close_socket: + vcom_socket_close_socket (fd); +out: + return rv; +} + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]) +{ +/* TBD: */ + return 0; +} + +int +vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr) + { + return -EINVAL; + } + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + if (__len != sizeof (struct sockaddr_in)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + if (__len != sizeof (struct sockaddr_in6)) + { + return -EINVAL; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_bind (vsock->sid, &ep); + /* TBD: remove libc_bind code snippet + * once vppcom implements vppcom_session_getsockname */ + if (rv == 0) + { + rv = libc_bind (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + } + } + return rv; +} + +int +vppcom_session_getsockname (int sid, vppcom_endpt_t * ep) +{ + /* TBD: move it to vppcom */ 
+ return 0; +} + +int +vcom_socket_getsockname (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + if (*__len < 0) + { + return -EINVAL; + } + + /* TBD: remove libc_getsockname code snippet + * once vppcom implements vppcom_session_getsockname */ + rv = libc_getsockname (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + return rv; + } + + /* TBD: use the below code snippet when vppcom + * implements vppcom_session_getsockname */ +#if 0 + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vppcom_session_getsockname (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? 
AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } +#endif + + return rv; +} + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + vppcom_endpt_t ep; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + ep.vrf = VPPCOM_VRF_DEFAULT; + switch (__addr->sa_family) + { + case AF_INET: + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in *) __addr)->sin_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in *) __addr)->sin_port; + break; + + case AF_INET6: + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = + (uint8_t *) & ((const struct sockaddr_in6 *) __addr)->sin6_addr; + ep.port = + (uint16_t) ((const struct sockaddr_in6 *) __addr)->sin6_port; + break; + + default: + return -1; + break; + } + + rv = vppcom_session_connect (vsock->sid, &ep); + } + return rv; +} + +int +vppcom_session_getpeername (int sid, vppcom_endpt_t * ep) +{ + /* TBD: move it to vppcom */ + return 0; +} + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__addr || !__len) + return -EFAULT; + + if (*__len < 0) + { + return -EINVAL; + } + + /* DAW: hack to allow iperf3 to be happy w/ getpeername output */ + { + uint8_t *a; + ((struct 
sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = 0x1000; + a = (uint8_t *) & ((struct sockaddr_in *) __addr)->sin_addr; + a[0] = 0x7f; + a[1] = 0x00; + a[2] = 0x00; + a[3] = 0x01; + *__len = sizeof (struct sockaddr_in); + return 0; + } + + /* TBD: remove libc_getpeername code snippet + * once vppcom implements vppcom_session_getpeername */ + rv = libc_getpeername (__fd, __addr, __len); + if (rv != 0) + { + rv = -errno; + return rv; + } + + /* TBD: use the below code snippet when vppcom + * implements vppcom_session_getpeername */ +#if 0 + vppcom_endpt_t ep; + ep.ip = (u8 *) & ((const struct sockaddr_in *) __addr)->sin_addr; + rv = vppcom_session_getpeername (vsock->sid, &ep); + if (rv == 0) + { + if (ep.vrf == VPPCOM_VRF_DEFAULT) + { + __addr->sa_family = ep.is_ip4 == VPPCOM_IS_IP4 ? AF_INET : AF_INET6; + switch (__addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + *__len = sizeof (struct sockaddr_in); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + *__len = sizeof (struct sockaddr_in6); + break; + + default: + break; + } + } + } +#endif + + return rv; +} + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags) +{ + return vcom_socket_sendto (__fd, __buf, __n, __flags, NULL, 0); +} + +ssize_t +vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags) +{ + int rv = -1; + rv = vcom_socket_recvfrom (__fd, __buf, __n, __flags, NULL, 0); + return rv; +} + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int +vcom_socket_is_connection_mode_socket (int __fd) +{ + int rv = -1; + /* TBD define new vppcom api */ + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int type; + socklen_t optlen; + + p = hash_get (vsm->sockidx_by_fd, __fd); + + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (vsock && vsock->type == 
SOCKET_TYPE_VPPCOM_BOUND) + { + optlen = sizeof (type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, &type, &optlen); + if (rv != 0) + { + return 0; + } + /* get socket type */ + switch (type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + { + case SOCK_STREAM: + case SOCK_SEQPACKET: + return 1; + break; + + default: + return 0; + break; + } + } + } + return 0; +} + +ssize_t +vvppcom_session_sendto (int __sid, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + /* TBD add new vpp api */ + /* TBD add flags parameter */ + rv = vppcom_session_write (__sid, (void *) __buf, (int) __n); + return rv; +} + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__addr != NULL) || (__addr_len != 0)) + { + return -EISCONN; + } + } + else + { + if (!__addr || __addr_len < 0) + { + return -EDESTADDRREQ; + } + /* not a vppcom supported address family */ + if ((__addr->sa_family != AF_INET) || (__addr->sa_family != AF_INET6)) + { + return -EINVAL; + } + } + + rv = vvppcom_session_sendto (vsock->sid, (void *) __buf, (int) __n, + __flags, __addr, __addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_session_recvfrom (int __sid, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + + /* TBD add 
flags parameter */ + rv = vppcom_session_read (__sid, __buf, __n); + return rv; +} + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__buf || __n < 0) + { + return -EINVAL; + } + + if (__addr || __addr_len < 0) + { + return -EINVAL; + } + + rv = vppcom_session_recvfrom (vsock->sid, __buf, __n, + __flags, __addr, __addr_len); + return rv; +} + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_sendmsg (int __sid, const struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_write (__sid, (void *) __message->__buf, + (int)__n); */ + return rv; +} + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vcom_socket_is_connection_mode_socket (__fd)) + { + /* ignore __addr and _addr_len */ + /* and EISCONN may be returned when they are not NULL and 0 */ + if ((__message->msg_name != NULL) || (__message->msg_namelen != 0)) + { + return -EISCONN; + } + } + else + { + /* TBD: validate __message->msg_name and __message->msg_namelen + * and return -EINVAL on validation error + * */ + ; + } + + rv = vppcom_sendmsg (vsock->sid, __message, __flags); + + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags) +{ + + /* TBD: define a new vppcom 
api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static ssize_t +vppcom_recvmsg (int __sid, struct msghdr *__message, int __flags) +{ + int rv = -1; + /* rv = vppcom_session_read (__sid, (void *) __message->__buf, + (int)__n); */ + rv = -EOPNOTSUPP; + return rv; +} + +ssize_t +vcom_socket_recvmsg (int __fd, struct msghdr * __message, int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__message) + { + return -EINVAL; + } + + /* validate __flags */ + + rv = vppcom_recvmsg (vsock->sid, __message, __flags); + return rv; +} + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo) +{ + /* TBD: define a new vppcom api */ + return 0; +} +#endif + +/* TBD: move it to vppcom */ +static int +vppcom_getsockopt (int __sid, int __level, int __optname, + void *__restrict __optval, socklen_t * __restrict __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options get from vppcom */ +#if 0 + return 0; +#endif + + /* 2. 
unhandled options */ + return -ENOPROTOOPT; +} + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + if (!__optval && !__optlen) + return -EFAULT; + + if (*__optlen < 0) + { + return -EINVAL; + } + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { +/* + * 1. for socket level options that are socket attributes, + * get from libc_getsockopt. + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * get from vppcom. + * 3. for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + case SO_BINDTODEVICE: + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: + case SO_INCOMING_CPU: + rv = libc_getsockopt (__fd, __level, __optname, __optval, 
__optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. */ + rv = vppcom_getsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + + return rv; +#if 0 + /* 2. unhandled options */ + return -ENOPROTOOPT; +#endif + } + + return rv; +} + +/* TBD: move it to vppcom */ +int +vppcom_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + /* 1. for socket level options that are NOT socket attributes + * and that has corresponding vpp options set it from vppcom */ +#if 0 + return 0; +#endif + + /* 2. unhandled options */ + return -ENOPROTOOPT; +} + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (!p) + return -EBADF; + + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + if (!vsock) + return -ENOTSOCK; + + if (vsock->type != SOCKET_TYPE_VPPCOM_BOUND) + return -EINVAL; + + /* + * Options without arguments + */ + + if (__optname == SO_BINDTODEVICE) + { + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + } + return rv; + } + + if (!__optval) + return -EFAULT; + + if ((__optlen < 0) || (__optlen < sizeof (int))) + return -EINVAL; + + switch (__level) + { + /* handle options at socket level */ + case SOL_SOCKET: + switch (__optname) + { + /* + * 1. for socket level options that are socket attributes, + * set it from libc_getsockopt + * 2. for socket level options that are NOT socket + * attributes and that has corresponding vpp options + * set it from vppcom + * 3. 
for socket level options unimplemented + * return -ENOPROTOOPT */ + case SO_DEBUG: + case SO_DONTROUTE: + case SO_BROADCAST: + case SO_SNDBUF: + case SO_RCVBUF: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_KEEPALIVE: + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + case SO_OOBINLINE: + case SO_NO_CHECK: + case SO_PRIORITY: + case SO_LINGER: + case SO_BSDCOMPAT: + case SO_TIMESTAMP: + case SO_TIMESTAMPNS: + case SO_TIMESTAMPING: + case SO_RCVTIMEO: + case SO_SNDTIMEO: + case SO_RCVLOWAT: + case SO_SNDLOWAT: + case SO_PASSCRED: + case SO_PEERCRED: + case SO_PEERNAME: + case SO_ACCEPTCONN: + case SO_PASSSEC: + case SO_PEERSEC: + case SO_MARK: + case SO_RXQ_OVFL: + case SO_WIFI_STATUS: + case SO_PEEK_OFF: + case SO_NOFCS: + /* + * SO_BINDTODEVICE already handled as + * "Options without arguments" */ + /* case SO_BINDTODEVICE: */ + case SO_GET_FILTER: + case SO_LOCK_FILTER: + case SO_BPF_EXTENSIONS: + case SO_SELECT_ERR_QUEUE: +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +#endif + case SO_MAX_PACING_RATE: + case SO_INCOMING_CPU: + rv = libc_setsockopt (__fd, __level, __optname, __optval, __optlen); + if (rv != 0) + { + rv = -errno; + return rv; + } + break; + + default: + /* We implement the SO_SNDLOWAT etc to not be settable + * (1003.1g 7). + */ + return -ENOPROTOOPT; + } + + break; + + default: + /* 1. handle options that are NOT socket level options, + * but have corresponding vpp otions. */ + rv = vppcom_setsockopt (vsock->sid, __level, __optname, + __optval, __optlen); + return rv; +#if 0 + /* 2. 
unhandled options */ + return -ENOPROTOOPT; +#endif + } + + return rv; +} + +int +vcom_socket_listen (int __fd, int __n) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + /* TBD vppcom to accept __n parameter */ + rv = vppcom_session_listen (vsock->sid, __n); + } + + return rv; +} + +static int +vcom_socket_connected_socket (int __fd, int __sid, + int *__domain, + int *__type, int *__protocol, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + i32 fd; + i32 sockidx; + + socklen_t optlen; + + optlen = sizeof (*__domain); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_DOMAIN, __domain, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__type); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_TYPE, __type, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + optlen = sizeof (*__protocol); + rv = libc_getsockopt (__fd, SOL_SOCKET, SO_PROTOCOL, __protocol, &optlen); + if (rv != 0) + { + rv = -errno; + goto out; + } + + fd = vcom_socket_open_socket (*__domain, *__type | flags, *__protocol); + if (fd < 0) + { + rv = fd; + goto out; + } + + pool_get (vsm->vsockets, vsock); + vsocket_init (vsock); + + sockidx = vsock - vsm->vsockets; + hash_set (vsm->sockidx_by_fd, fd, sockidx); + + vsocket_set (vsock, fd, __sid, SOCKET_TYPE_VPPCOM_BOUND); + return fd; + +out: + return rv; +} + +/* If flag is 0, then accept4() is the same as accept(). 
+ * SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags + */ +static int +vcom_socket_accept_flags (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + int fd; + int sid; + int domain; + int type; + int protocol; + + uint8_t addr8[sizeof (struct in6_addr)]; + vppcom_endpt_t ep; + + ep.ip = addr8; + + /* validate flags */ + + /* + * for documentation + * switch (flags) + * { + * case 0: + * case SOCK_NONBLOCK: + * case SOCK_CLOEXEC: + * case SOCK_NONBLOCK | SOCK_CLOEXEC: + * break; + * + * default: + * return -1; + * } + */ + /* flags can be 0 or can be bitwise OR + * of any of SOCK_NONBLOCK and SOCK_CLOEXEC */ + + if (!(!flags || (flags & (SOCK_NONBLOCK | SOCK_CLOEXEC)))) + { + /* TBD: return proper error code */ + return -1; + } + + /* TBD: return proper error code */ + + if (!vcom_socket_is_connection_mode_socket (__fd)) + { + return -EOPNOTSUPP; + } + + p = hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + + + rv = vcom_fcntl (vsock->fd, F_GETFL, 0); + if (rv < 0) + { + return rv; + } + + /* is blocking */ + if (!(rv & O_NONBLOCK)) + { + /* socket is not marked as nonblocking + * and no pending connections are present + * on the queue, accept () blocks the caller + * until a connection is present. + */ + rv = vppcom_session_accept (vsock->sid, &ep, + -1.0 /* wait forever */ ); + } + else + { + /* The file descriptor refers to a socket and has been + * marked nonblocking(O_NONBLOCK) and the accept would + * block. 
+ * */ + /* is non blocking */ + rv = vppcom_session_accept (vsock->sid, &ep, 0); + /* If the socket is marked nonblocking and + * no pending connections are present on the + * queue, accept fails with the error + * EAGAIN or EWOULDBLOCK + */ + if (rv == VPPCOM_ETIMEDOUT) + { + rv = VPPCOM_EAGAIN; + } + } + if (rv < 0) + { + return rv; + } + + sid = rv; + + /* create a new connected socket resource and set flags + * on the new file descriptor. + * update vsockets and sockidx_by_fd table + * */ + fd = vcom_socket_connected_socket (__fd, sid, + &domain, &type, &protocol, flags); + if (fd < 0) + { + return fd; + } + + rv = fd; + + /* TBD populate __addr and __addr_len */ + /* TBD: The returned address is truncated if the buffer + * provided is too small, in this case, __addr_len will + * return a value greater than was supplied to the call.*/ + if (__addr) + { + if (ep.is_cut_thru) + { + /* TBD populate __addr and __addr_len */ + switch (domain) + { + case AF_INET: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case AF_INET6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. 
+ __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + else + { + switch (ep.is_ip4) + { + case VPPCOM_IS_IP4: + ((struct sockaddr_in *) __addr)->sin_family = AF_INET; + ((struct sockaddr_in *) __addr)->sin_port = ep.port; + memcpy (&((struct sockaddr_in *) __addr)->sin_addr, + addr8, sizeof (struct in_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in); + } + break; + + case VPPCOM_IS_IP6: + ((struct sockaddr_in6 *) __addr)->sin6_family = AF_INET6; + ((struct sockaddr_in6 *) __addr)->sin6_port = ep.port; + memcpy (((struct sockaddr_in6 *) __addr)->sin6_addr. + __in6_u.__u6_addr8, addr8, + sizeof (struct in6_addr)); + /* TBD: populate __addr_len */ + if (__addr_len) + { + *__addr_len = sizeof (struct sockaddr_in6); + } + break; + + default: + return -EAFNOSUPPORT; + } + } + } + else + { + /* when __addr is NULL, nothing is filled in, + * in this case, __addr_len is not used, + * and should also be null + * */ + if (__addr_len) + { + /* TBD: return proper error code */ + return -1; + } + } + } + + return rv; +} + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len) +{ + /* set flags to 0 for accept() */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, 0); +} + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags) +{ + /* SOCK_NONBLOCK and SOCK_CLOEXEC can be bitwise ORed in flags */ + return vcom_socket_accept_flags (__fd, __addr, __addr_len, __flags); +} +#endif + +/* TBD: move it to vppcom */ +int +vppcom_session_shutdown (int __fd, int __how) +{ + return 0; +} + +int +vcom_socket_shutdown (int __fd, int __how) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + uword *p; + vcom_socket_t *vsock; + + p = 
hash_get (vsm->sockidx_by_fd, __fd); + if (p) + { + vsock = pool_elt_at_index (vsm->vsockets, p[0]); + switch (__how) + { + case SHUT_RD: + case SHUT_WR: + case SHUT_RDWR: + rv = vppcom_session_shutdown (vsock->sid, __how); + return rv; + break; + + default: + return -EINVAL; + break; + } + } + + return rv; +} + +int +vcom_socket_epoll_create1 (int __flags) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epoll_t *vepoll; + + i32 epfd; + i32 vep_idx; + i32 epollidx; + + epfd = vcom_socket_open_epoll (__flags); + if (epfd < 0) + { + rv = epfd; + goto out; + } + + vep_idx = vppcom_epoll_create (); + if (vep_idx < 0) + { + rv = vep_idx; + goto out_close_epoll; + } + + pool_get (vsm->vepolls, vepoll); + vepoll_init (vepoll); + + epollidx = vepoll - vsm->vepolls; + hash_set (vsm->epollidx_by_epfd, epfd, epollidx); + + vepoll_set (vepoll, epfd, vep_idx, EPOLL_TYPE_VPPCOM_BOUND, __flags, 0, 0); + + return epfd; + +out_close_epoll: + vcom_socket_close_epoll (epfd); +out: + return rv; +} + +/* + * PRE: vppcom_epoll_ctl() is successful + * free_vepitem_on_del : 0 - no_pool_put, 1 - pool_put + */ +int +vcom_socket_ctl_vepitem (int __epfd, int __op, int __fd, + struct epoll_event *__event, + i32 vep_idx, vcom_epoll_t * vepoll, + i32 vfd_id, void *vfd, vcom_fd_type_t type, + int free_vepitem_on_del) +{ + int rv = -1; + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_epitem_t *vepitem; + + vcom_epitem_key_t epfdfd = {.epfd = __epfd,.fd = __fd }; + uword *p; + i32 vepitemidx; + + i32 *vepitemidxs = 0; + + struct epoll_event revent = {.events = 0,.data.fd = INVALID_FD }; + + i32 vec_idx; + + /* perform control operations on the epoll instance */ + switch (__op) + { + case EPOLL_CTL_ADD: + /* + * supplied file descriptor is already + * registered with this epoll instance + * */ + /* vepitem exists */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (p) + { + rv = -EEXIST; + goto out; + } + + /* add a new vepitem */ + pool_get (vsm->vepitems, 
vepitem); + vepitem_init (vepitem); + + vepitemidx = vepitem - vsm->vepitems; + hash_set (vsm->epitemidx_by_epfdfd, epfdfd.key, vepitemidx); + vepitem_set (vepitem, __epfd, __fd, __fd, __fd, type, *__event, revent); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_epfd, __epfd, vepitemidxs, 0); + } + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + vepitemidxs = 0; + vec_add1 (vepitemidxs, vepitemidx); + hash_set (vsm->epitemidxs_by_fd, __fd, vepitemidxs); + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_add1 (vepitemidxs, vepitemidx); + hash_set3 (vsm->epitemidxs_by_fd, __fd, vepitemidxs, 0); + } + + /* increment vepoll fd count by 1 */ + vepoll->count += 1; + + rv = 0; + goto out; + break; + + case EPOLL_CTL_MOD: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitem = pool_elt_at_index (vsm->vepitems, p[0]); + if (vepitem) + { + vepitem->event = *__event; + vepitem->revent = revent; + } + + rv = 0; + goto out; + break; + + case EPOLL_CTL_DEL: + /* + * supplied file descriptor is not + * registered with this epoll instance + * */ + /* vepitem not exist */ + p = hash_get (vsm->epitemidx_by_epfdfd, epfdfd.key); + if (!p) + { + rv = -ENOENT; + goto out; + } + vepitemidx = *(i32 *) p; + hash_unset (vsm->epitemidx_by_epfdfd, epfdfd.key); + + /* update epitemidxs */ + /* by_epfd */ + p = hash_get (vsm->epitemidxs_by_epfd, __epfd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ 
+ { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_epfd, __epfd); + } + } + } + + /* update epitemidxs */ + /* by_fd */ + p = hash_get (vsm->epitemidxs_by_fd, __fd); + if (!p) /* not exist */ + { + rv = -ENOENT; + goto out; + } + else /* exists */ + { + vepitemidxs = *(i32 **) p; + vec_idx = vec_search (vepitemidxs, vepitemidx); + if (vec_idx != ~0) + { + vec_del1 (vepitemidxs, vec_idx); + if (!vec_len (vepitemidxs)) + { + vec_free (vepitemidxs); + hash_unset (vsm->epitemidxs_by_fd, __fd); + } + } + } + + /* pool put vepitem */ + vepitem = pool_elt_at_index (vsm->vepitems, vepitemidx); + if (free_vepitem_on_del) + { + if (!vepitem) + { + rv = -ENOENT; + goto out; + } + vepitem_init (vepitem); + pool_put (vsm->vepitems, vepitem); + } + else + { + if (!vepitem) + { + vepitem_init (vepitem); + } + } + + /* decrement vepoll fd count by 1 */ + vepoll->count -= 1; + + rv = 0; + goto out; + break; + + default: + rv = -EINVAL; + goto out; + break; + } + +out: + return rv; +} + +/* + * PRE: 00. null pointer check on __event + * 01. 
all other parameters are validated + */ + +static int +vcom_socket_epoll_ctl_internal (int __epfd, int __op, int __fd, + struct epoll_event *__event, + int free_vepitem_on_del) +{ + int rv = -1; + + /* vcom_socket_main_t *vsm = &vcom_socket_main; */ + vcom_epoll_t *vepoll; + + /*__fd could could be vcom socket or vcom epoll or kernel fd */ + void *vfd; + vcom_epoll_t *vfd_vepoll; + vcom_socket_t *vfd_vsock; + + i32 vep_idx; + i32 vfd_id; + + vcom_fd_type_t type = FD_TYPE_INVALID; + + /* validate __event */ + + /* get vep_idx and vepoll */ + vep_idx = vcom_socket_get_vep_idx_and_vepoll (__epfd, &vepoll); + if (vep_idx == INVALID_VEP_IDX) + { + return -EBADF; + } + + /* get vcom fd type, vfd_id and vfd */ + vfd_id = vcom_socket_get_sid_and_vsock (__fd, &vfd_vsock); + if (vfd_id != INVALID_SESSION_ID) + { + type = FD_TYPE_VCOM_SOCKET; + vfd = vfd_vsock; + } + else if ((vfd_id = vcom_socket_get_vep_idx_and_vepoll (__fd, &vfd_vepoll)) + != INVALID_VEP_IDX) + { + type = FD_TYPE_EPOLL; + vfd = vfd_vepoll; + } + else + { + /* FD_TYPE_KERNEL not supported by epoll instance */ + type = FD_TYPE_INVALID; + return -EBADF; + } + + + /* vepoll and vsock are now valid */ + rv = vppcom_epoll_ctl (vep_idx, __op, vfd_id, __event); + if (rv < 0) + { + return rv; + } + + rv = vcom_socket_ctl_vepitem (__epfd, __op, __fd, + __event, + vep_idx, vepoll, + vfd_id, vfd, type, free_vepitem_on_del); + return rv; +} + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 1); + return rv; +} + +static int +vcom_socket_epoll_ctl1 (int __epfd, int __op, int __fd, + struct epoll_event *__event) +{ + int rv = -1; + + rv = vcom_socket_epoll_ctl_internal (__epfd, __op, __fd, __event, 0); + return rv; +} + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss) +{ + int rv = -EBADF; + + /* in seconds 
eg. 3.123456789 seconds */ + double time_to_wait = (double) 0; + + i32 vep_idx; + + /* validate __event */ + if (!__events) + { + rv = -EFAULT; + goto out; + } + + /* validate __timeout */ + if (__timeout > 0) + { + time_to_wait = (double) __timeout / (double) 1000; + } + else if (__timeout == 0) + { + time_to_wait = (double) 0; + } + else if (__timeout == -1) + { + time_to_wait = ~0; + } + else + { + rv = -EBADF; + goto out; + } + + /* get vep_idx */ + vep_idx = vcom_socket_get_vep_idx (__epfd); + if (vep_idx != INVALID_VEP_IDX) + { + rv = vppcom_epoll_wait (vep_idx, __events, __maxevents, time_to_wait); + } +out: + return rv; +} + +int +vcom_socket_main_init (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_init\n"); + + if (!vsm->init) + { + /* TBD: define FD_MAXSIZE and use it here */ + pool_alloc (vsm->vsockets, FD_SETSIZE); + vsm->sockidx_by_fd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepolls, FD_SETSIZE); + vsm->epollidx_by_epfd = hash_create (0, sizeof (i32)); + + pool_alloc (vsm->vepitems, FD_SETSIZE); + vsm->epitemidx_by_epfdfd = hash_create (0, sizeof (i32)); + + vsm->epitemidxs_by_epfd = hash_create (0, sizeof (i32 *)); + vsm->epitemidxs_by_fd = hash_create (0, sizeof (i32 *)); + + vsm->init = 1; + } + + return 0; +} + + +void +vcom_socket_main_show (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs, *vepitemidxs_var; + + if (vsm->init) + { + /* from active list of vsockets show vsock */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + printf( + "fd='%04d', sid='%08x',type='%-30s'\n", + vsock->fd, vsock->sid, + vcom_socket_type_str (vsock->type)); + })); + /* *INDENT-ON* */ + + /* from active list of vepolls, show vepoll */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + printf( + "epfd='%04d', vep_idx='%08x', " + 
"type='%-30s', " + "flags='%d', count='%d', close='%d'\n", + vepoll->epfd, vepoll->vep_idx, + vcom_socket_epoll_type_str (vepoll->type), + vepoll->flags, vepoll->count, vepoll->close); + })); + /* *INDENT-ON* */ + + /* from active list of vepitems, show vepitem */ + + /* *INDENT-OFF* */ + pool_foreach (vepitem, vsm->vepitems, + ({ + printf( + "epfd='%04d', fd='%04d', " + "next_fd='%04d', prev_fd='%04d', " + "type='%-30s', " + "events='%04x', revents='%04x'\n", + vepitem->epfd, vepitem->fd, + vepitem->next_fd, vepitem->prev_fd, + vcom_socket_vcom_fd_type_str (vepitem->type), + vepitem->event.events, vepitem->revent.events); + })); + + /* *INDENT-ON* */ + + /* show epitemidxs for epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + printf("\n[ '%04d': ", epfd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("]\n"); + })); + /* *INDENT-ON* */ + + /* show epitemidxs for fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + printf("\n{ '%04d': ", fd); + vec_foreach (vepitemidxs_var,vepitemidxs) + { + printf("'%04d' ", (int)vepitemidxs_var[0]); + } + printf("}\n"); + })); + /* *INDENT-ON* */ + + } +} + +void +vcom_socket_main_destroy (void) +{ + vcom_socket_main_t *vsm = &vcom_socket_main; + vcom_socket_t *vsock; + + vcom_epoll_t *vepoll; + + vcom_epitem_t *vepitem; + + i32 epfd; + i32 fd; + i32 *vepitemidxs; + + + if (VCOM_DEBUG > 0) + printf ("vcom_socket_main_destroy\n"); + + if (vsm->init) + { + + /* + * from active list of vepitems, + * remove all "vepitem" elements from the pool in a safe way + * */ + + /* *INDENT-OFF* */ + pool_flush (vepitem, vsm->vepitems, + ({ + if (vepitem->type == FD_TYPE_EPOLL || FD_TYPE_VCOM_SOCKET) + { + vcom_socket_epoll_ctl1 (vepitem->epfd, EPOLL_CTL_DEL, + vepitem->fd, NULL); + vepitem_init (vepitem); + } + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepitems); + hash_free 
(vsm->epitemidx_by_epfdfd); + + /* free vepitemidxs for each epfd */ + /* *INDENT-OFF* */ + hash_foreach (epfd, vepitemidxs, + vsm->epitemidxs_by_epfd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_epfd); + + /* free vepitemidxs for each fd */ + /* *INDENT-OFF* */ + hash_foreach (fd, vepitemidxs, + vsm->epitemidxs_by_fd, + ({ + vec_free (vepitemidxs); + })); + /* *INDENT-ON* */ + hash_free (vsm->epitemidxs_by_fd); + + + /* + * from active list of vsockets, + * close socket and vppcom session + * */ + + /* *INDENT-OFF* */ + pool_foreach (vsock, vsm->vsockets, + ({ + if (vsock->type == SOCKET_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vsock->sid); + vcom_socket_close_socket (vsock->fd); + vsocket_init (vsock); + } + })); + /* *INDENT-ON* */ + + /* + * return vsocket element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vsock, vsm->vsockets, + ({ + // vsocket_init(vsock); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vsockets); + hash_free (vsm->sockidx_by_fd); + + /* + * from active list of vepolls, + * close epoll and vppcom_epoll + * */ + + /* *INDENT-OFF* */ + pool_foreach (vepoll, vsm->vepolls, + ({ + if (vepoll->type == EPOLL_TYPE_VPPCOM_BOUND) + { + vppcom_session_close (vepoll->vep_idx); + vcom_socket_close_epoll (vepoll->epfd); /* TBD: */ + vepoll_init (vepoll); + } + })); + /* *INDENT-ON* */ + + /* + * return vepoll element to the pool + * */ + + /* *INDENT-OFF* */ + pool_flush (vepoll, vsm->vepolls, + ({ + // vepoll_init(vepoll); + ; + })); + /* *INDENT-ON* */ + + pool_free (vsm->vepolls); + hash_free (vsm->epollidx_by_epfd); + + vsm->init = 0; + } +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h new file mode 100644 index 00000000000..7170e208012 --- /dev/null +++ 
b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket.h @@ -0,0 +1,457 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vcom_socket_h +#define included_vcom_socket_h + +#include <string.h> + +#include <libvcl-ldpreload/vcom_glibc_socket.h> +#include <vppinfra/types.h> + +#define INVALID_SESSION_ID (~0) +#define INVALID_FD (~0) + +#define INVALID_VEP_IDX INVALID_SESSION_ID +#define INVALID_EPFD INVALID_FD + +typedef enum +{ + SOCKET_TYPE_UNBOUND = 0, + SOCKET_TYPE_KERNEL_BOUND, + SOCKET_TYPE_VPPCOM_BOUND +} vcom_socket_type_t; + +typedef enum +{ + EPOLL_TYPE_UNBOUND = 0, + EPOLL_TYPE_KERNEL_BOUND, + EPOLL_TYPE_VPPCOM_BOUND +} vcom_epoll_type_t; + +typedef enum +{ + FD_TYPE_INVALID = 0, + FD_TYPE_KERNEL, + FD_TYPE_EPOLL, + FD_TYPE_VCOM_SOCKET, + /* add new types here */ + /* FD_TYPE_MAX should be the last entry */ + FD_TYPE_MAX +} vcom_fd_type_t; + +typedef struct +{ + /* file descriptor - + * fd 0, 1, 2 have special meaning and are reserved, + * -1 denote invalid fd */ + i32 fd; + + /* session id - -1 denote invalid sid */ + i32 sid; + + /* socket type */ + vcom_socket_type_t type; + + /* vcom socket attributes here */ + +} vcom_socket_t; + +typedef struct +{ + /* epoll file descriptor - + * epfd 0, 1, 2 have special meaning and are reserved, + * -1 denote invalid epfd */ + i32 epfd; + + /* vep idx - -1 denote invalid vep_idx */ + i32 vep_idx; + + /* epoll type */ + 
vcom_epoll_type_t type; + + /* flags - 0 or EPOLL_CLOEXEC */ + i32 flags; + + /* vcom epoll attributes here */ + + /* + * 00. count of file descriptors currently registered + * on this epoll instance. + * 01. number of file descriptors in the epoll set. + * 02. EPOLL_CTL_ADD, EPOLL_CTL_MOD, EPOLL_CTL_DEL + * update the count. + * 03. cached for frequent access. + * */ + i32 count; + + /* close( ) called on this epoll instance */ + /* 0 - close ( ) not called, 1 - close( ) called. */ + u32 close; + +} vcom_epoll_t; + +typedef struct +{ + /* "container" of this item */ + i32 epfd; + + /* fd - file descriptor information this item refers to */ + i32 fd; + /* next and prev fd in the "epoll set" of epfd */ + i32 next_fd; + i32 prev_fd; + + /* vcom fd type */ + vcom_fd_type_t type; + + /* interested events and the source fd */ + struct epoll_event event; + + /* ready events and the source fd */ + struct epoll_event revent; + + /* epitem attributes here */ + +} vcom_epitem_t; + +typedef union vcom_epitem_key +{ + struct + { + i32 fd; + i32 epfd; + }; + i64 key; +} __EPOLL_PACKED vcom_epitem_key_t; + +static inline char * +vcom_socket_type_str (vcom_socket_type_t t) +{ + switch (t) + { + case SOCKET_TYPE_UNBOUND: + return "SOCKET_TYPE_UNBOUND"; + + case SOCKET_TYPE_KERNEL_BOUND: + return "SOCKET_TYPE_KERNEL_BOUND"; + + case SOCKET_TYPE_VPPCOM_BOUND: + return "SOCKET_TYPE_VPPCOM_BOUND"; + + default: + return "SOCKET_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_epoll_type_str (vcom_epoll_type_t t) +{ + switch (t) + { + case EPOLL_TYPE_UNBOUND: + return "EPOLL_TYPE_UNBOUND"; + + case EPOLL_TYPE_KERNEL_BOUND: + return "EPOLL_TYPE_KERNEL_BOUND"; + + case EPOLL_TYPE_VPPCOM_BOUND: + return "EPOLL_TYPE_VPPCOM_BOUND"; + + default: + return "EPOLL_TYPE_UNKNOWN"; + } +} + +static inline char * +vcom_socket_vcom_fd_type_str (vcom_fd_type_t t) +{ + switch (t) + { + case FD_TYPE_KERNEL: + return "FD_TYPE_KERNEL"; + + case FD_TYPE_EPOLL: + return "FD_TYPE_EPOLL"; + + case 
FD_TYPE_VCOM_SOCKET: + return "FD_TYPE_VCOM_SOCKET"; + + default: + return "FD_TYPE_UNKNOWN"; + } +} + +static inline int +vcom_socket_type_is_vppcom_bound (vcom_socket_type_t t) +{ + return t == SOCKET_TYPE_VPPCOM_BOUND; +} + +static inline int +vcom_socket_epoll_type_is_vppcom_bound (vcom_epoll_type_t t) +{ + return t == EPOLL_TYPE_VPPCOM_BOUND; +} + +static inline void +vsocket_init (vcom_socket_t * vsock) +{ + memset (vsock, 0, sizeof (*vsock)); + + vsock->fd = INVALID_FD; + vsock->sid = INVALID_SESSION_ID; + vsock->type = SOCKET_TYPE_UNBOUND; + /* vcom socket attributes init here */ +} + +static inline void +vepoll_init (vcom_epoll_t * vepoll) +{ + memset (vepoll, 0, sizeof (*vepoll)); + + vepoll->epfd = INVALID_EPFD; + vepoll->vep_idx = INVALID_VEP_IDX; + vepoll->type = EPOLL_TYPE_UNBOUND; + vepoll->flags = 0; + + vepoll->count = 0; + vepoll->close = 0; + /* vcom epoll attributes init here */ +} + +static inline void +vepitem_init (vcom_epitem_t * vepitem) +{ + struct epoll_event event = {.events = 0,.data.fd = INVALID_FD }; + + memset (vepitem, 0, sizeof (*vepitem)); + + vepitem->epfd = INVALID_EPFD; + + vepitem->fd = INVALID_FD; + vepitem->next_fd = INVALID_FD; + vepitem->prev_fd = INVALID_FD; + + vepitem->type = FD_TYPE_INVALID; + + vepitem->event = event; + vepitem->revent = event; + /* vepoll attributes init here */ +} + +static inline void +vepitemkey_init (vcom_epitem_key_t * epfdfd) +{ + memset (epfdfd, 0, sizeof (*epfdfd)); + + epfdfd->epfd = INVALID_EPFD; + epfdfd->fd = INVALID_FD; +} + +static inline void +vsocket_set (vcom_socket_t * vsock, i32 fd, i32 sid, vcom_socket_type_t type) +{ + vsock->fd = fd; + vsock->sid = sid; + vsock->type = type; + /* vcom socket attributes set here */ +} + +static inline void +vepoll_set (vcom_epoll_t * vepoll, + i32 epfd, i32 vep_idx, + vcom_epoll_type_t type, i32 flags, i32 count, u32 close) +{ + vepoll->epfd = epfd; + vepoll->vep_idx = vep_idx; + vepoll->type = type; + vepoll->flags = flags; + + vepoll->count = 
count; + vepoll->close = close; + /* vcom epoll attributes set here */ +} + +static inline void +vepitem_set (vcom_epitem_t * vepitem, + i32 epfd, + i32 fd, i32 next_fd, i32 prev_fd, + vcom_fd_type_t type, + struct epoll_event event, struct epoll_event revent) +{ + vepitem->epfd = epfd; + + vepitem->fd = fd; + vepitem->next_fd = next_fd; + vepitem->prev_fd = prev_fd; + + vepitem->type = type; + + vepitem->event = event; + vepitem->revent = revent; + /* vcom epitem attributes set here */ +} + +static inline void +vepitemkey_set (vcom_epitem_key_t * epfdfd, i32 epfd, i32 fd) +{ + epfdfd->epfd = epfd; + epfdfd->fd = fd; +} + +static inline int +vsocket_is_vppcom_bound (vcom_socket_t * vsock) +{ + return vcom_socket_type_is_vppcom_bound (vsock->type); +} + +static inline int +vepoll_is_vppcom_bound (vcom_epoll_t * vepoll) +{ + return vcom_socket_epoll_type_is_vppcom_bound (vepoll->type); +} + +int vcom_socket_main_init (void); + +void vcom_socket_main_destroy (void); + +void vcom_socket_main_show (void); + +int vcom_socket_is_vcom_fd (int fd); + +int vcom_socket_is_vcom_epfd (int epfd); + +int vcom_socket_close (int __fd); + +ssize_t vcom_socket_read (int __fd, void *__buf, size_t __nbytes); + +ssize_t vcom_socket_readv (int __fd, const struct iovec *__iov, int __iovcnt); + +ssize_t vcom_socket_write (int __fd, const void *__buf, size_t __n); + +ssize_t vcom_socket_writev (int __fd, const struct iovec *__iov, + int __iovcnt); + +int vcom_socket_fcntl_va (int __fd, int __cmd, va_list __ap); + +int +vcom_socket_select (int vcom_nfds, fd_set * __restrict vcom_readfds, + fd_set * __restrict vcom_writefds, + fd_set * __restrict vcom_exceptfds, + struct timeval *__restrict timeout); + + +int vcom_socket_socket (int __domain, int __type, int __protocol); + +int +vcom_socket_socketpair (int __domain, int __type, int __protocol, + int __fds[2]); + +int vcom_socket_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getsockname (int __fd, 
__SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +int +vcom_socket_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len); + +int +vcom_socket_getpeername (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __len); + +ssize_t +vcom_socket_send (int __fd, const void *__buf, size_t __n, int __flags); + +ssize_t vcom_socket_recv (int __fd, void *__buf, size_t __n, int __flags); + +/* + * RETURN 1 if __fd is (SOCK_STREAM, SOCK_SEQPACKET), + * 0 otherwise + * */ +int vcom_socket_is_connection_mode_socket (int __fd); + +ssize_t +vcom_socket_sendto (int __fd, const void *__buf, size_t __n, + int __flags, __CONST_SOCKADDR_ARG __addr, + socklen_t __addr_len); + +ssize_t +vcom_socket_recvfrom (int __fd, void *__restrict __buf, size_t __n, + int __flags, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +ssize_t +vcom_socket_sendmsg (int __fd, const struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_sendmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags); +#endif + +ssize_t vcom_socket_recvmsg (int __fd, struct msghdr *__message, int __flags); + +#ifdef __USE_GNU +int +vcom_socket_recvmmsg (int __fd, struct mmsghdr *__vmessages, + unsigned int __vlen, int __flags, + struct timespec *__tmo); +#endif + +int +vcom_socket_getsockopt (int __fd, int __level, int __optname, + void *__restrict __optval, + socklen_t * __restrict __optlen); + +int +vcom_socket_setsockopt (int __fd, int __level, int __optname, + const void *__optval, socklen_t __optlen); + +int vcom_socket_listen (int __fd, int __n); + +int +vcom_socket_accept (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len); + +#ifdef __USE_GNU +int +vcom_socket_accept4 (int __fd, __SOCKADDR_ARG __addr, + socklen_t * __restrict __addr_len, int __flags); +#endif + +int vcom_socket_shutdown (int __fd, int __how); + +int vcom_socket_epoll_create1 (int __flags); + +int +vcom_socket_epoll_ctl (int __epfd, int __op, int __fd, 
+ struct epoll_event *__event); + +int +vcom_socket_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + +#endif /* included_vcom_socket_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c new file mode 100644 index 00000000000..74a0dcf3b11 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.c @@ -0,0 +1,853 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. 
+*/ + +#include <signal.h> +#include <dlfcn.h> + +#include <stdio.h> +#include <stdarg.h> +#include <unistd.h> +#include <pthread.h> + +#include <libvcl-ldpreload/vcom_socket_wrapper.h> + + +enum swrap_dbglvl_e +{ + SWRAP_LOG_ERROR = 0, + SWRAP_LOG_WARN, + SWRAP_LOG_DEBUG, + SWRAP_LOG_TRACE +}; + + +/* Macros for accessing mutexes */ +#define SWRAP_LOCK(m) do { \ + pthread_mutex_lock(&(m ## _mutex)); \ +} while(0) + +#define SWRAP_UNLOCK(m) do { \ + pthread_mutex_unlock(&(m ## _mutex)); \ +} while(0) + +/* Add new global locks here please */ +#define SWRAP_LOCK_ALL \ + SWRAP_LOCK(libc_symbol_binding); \ + +#define SWRAP_UNLOCK_ALL \ + SWRAP_UNLOCK(libc_symbol_binding); \ + + + +/* The mutex for accessing the global libc.symbols */ +static pthread_mutex_t libc_symbol_binding_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Function prototypes */ + +#ifdef NDEBUG +#define SWRAP_LOG(...) +#else +static void +swrap_log (enum swrap_dbglvl_e dbglvl, const char *func, + const char *format, ...) +PRINTF_ATTRIBUTE (3, 4); +#define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__) + + static void + swrap_log (enum swrap_dbglvl_e dbglvl, + const char *func, const char *format, ...) 
+{ + char buffer[1024]; + va_list va; + unsigned int lvl = SWRAP_LOG_WARN; + + va_start (va, format); + vsnprintf (buffer, sizeof (buffer), format, va); + va_end (va); + + if (lvl >= dbglvl) + { + switch (dbglvl) + { + case SWRAP_LOG_ERROR: + fprintf (stderr, + "SWRAP_ERROR(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_WARN: + fprintf (stderr, + "SWRAP_WARN(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_DEBUG: + fprintf (stderr, + "SWRAP_DEBUG(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + case SWRAP_LOG_TRACE: + fprintf (stderr, + "SWRAP_TRACE(%d) - %s: %s\n", + (int) getpid (), func, buffer); + break; + } + } +} +#endif + + +/********************************************************* + * SWRAP LOADING LIBC FUNCTIONS + *********************************************************/ + +#ifdef HAVE_ACCEPT4 +typedef int (*__libc_accept4) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen, int flags); +#else +typedef int (*__libc_accept) (int sockfd, + struct sockaddr * addr, socklen_t * addrlen); +#endif +typedef int (*__libc_bind) (int sockfd, + const struct sockaddr * addr, socklen_t addrlen); +typedef int (*__libc_close) (int fd); +typedef int (*__libc_connect) (int sockfd, + const struct sockaddr * addr, + socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +typedef int (*__libc_dup) (int fd); +typedef int (*__libc_dup2) (int oldfd, int newfd); +#endif + +typedef int (*__libc_fcntl) (int fd, int cmd, ...); +typedef FILE *(*__libc_fopen) (const char *name, const char *mode); +#ifdef HAVE_FOPEN64 +typedef FILE *(*__libc_fopen64) (const char *name, const char *mode); +#endif +#ifdef HAVE_EVENTFD +typedef int (*__libc_eventfd) (int count, int flags); +#endif +typedef int (*__libc_getpeername) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen); +typedef int (*__libc_getsockname) (int sockfd, + struct sockaddr * addr, + socklen_t * addrlen); +typedef int 
(*__libc_getsockopt) (int sockfd, + int level, + int optname, + void *optval, socklen_t * optlen); +typedef int (*__libc_ioctl) (int d, unsigned long int request, ...); +typedef int (*__libc_listen) (int sockfd, int backlog); +typedef int (*__libc_open) (const char *pathname, int flags, mode_t mode); +#ifdef HAVE_OPEN64 +typedef int (*__libc_open64) (const char *pathname, int flags, mode_t mode); +#endif /* HAVE_OPEN64 */ +typedef int (*__libc_openat) (int dirfd, const char *path, int flags, ...); +typedef int (*__libc_pipe) (int pipefd[2]); +typedef int (*__libc_read) (int fd, void *buf, size_t count); +typedef ssize_t (*__libc_readv) (int fd, const struct iovec * iov, + int iovcnt); +typedef int (*__libc_recv) (int sockfd, void *buf, size_t len, int flags); +typedef int (*__libc_recvfrom) (int sockfd, + void *buf, + size_t len, + int flags, + struct sockaddr * src_addr, + socklen_t * addrlen); +typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_send) (int sockfd, const void *buf, size_t len, + int flags); +typedef int (*__libc_sendmsg) (int sockfd, const struct msghdr * msg, + int flags); +typedef int (*__libc_sendto) (int sockfd, const void *buf, size_t len, + int flags, const struct sockaddr * dst_addr, + socklen_t addrlen); +typedef int (*__libc_setsockopt) (int sockfd, int level, int optname, + const void *optval, socklen_t optlen); +#ifdef HAVE_SIGNALFD +typedef int (*__libc_signalfd) (int fd, const sigset_t * mask, int flags); +#endif +typedef int (*__libc_socket) (int domain, int type, int protocol); +typedef int (*__libc_socketpair) (int domain, int type, int protocol, + int sv[2]); +#ifdef HAVE_TIMERFD_CREATE +typedef int (*__libc_timerfd_create) (int clockid, int flags); +#endif +typedef ssize_t (*__libc_write) (int fd, const void *buf, size_t count); +typedef ssize_t (*__libc_writev) (int fd, const struct iovec * iov, + int iovcnt); + +typedef int (*__libc_shutdown) (int fd, int how); + +typedef 
int (*__libc_select) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval * __restrict __timeout); + +#ifdef __USE_XOPEN2K +typedef int (*__libc_pselect) (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec * __restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +typedef int (*__libc_epoll_create) (int __size); + +typedef int (*__libc_epoll_create1) (int __flags); + +typedef int (*__libc_epoll_ctl) (int __epfd, int __op, int __fd, + struct epoll_event * __event); + +typedef int (*__libc_epoll_wait) (int __epfd, struct epoll_event * __events, + int __maxevents, int __timeout); + +typedef int (*__libc_epoll_pwait) (int __epfd, struct epoll_event * __events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + + +#define SWRAP_SYMBOL_ENTRY(i) \ + union { \ + __libc_##i f; \ + void *obj; \ + } _libc_##i + +struct swrap_libc_symbols +{ +#ifdef HAVE_ACCEPT4 + SWRAP_SYMBOL_ENTRY (accept4); +#else + SWRAP_SYMBOL_ENTRY (accept); +#endif + SWRAP_SYMBOL_ENTRY (bind); + SWRAP_SYMBOL_ENTRY (close); + SWRAP_SYMBOL_ENTRY (connect); +#if 0 + /* TBD: dup and dup2 to be implemented later */ + SWRAP_SYMBOL_ENTRY (dup); + SWRAP_SYMBOL_ENTRY (dup2); +#endif + SWRAP_SYMBOL_ENTRY (fcntl); + SWRAP_SYMBOL_ENTRY (fopen); +#ifdef HAVE_FOPEN64 + SWRAP_SYMBOL_ENTRY (fopen64); +#endif +#ifdef HAVE_EVENTFD + SWRAP_SYMBOL_ENTRY (eventfd); +#endif + SWRAP_SYMBOL_ENTRY (getpeername); + SWRAP_SYMBOL_ENTRY (getsockname); + SWRAP_SYMBOL_ENTRY (getsockopt); + SWRAP_SYMBOL_ENTRY (ioctl); + SWRAP_SYMBOL_ENTRY (listen); + SWRAP_SYMBOL_ENTRY (open); +#ifdef HAVE_OPEN64 + SWRAP_SYMBOL_ENTRY (open64); +#endif + SWRAP_SYMBOL_ENTRY (openat); + SWRAP_SYMBOL_ENTRY (pipe); + SWRAP_SYMBOL_ENTRY (read); + SWRAP_SYMBOL_ENTRY (readv); + SWRAP_SYMBOL_ENTRY (recv); + SWRAP_SYMBOL_ENTRY (recvfrom); + SWRAP_SYMBOL_ENTRY (recvmsg); 
+ SWRAP_SYMBOL_ENTRY (send); + SWRAP_SYMBOL_ENTRY (sendmsg); + SWRAP_SYMBOL_ENTRY (sendto); + SWRAP_SYMBOL_ENTRY (setsockopt); +#ifdef HAVE_SIGNALFD + SWRAP_SYMBOL_ENTRY (signalfd); +#endif + SWRAP_SYMBOL_ENTRY (socket); + SWRAP_SYMBOL_ENTRY (socketpair); +#ifdef HAVE_TIMERFD_CREATE + SWRAP_SYMBOL_ENTRY (timerfd_create); +#endif + SWRAP_SYMBOL_ENTRY (write); + SWRAP_SYMBOL_ENTRY (writev); + + SWRAP_SYMBOL_ENTRY (shutdown); + SWRAP_SYMBOL_ENTRY (select); +#ifdef __USE_XOPEN2K + SWRAP_SYMBOL_ENTRY (pselect); +#endif + SWRAP_SYMBOL_ENTRY (epoll_create); + SWRAP_SYMBOL_ENTRY (epoll_create1); + SWRAP_SYMBOL_ENTRY (epoll_ctl); + SWRAP_SYMBOL_ENTRY (epoll_wait); + SWRAP_SYMBOL_ENTRY (epoll_pwait); +}; + +struct swrap +{ + struct + { + void *handle; + void *socket_handle; + struct swrap_libc_symbols symbols; + } libc; +}; + +static struct swrap swrap; + +#define LIBC_NAME "libc.so" + +enum swrap_lib +{ + SWRAP_LIBC, +}; + +#ifndef NDEBUG +static const char * +swrap_str_lib (enum swrap_lib lib) +{ + switch (lib) + { + case SWRAP_LIBC: + return "libc"; + } + + /* Compiler would warn us about unhandled enum value if we get here */ + return "unknown"; +} +#endif + +static void * +swrap_load_lib_handle (enum swrap_lib lib) +{ + int flags = RTLD_LAZY; + void *handle = NULL; + int i; + +#ifdef RTLD_DEEPBIND + flags |= RTLD_DEEPBIND; +#endif + + switch (lib) + { + case SWRAP_LIBC: + handle = swrap.libc.handle; +#ifdef LIBC_SO + if (handle == NULL) + { + handle = dlopen (LIBC_SO, flags); + + swrap.libc.handle = handle; + } +#endif + if (handle == NULL) + { + for (i = 10; i >= 0; i--) + { + char soname[256] = { 0 }; + + snprintf (soname, sizeof (soname), "libc.so.%d", i); + handle = dlopen (soname, flags); + if (handle != NULL) + { + break; + } + } + + swrap.libc.handle = handle; + } + break; + } + + if (handle == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to dlopen library: %s\n", dlerror ()); + exit (-1); + } + + return handle; +} + +static void * +_swrap_bind_symbol (enum 
swrap_lib lib, const char *fn_name) +{ + void *handle; + void *func; + + handle = swrap_load_lib_handle (lib); + + func = dlsym (handle, fn_name); + if (func == NULL) + { + SWRAP_LOG (SWRAP_LOG_ERROR, + "Failed to find %s: %s\n", fn_name, dlerror ()); + exit (-1); + } + + SWRAP_LOG (SWRAP_LOG_TRACE, + "Loaded %s from %s", fn_name, swrap_str_lib (lib)); + + return func; +} + +#define swrap_bind_symbol_libc(sym_name) \ + SWRAP_LOCK(libc_symbol_binding); \ + if (swrap.libc.symbols._libc_##sym_name.obj == NULL) { \ + swrap.libc.symbols._libc_##sym_name.obj = \ + _swrap_bind_symbol(SWRAP_LIBC, #sym_name); \ + } \ + SWRAP_UNLOCK(libc_symbol_binding) + +/* + * IMPORTANT + * + * Functions, especially those from libc, need to be loaded individually; you + * can't load them all at once or gdb will segfault at startup. The same + * applies to valgrind and probably has something to do with the linker. + * So we need to load each function at the point it is called the first time. + */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags) +{ + swrap_bind_symbol_libc (accept4); + + return swrap.libc.symbols._libc_accept4.f (sockfd, addr, addrlen, flags); +} + +#else /* HAVE_ACCEPT4 */ + +int +libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (accept); + + return swrap.libc.symbols._libc_accept.f (sockfd, addr, addrlen); +} +#endif /* HAVE_ACCEPT4 */ + +int +libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (bind); + + return swrap.libc.symbols._libc_bind.f (sockfd, addr, addrlen); +} + +int +libc_close (int fd) +{ + swrap_bind_symbol_libc (close); + + return swrap.libc.symbols._libc_close.f (fd); +} + +int +libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (connect); + + return swrap.libc.symbols._libc_connect.f (sockfd, addr, addrlen); +} + +#if 0 +/* TBD: dup and dup2 to be implemented 
later */ +int +libc_dup (int fd) +{ + swrap_bind_symbol_libc (dup); + + return swrap.libc.symbols._libc_dup.f (fd); +} + +int +libc_dup2 (int oldfd, int newfd) +{ + swrap_bind_symbol_libc (dup2); + + return swrap.libc.symbols._libc_dup2.f (oldfd, newfd); +} +#endif + +#ifdef HAVE_EVENTFD +int +libc_eventfd (int count, int flags) +{ + swrap_bind_symbol_libc (eventfd); + + return swrap.libc.symbols._libc_eventfd.f (count, flags); +} +#endif + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap) +{ + long int args[4]; + int rc; + int i; + + swrap_bind_symbol_libc (fcntl); + + for (i = 0; i < 4; i++) + { + args[i] = va_arg (ap, long int); + } + + rc = swrap.libc.symbols._libc_fcntl.f (fd, + cmd, + args[0], args[1], args[2], args[3]); + + return rc; +} + +int +libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getpeername); + + return swrap.libc.symbols._libc_getpeername.f (sockfd, addr, addrlen); +} + +int +libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (getsockname); + + return swrap.libc.symbols._libc_getsockname.f (sockfd, addr, addrlen); +} + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen) +{ + swrap_bind_symbol_libc (getsockopt); + + return swrap.libc.symbols._libc_getsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_listen (int sockfd, int backlog) +{ + swrap_bind_symbol_libc (listen); + + return swrap.libc.symbols._libc_listen.f (sockfd, backlog); +} + +int +libc_read (int fd, void *buf, size_t count) +{ + swrap_bind_symbol_libc (read); + + return swrap.libc.symbols._libc_read.f (fd, buf, count); +} + +ssize_t +libc_readv (int fd, const struct iovec * iov, int iovcnt) +{ + swrap_bind_symbol_libc (readv); + + return swrap.libc.symbols._libc_readv.f (fd, iov, iovcnt); +} + +int +libc_recv (int sockfd, void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc 
(recv); + + return swrap.libc.symbols._libc_recv.f (sockfd, buf, len, flags); +} + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen) +{ + swrap_bind_symbol_libc (recvfrom); + + return swrap.libc.symbols._libc_recvfrom.f (sockfd, + buf, + len, flags, src_addr, addrlen); +} + +int +libc_recvmsg (int sockfd, struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (recvmsg); + + return swrap.libc.symbols._libc_recvmsg.f (sockfd, msg, flags); +} + +int +libc_send (int sockfd, const void *buf, size_t len, int flags) +{ + swrap_bind_symbol_libc (send); + + return swrap.libc.symbols._libc_send.f (sockfd, buf, len, flags); +} + +int +libc_sendmsg (int sockfd, const struct msghdr *msg, int flags) +{ + swrap_bind_symbol_libc (sendmsg); + + return swrap.libc.symbols._libc_sendmsg.f (sockfd, msg, flags); +} + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen) +{ + swrap_bind_symbol_libc (sendto); + + return swrap.libc.symbols._libc_sendto.f (sockfd, + buf, + len, flags, dst_addr, addrlen); +} + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, socklen_t optlen) +{ + swrap_bind_symbol_libc (setsockopt); + + return swrap.libc.symbols._libc_setsockopt.f (sockfd, + level, + optname, optval, optlen); +} + +int +libc_socket (int domain, int type, int protocol) +{ + swrap_bind_symbol_libc (socket); + + return swrap.libc.symbols._libc_socket.f (domain, type, protocol); +} + +int +libc_socketpair (int domain, int type, int protocol, int sv[2]) +{ + swrap_bind_symbol_libc (socketpair); + + return swrap.libc.symbols._libc_socketpair.f (domain, type, protocol, sv); +} + +ssize_t +libc_write (int fd, const void *buf, size_t count) +{ + swrap_bind_symbol_libc (write); + + return swrap.libc.symbols._libc_write.f (fd, buf, count); +} + +ssize_t +libc_writev (int fd, const struct iovec * iov, int iovcnt) +{ + 
swrap_bind_symbol_libc (writev); + + return swrap.libc.symbols._libc_writev.f (fd, iov, iovcnt); +} + +int +libc_shutdown (int fd, int how) +{ + swrap_bind_symbol_libc (shutdown); + + return swrap.libc.symbols._libc_shutdown.f (fd, how); +} + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout) +{ + swrap_bind_symbol_libc (select); + + return swrap.libc.symbols._libc_select.f (__nfds, __readfds, + __writefds, + __exceptfds, __timeout); +} + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask) +{ + swrap_bind_symbol_libc (pselect); + + return swrap.libc.symbols._libc_pselect.f (__nfds, __readfds, + __writefds, + __exceptfds, + __timeout, __sigmask); +} +#endif + +int +libc_epoll_create (int __size) +{ + swrap_bind_symbol_libc (epoll_create); + + return swrap.libc.symbols._libc_epoll_create.f (__size); +} + +int +libc_epoll_create1 (int __flags) +{ + swrap_bind_symbol_libc (epoll_create1); + + return swrap.libc.symbols._libc_epoll_create1.f (__flags); +} + +int +libc_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event) +{ + swrap_bind_symbol_libc (epoll_ctl); + + return swrap.libc.symbols._libc_epoll_ctl.f (__epfd, __op, __fd, __event); +} + +int +libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout) +{ + swrap_bind_symbol_libc (epoll_wait); + + return swrap.libc.symbols._libc_epoll_wait.f (__epfd, __events, + __maxevents, __timeout); +} + +int +libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, const __sigset_t * __ss) +{ + swrap_bind_symbol_libc (epoll_pwait); + + return swrap.libc.symbols._libc_epoll_pwait.f (__epfd, __events, + __maxevents, __timeout, + __ss); 
+} + +static void +swrap_thread_prepare (void) +{ + SWRAP_LOCK_ALL; +} + +static void +swrap_thread_parent (void) +{ + SWRAP_UNLOCK_ALL; +} + +static void +swrap_thread_child (void) +{ + SWRAP_UNLOCK_ALL; +} + +/**************************** + * CONSTRUCTOR + ***************************/ +void +swrap_constructor (void) +{ + /* + * If we hold a lock and the application forks, then the child + * is not able to unlock the mutex and we are in a deadlock. + * This should prevent such deadlocks. + */ + pthread_atfork (&swrap_thread_prepare, + &swrap_thread_parent, &swrap_thread_child); +} + +/**************************** + * DESTRUCTOR + ***************************/ + +/* + * Intended to run when the library is unloaded: releases the library handles + * stored in swrap.libc (handle, socket_handle) via dlclose() when set. + */ +void +swrap_destructor (void) +{ + if (swrap.libc.handle != NULL) + { + dlclose (swrap.libc.handle); + } + if (swrap.libc.socket_handle) + { + dlclose (swrap.libc.socket_handle); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h new file mode 100644 index 00000000000..ef3ece01eb2 --- /dev/null +++ b/extras/vcl-ldpreload/src/libvcl-ldpreload/vcom_socket_wrapper.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2005-2008 Jelmer Vernooij <jelmer@samba.org> + * Copyright (C) 2006-2014 Stefan Metzmacher <metze@samba.org> + * Copyright (C) 2013-2014 Andreas Schneider <asn@samba.org> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + Socket wrapper library. Passes all socket communication over + unix domain sockets if the environment variable SOCKET_WRAPPER_DIR + is set. 
+*/ + +#ifndef included_vcom_socket_wrapper_h +#define included_vcom_socket_wrapper_h + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/epoll.h> +#include <sys/uio.h> +#include <stdlib.h> + + +/* GCC has a printf-style format attribute check. */ +#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT +#define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b))) +#else +#define PRINTF_ATTRIBUTE(a,b) +#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */ + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address)) +#else +#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE +#endif + +/* + * IMPORTANT + * + * Functions, especially those from libc, need to be loaded individually; you + * can't load them all at once or gdb will segfault at startup. The same + * applies to valgrind and probably has something to do with the linker. + * So we need to load each function at the point it is called the first time. 
+ */ +#ifdef HAVE_ACCEPT4 +int +libc_accept4 (int sockfd, + struct sockaddr *addr, socklen_t * addrlen, int flags); +#else /* HAVE_ACCEPT4 */ +int libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen); +#endif /* HAVE_ACCEPT4 */ + +int libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +int libc_close (int fd); + +int libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen); + +#if 0 +/* TBD: dup and dup2 to be implemented later */ +int libc_dup (int fd); + +int libc_dup2 (int oldfd, int newfd); +#endif + +#ifdef HAVE_EVENTFD +int libc_eventfd (int count, int flags); +#endif + +DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE int +libc_vfcntl (int fd, int cmd, va_list ap); + +int libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen); + +int +libc_getsockopt (int sockfd, + int level, int optname, void *optval, socklen_t * optlen); + +int libc_listen (int sockfd, int backlog); + +int libc_read (int fd, void *buf, size_t count); + +ssize_t libc_readv (int fd, const struct iovec *iov, int iovcnt); + +int libc_recv (int sockfd, void *buf, size_t len, int flags); + +int +libc_recvfrom (int sockfd, + void *buf, + size_t len, + int flags, struct sockaddr *src_addr, socklen_t * addrlen); + +int libc_recvmsg (int sockfd, struct msghdr *msg, int flags); + +int libc_send (int sockfd, const void *buf, size_t len, int flags); + +int libc_sendmsg (int sockfd, const struct msghdr *msg, int flags); + +int +libc_sendto (int sockfd, + const void *buf, + size_t len, + int flags, const struct sockaddr *dst_addr, socklen_t addrlen); + +int +libc_setsockopt (int sockfd, + int level, int optname, const void *optval, + socklen_t optlen); + +int libc_socket (int domain, int type, int protocol); + +int libc_socketpair (int domain, int type, int protocol, int sv[2]); + +ssize_t libc_write (int fd, const void *buf, size_t count); + +ssize_t libc_writev 
(int fd, const struct iovec *iov, int iovcnt); + +int libc_shutdown (int fd, int how); + +int +libc_select (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + struct timeval *__restrict __timeout); + +#ifdef __USE_XOPEN2K +int +libc_pselect (int __nfds, fd_set * __restrict __readfds, + fd_set * __restrict __writefds, + fd_set * __restrict __exceptfds, + const struct timespec *__restrict __timeout, + const __sigset_t * __restrict __sigmask); +#endif + +int libc_epoll_create (int __size); + +int libc_epoll_create1 (int __flags); + +int libc_epoll_ctl (int __epfd, int __op, int __fd, + struct epoll_event *__event); + +int libc_epoll_wait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout); + +int libc_epoll_pwait (int __epfd, struct epoll_event *__events, + int __maxevents, int __timeout, + const __sigset_t * __ss); + +void swrap_constructor (void); + +void swrap_destructor (void); + +#endif /* included_vcom_socket_wrapper_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/vcl-ldpreload/src/vcl-ldpreload.mk b/extras/vcl-ldpreload/src/vcl-ldpreload.mk new file mode 100644 index 00000000000..001308a6edd --- /dev/null +++ b/extras/vcl-ldpreload/src/vcl-ldpreload.mk @@ -0,0 +1,9 @@ +vcl_ldpreload_configure_depend = vpp-install + +vcl_ldpreload_CPPFLAGS = $(call installed_includes_fn, \ + vppinfra \ + uri) + +vcl_ldpreload_LDFLAGS = $(call installed_libs_fn, \ + vppinfra \ + uri) |