From c893f77a90344f6390cef8b9930bae79103b6fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Czsj=E2=80=9D?= <“reGliming@gmail.com”> Date: Wed, 3 Jun 2020 15:46:49 +0800 Subject: [PATCH] LDP remove lock --- src/vcl/CMakeLists.txt | 5 +- src/vcl/ldp.c | 774 ++++++---- src/vcl/ldp.c.orig | 2720 ++++++++++++++++++++++++++++++++++++ src/vcl/ldp.c.rej | 87 ++ src/vcl/ldp.h | 2 +- src/vcl/vcl_private.h | 7 +- src/vcl/vcl_private.h.orig | 679 +++++++++ 7 files changed, 3997 insertions(+), 277 deletions(-) create mode 100644 src/vcl/ldp.c.orig create mode 100644 src/vcl/ldp.c.rej create mode 100644 src/vcl/vcl_private.h.orig diff --git a/src/vcl/CMakeLists.txt b/src/vcl/CMakeLists.txt index ab0a6ad6a..0d30eb6af 100644 --- a/src/vcl/CMakeLists.txt +++ b/src/vcl/CMakeLists.txt @@ -20,7 +20,6 @@ add_vpp_library(vppcom vcl_bapi.c vcl_cfg.c vcl_private.c - vcl_locked.c LINK_LIBRARIES vppinfra svm vlibmemoryclient rt pthread @@ -41,7 +40,7 @@ add_vpp_library(vcl_ldpreload add_vpp_headers(vcl ldp.h ldp_glibc_socket.h + vcl_private.h vppcom.h - vcl_locked.h ldp_socket_wrapper.h -) \ No newline at end of file +) diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index cda4425e5..36aaf5d17 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -24,9 +24,10 @@ #include #include +#include +#include #include -#include #include #include #include @@ -97,8 +98,8 @@ typedef struct ldp_worker_ctx_t *workers; int init; char app_name[LDP_APP_NAME_MAX]; - u32 vlsh_bit_val; - u32 vlsh_bit_mask; + u32 vcl_bit_val; + u32 vcl_bit_mask; u32 debug; u8 transparent_tls; @@ -117,8 +118,8 @@ typedef struct } static ldp_main_t ldp_main = { - .vlsh_bit_val = (1 << LDP_SID_BIT_MIN), - .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, + .vcl_bit_val = (1 << LDP_SID_BIT_MIN), + .vcl_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, .debug = LDP_DEBUG_INIT, .transparent_tls = 0, }; @@ -151,18 +152,18 @@ ldp_get_app_name () } static inline int -ldp_vlsh_to_fd (vls_handle_t vlsh) +ldp_vclsh_to_fd (vcl_session_handle_t vclsh) { - return 
(vlsh + ldp->vlsh_bit_val); + return (vclsh + ldp->vcl_bit_val); } -static inline vls_handle_t -ldp_fd_to_vlsh (int fd) +static inline vcl_session_handle_t +ldp_fd_to_vclsh (int fd) { - if (fd < ldp->vlsh_bit_val) - return VLS_INVALID_HANDLE; + if (fd < (ldp->vcl_bit_val)) + return INVALID_SESSION_ID; - return (fd - ldp->vlsh_bit_val); + return (fd - ldp->vcl_bit_val); } static void @@ -173,6 +174,194 @@ ldp_alloc_workers (void) pool_alloc (ldp->workers, LDP_MAX_NWORKERS); } +static void +ldp_share_listen_session (vcl_worker_t * parent_wrk, + vcl_worker_t * child_wrk, + vcl_session_t * listen_session) +{ +/*Find the listen session of parent worker*/ + if (listen_session->session_index == parent_wrk->listen_session_index) + { + listen_session->session_state = STATE_LISTEN_NO_MQ; + vppcom_session_listen (vcl_session_handle_from_index + (parent_wrk->listen_session_index), + parent_wrk->listen_queue_size); + } +} + +void +ldp_vcl_worker_copy_on_fork (vcl_worker_t * parent_wrk) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + vcl_session_t *listen_session; + wrk->vpp_event_queues = vec_dup (parent_wrk->vpp_event_queues); + wrk->sessions = pool_dup (parent_wrk->sessions); + wrk->session_index_by_vpp_handles = + hash_dup (parent_wrk->session_index_by_vpp_handles); +/*Update listen session for child*/ + pool_foreach (listen_session, wrk->sessions, ( + { + ldp_share_listen_session + (parent_wrk, wrk, + listen_session);})); +} + +static void +ldp_cleanup_vcl_worker (vcl_worker_t * wrk) +{ + vcl_worker_cleanup (wrk, 1 /* notify vpp */ ); +} + +static void +ldp_cleanup_forked_child (vcl_worker_t * wrk, vcl_worker_t * child_wrk) +{ + vcl_worker_t *sub_child; + int tries = 0; + + if (child_wrk->forked_child != ~0) + { + sub_child = vcl_worker_get_if_valid (child_wrk->forked_child); + if (sub_child) + { + /* Wait a bit, maybe the process is going away */ + while (kill (sub_child->current_pid, 0) >= 0 && tries++ < 50) + usleep (1e3); + if (kill (sub_child->current_pid, 0) < 
0) + ldp_cleanup_forked_child (child_wrk, sub_child); + } + } + ldp_cleanup_vcl_worker (child_wrk); + VDBG (0, "Cleaned up forked child wrk %u", child_wrk->wrk_index); + wrk->forked_child = ~0; +} + +static struct sigaction old_sa; + +static void +ldp_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) +{ + vcl_worker_t *wrk, *child_wrk; + + if (vcl_get_worker_index () == ~0) + return; + + /*restore sigchld */ + if (sigaction (SIGCHLD, &old_sa, 0)) + { + VERR ("couldn't restore sigchld"); + exit (-1); + } + + wrk = vcl_worker_get_current (); + if (wrk->forked_child == ~0) + return; + + child_wrk = vcl_worker_get_if_valid (wrk->forked_child); + if (!child_wrk) + goto done; + + if (si && si->si_pid != child_wrk->current_pid) + { + VDBG (0, "unexpected child pid %u", si->si_pid); + goto done; + } + ldp_cleanup_forked_child (wrk, child_wrk); + +done: + if (old_sa.sa_flags & SA_SIGINFO) + { + void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction; + fn (signum, si, uc); + } + else + { + void (*fn) (int) = old_sa.sa_handler; + if (fn) + fn (signum); + } +} + +/*Intercept signal SIGCHLD*/ +static void +ldp_intercept_sigchld () +{ + struct sigaction sa; + clib_memset (&sa, 0, sizeof (sa)); + /*set SA_SIGINFO to validate sa.sa_sigaction rather than sa.sa_handler */ + sa.sa_sigaction = ldp_intercept_sigchld_handler; + sa.sa_flags = SA_SIGINFO; + /*When current process receive the SIGCHLD signal, it would call + **ldp_intercept_sigchld_handler. 
+ */ + if (sigaction (SIGCHLD, &sa, &old_sa)) + { + VERR ("couldn't intercept sigchld"); + exit (-1); + } +} + +static void +ldp_app_pre_fork (void) +{ + ldp_intercept_sigchld (); + vcl_flush_mq_events (); +} + +static void +ldp_app_fork_parent_handler (void) +{ + vcl_session_t *listen_session; + vcl_worker_t *wrk = vcl_worker_get_current (); + listen_session = vcl_session_get (wrk, wrk->listen_session_index); + listen_session->session_state = STATE_LISTEN_NO_MQ; + vcl_send_session_unlisten (wrk, listen_session); + vcm->forking = 1; + while (vcm->forking) + ; + +} + +static void +ldp_app_fork_child_handler (void) +{ + vcl_worker_t *parent_wrk; + int rv, parent_wrk_index; + u8 *child_name; + + parent_wrk_index = vcl_get_worker_index (); + VDBG (0, + "initializing forked child (pid) %u with parent wrk (vcl worker index) %u", + getpid (), parent_wrk_index); + +/*Allocate vcl worker for child*/ + vcl_set_worker_index (~0); + if (!vcl_worker_alloc_and_init ()) + VERR ("couldn't allocate new worker for child process %u", getpid ()); + +/*Attach to binary api*/ + child_name = format (0, "%v-child-%u%c", vcm->app_name, getpid (), 0); + vcl_cleanup_bapi (); + vppcom_api_hookup (); + vcm->app_state = STATE_APP_START; + rv = vppcom_connect_to_vpp ((char *) child_name); + vec_free (child_name); + if (rv) + { + VERR ("couldn't connect to VPP!"); + return; + } + +/* +**Register new allocated vcl worker with VPP +*/ + vcl_worker_register_with_vpp (); + parent_wrk = vcl_worker_get (parent_wrk_index); + ldp_vcl_worker_copy_on_fork (parent_wrk); + parent_wrk->forked_child = vcl_get_worker_index (); + VDBG (0, "forked child main worker initialized"); + vcm->forking = 0; +} + static inline int ldp_init (void) { @@ -184,7 +373,9 @@ ldp_init (void) ldp->init = 1; ldp->vcl_needs_real_epoll = 1; - rv = vls_app_create (ldp_get_app_name ()); + rv = vppcom_app_create (ldp_get_app_name ()); + pthread_atfork (ldp_app_pre_fork, ldp_app_fork_parent_handler, + ldp_app_fork_child_handler); if (rv 
!= VPPCOM_OK) { ldp->vcl_needs_real_epoll = 0; @@ -231,43 +422,43 @@ ldp_init (void) { LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var " LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str, - ldp->vlsh_bit_val, ldp->vlsh_bit_val); + ldp->vcl_bit_val, ldp->vcl_bit_val); } else if (sb < LDP_SID_BIT_MIN) { - ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN); - ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + ldp->vcl_bit_val = (1 << LDP_SID_BIT_MIN); + ldp->vcl_bit_mask = ldp->vcl_bit_val - 1; LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN" " (%d)! sid bit value %d (0x%x)", sb, env_var_str, - LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val); + LDP_SID_BIT_MIN, ldp->vcl_bit_val, ldp->vcl_bit_val); } else if (sb > LDP_SID_BIT_MAX) { - ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX); - ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + ldp->vcl_bit_val = (1 << LDP_SID_BIT_MAX); + ldp->vcl_bit_mask = ldp->vcl_bit_val - 1; LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX" " (%d)! sid bit value %d (0x%x)", sb, env_var_str, - LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val); + LDP_SID_BIT_MAX, ldp->vcl_bit_val, ldp->vcl_bit_val); } else { - ldp->vlsh_bit_val = (1 << sb); - ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + ldp->vcl_bit_val = (1 << sb); + ldp->vcl_bit_mask = ldp->vcl_bit_val - 1; LDBG (0, "configured LDP sid bit (%u) from " LDP_ENV_SID_BIT "! 
sid bit value %d (0x%x)", sb, - ldp->vlsh_bit_val, ldp->vlsh_bit_val); + ldp->vcl_bit_val, ldp->vcl_bit_val); } /* Make sure there are enough bits in the fd set for vcl sessions */ - if (ldp->vlsh_bit_val > FD_SETSIZE / 2) + if (ldp->vcl_bit_val > FD_SETSIZE / 2) { - LDBG (0, "ERROR: LDP vlsh bit value %d > FD_SETSIZE/2 %d!", - ldp->vlsh_bit_val, FD_SETSIZE / 2); + LDBG (0, "ERROR: LDP vclsh bit value %d > FD_SETSIZE/2 %d!", + ldp->vcl_bit_val, FD_SETSIZE / 2); ldp->init = 0; return -1; } @@ -292,16 +483,16 @@ ldp_init (void) int close (int fd) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv, epfd; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + epfd = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (epfd > 0) { LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd); @@ -312,7 +503,8 @@ close (int fd) u32 size = sizeof (epfd); epfd = 0; - (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size); + (void) vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_LIBC_EPFD, + &epfd, &size); } } else if (PREDICT_FALSE (epfd < 0)) @@ -322,9 +514,9 @@ close (int fd) goto done; } - LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh); + LDBG (0, "fd %d: calling vppcom_session_close: vclsh %u", fd, vclsh); - rv = vls_close (vlsh); + rv = vppcom_session_close (vclsh); if (rv != VPPCOM_OK) { errno = -rv; @@ -344,16 +536,16 @@ done: ssize_t read (int fd, void *buf, size_t nbytes) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - size = vls_read (vlsh, buf, nbytes); + size = vppcom_session_read (vclsh, buf, nbytes); if (size < 0) { errno = -size; @@ -372,18 +564,18 @@ ssize_t 
readv (int fd, const struct iovec * iov, int iovcnt) { int rv = 0, i, total = 0; - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size = 0; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { for (i = 0; i < iovcnt; ++i) { - rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len); + rv = vppcom_session_read (vclsh, iov[i].iov_base, iov[i].iov_len); if (rv <= 0) break; else @@ -412,16 +604,16 @@ readv (int fd, const struct iovec * iov, int iovcnt) ssize_t write (int fd, const void *buf, size_t nbytes) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size = 0; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - size = vls_write_msg (vlsh, (void *) buf, nbytes); + size = vppcom_session_write_msg (vclsh, (void *) buf, nbytes); if (size < 0) { errno = -size; @@ -440,18 +632,19 @@ ssize_t writev (int fd, const struct iovec * iov, int iovcnt) { ssize_t size = 0, total = 0; - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int i, rv = 0; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { for (i = 0; i < iovcnt; ++i) { - rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len); + rv = vppcom_session_write_msg (vclsh, iov[i].iov_base, + iov[i].iov_len); if (rv < 0) break; else @@ -481,12 +674,12 @@ writev (int fd, const struct iovec * iov, int iovcnt) static int fcntl_internal (int fd, int cmd, va_list ap) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv = 0; - vlsh = ldp_fd_to_vlsh (fd); - LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + LDBG (0, "fd %u vclsh %d, cmd %u", fd, vclsh, cmd); + if (vclsh != 
INVALID_SESSION_ID) { int flags = va_arg (ap, int); u32 size; @@ -496,11 +689,13 @@ fcntl_internal (int fd, int cmd, va_list ap) switch (cmd) { case F_SETFL: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); break; case F_GETFL: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size); if (rv == VPPCOM_OK) rv = flags; break; @@ -565,7 +760,7 @@ fcntl64 (int fd, int cmd, ...) int ioctl (int fd, unsigned long int cmd, ...) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; va_list ap; int rv; @@ -574,13 +769,13 @@ ioctl (int fd, unsigned long int cmd, ...) va_start (ap, cmd); - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { switch (cmd) { case FIONREAD: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_NREAD, 0, 0); break; case FIONBIO: @@ -592,7 +787,9 @@ ioctl (int fd, unsigned long int cmd, ...) * non-blocking, the flags should be read here and merged * with O_NONBLOCK. 
*/ - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_FLAGS, &flags, + &size); } break; @@ -622,7 +819,7 @@ ldp_select_init_maps (fd_set * __restrict original, u32 n_bytes, uword * si_bits, uword * libc_bits) { uword si_bits_set, libc_bits_set; - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int fd; clib_bitmap_validate (*vclb, minbits); @@ -635,11 +832,11 @@ ldp_select_init_maps (fd_set * __restrict original, clib_bitmap_foreach (fd, *resultb, ({ if (fd > nfds) break; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh == VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh == INVALID_SESSION_ID) clib_bitmap_set_no_check (*libcb, fd, 1); else - *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1); + *vclb = clib_bitmap_set (*vclb, vppcom_session_index (vclsh), 1); })); /* *INDENT-ON* */ @@ -654,7 +851,7 @@ ldp_select_init_maps (fd_set * __restrict original, always_inline int ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; uword si; int fd; @@ -663,9 +860,9 @@ ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb) /* *INDENT-OFF* */ clib_bitmap_foreach (si, vclb, ({ - vlsh = vls_session_index_to_vlsh (si); - ASSERT (vlsh != VLS_INVALID_HANDLE); - fd = ldp_vlsh_to_fd (vlsh); + vclsh = vcl_session_handle_from_index (si); + ASSERT (vclsh != INVALID_SESSION_ID); + fd = ldp_vclsh_to_fd (vclsh); if (PREDICT_FALSE (fd < 0)) { errno = EBADFD; @@ -738,7 +935,7 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, else time_out = -1; - if (nfds <= ldp->vlsh_bit_val) + if (nfds <= ldp->vcl_bit_val) { rv = libc_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask); @@ -788,9 +985,10 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, vec_len (ldpw->si_ex_bitmap) * sizeof (clib_bitmap_t)); - rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, - writefds ? 
ldpw->wr_bitmap : NULL, - exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout); + rv = vppcom_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, + writefds ? ldpw->wr_bitmap : NULL, + exceptfds ? ldpw->ex_bitmap : NULL, + vcl_timeout); if (rv < 0) { errno = -rv; @@ -903,7 +1101,7 @@ pselect (int nfds, fd_set * __restrict readfds, /* If transparent TLS mode is turned on, then ldp will load key and cert. */ static int -load_tls_cert (vls_handle_t vlsh) +load_tls_cert (vcl_session_handle_t vclsh) { char *env_var_str = getenv (LDP_ENV_TLS_CERT); char inbuf[4096]; @@ -921,7 +1119,7 @@ load_tls_cert (vls_handle_t vlsh) } cert_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp); tls_cert = inbuf; - vppcom_session_tls_add_cert (vlsh_to_session_index (vlsh), tls_cert, + vppcom_session_tls_add_cert (vppcom_session_index (vclsh), tls_cert, cert_size); fclose (fp); } @@ -935,7 +1133,7 @@ load_tls_cert (vls_handle_t vlsh) } static int -load_tls_key (vls_handle_t vlsh) +load_tls_key (vcl_session_handle_t vclsh) { char *env_var_str = getenv (LDP_ENV_TLS_KEY); char inbuf[4096]; @@ -953,7 +1151,7 @@ load_tls_key (vls_handle_t vlsh) } key_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp); tls_key = inbuf; - vppcom_session_tls_add_key (vlsh_to_session_index (vlsh), tls_key, + vppcom_session_tls_add_key (vppcom_session_index (vclsh), tls_key, key_size); fclose (fp); } @@ -970,7 +1168,7 @@ socket (int domain, int type, int protocol) { int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0; - vls_handle_t vlsh; + vcl_session_handle_t vclsh; if ((errno = -ldp_init ())) return -1; @@ -987,25 +1185,26 @@ socket (int domain, int type, int protocol) proto = ((sock_type == SOCK_DGRAM) ? 
VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP); - LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u", + LDBG (0, + "calling vppcom_session_create: proto %u (%s), is_nonblocking %u", proto, vppcom_proto_str (proto), is_nonblocking); - vlsh = vls_create (proto, is_nonblocking); - if (vlsh < 0) + vclsh = vppcom_session_create (proto, is_nonblocking); + if (vclsh < 0) { - errno = -vlsh; + errno = -vclsh; rv = -1; } else { if (ldp->transparent_tls) { - if (load_tls_cert (vlsh) < 0 || load_tls_key (vlsh) < 0) + if (load_tls_cert (vclsh) < 0 || load_tls_key (vclsh) < 0) { return -1; } } - rv = ldp_vlsh_to_fd (vlsh); + rv = ldp_vclsh_to_fd (vclsh); } } else @@ -1051,14 +1250,14 @@ socketpair (int domain, int type, int protocol, int fds[2]) int bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { vppcom_endpt_t ep; @@ -1067,8 +1266,8 @@ bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) case AF_INET: if (len != sizeof (struct sockaddr_in)) { - LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!", - fd, vlsh, len); + LDBG (0, "ERROR: fd %d: vclsh %u: Invalid AF_INET addr len %u!", + fd, vclsh, len); errno = EINVAL; rv = -1; goto done; @@ -1081,8 +1280,9 @@ bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) case AF_INET6: if (len != sizeof (struct sockaddr_in6)) { - LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!", - fd, vlsh, len); + LDBG (0, + "ERROR: fd %d: vclsh %u: Invalid AF_INET6 addr len %u!", + fd, vclsh, len); errno = EINVAL; rv = -1; goto done; @@ -1093,16 +1293,17 @@ bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) break; default: - LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!", - fd, vlsh, addr->sa_family); + LDBG (0, "ERROR: fd %d: vclsh %u: Unsupported address family 
%u!", + fd, vclsh, addr->sa_family); errno = EAFNOSUPPORT; rv = -1; goto done; } - LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh, - addr, len); + LDBG (0, + "fd %d: calling vppcom_session_bind: vclsh %u, addr %p, len %u", + fd, vclsh, addr, len); - rv = vls_bind (vlsh, &ep); + rv = vppcom_session_bind (vclsh, &ep); if (rv != VPPCOM_OK) { errno = -rv; @@ -1170,14 +1371,14 @@ ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len, int getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { vppcom_endpt_t ep; u8 addr_buf[sizeof (struct in6_addr)]; @@ -1185,7 +1386,7 @@ getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) ep.ip = addr_buf; - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); if (rv != VPPCOM_OK) { errno = -rv; @@ -1212,7 +1413,7 @@ getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) int connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) @@ -1226,8 +1427,8 @@ connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) goto done; } - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { vppcom_endpt_t ep; @@ -1236,8 +1437,8 @@ connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) case AF_INET: if (len != sizeof (struct sockaddr_in)) { - LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!", - fd, vlsh, len); + LDBG (0, "fd %d: ERROR vclsh %u: Invalid AF_INET addr len %u!", + fd, vclsh, len); errno = EINVAL; rv = -1; goto done; @@ -1250,8 +1451,8 @@ connect (int fd, 
__CONST_SOCKADDR_ARG addr, socklen_t len) case AF_INET6: if (len != sizeof (struct sockaddr_in6)) { - LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!", - fd, vlsh, len); + LDBG (0, "fd %d: ERROR vclsh %u: Invalid AF_INET6 addr len %u!", + fd, vclsh, len); errno = EINVAL; rv = -1; goto done; @@ -1262,16 +1463,17 @@ connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) break; default: - LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!", - fd, vlsh, addr->sa_family); + LDBG (0, "fd %d: ERROR vclsh %u: Unsupported address family %u!", + fd, vclsh, addr->sa_family); errno = EAFNOSUPPORT; rv = -1; goto done; } - LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd, - vlsh, addr, len); + LDBG (0, + "fd %d: calling vppcom_session_connect(): vclsh %u addr %p len %u", + fd, vclsh, addr, len); - rv = vls_connect (vlsh, &ep); + rv = vppcom_session_connect (vclsh, &ep); if (rv != VPPCOM_OK) { errno = -rv; @@ -1294,21 +1496,21 @@ done: int getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { vppcom_endpt_t ep; u8 addr_buf[sizeof (struct in6_addr)]; u32 size = sizeof (ep); ep.ip = addr_buf; - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); if (rv != VPPCOM_OK) { errno = -rv; @@ -1335,15 +1537,15 @@ getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) ssize_t send (int fd, const void *buf, size_t n, int flags) { - vls_handle_t vlsh = ldp_fd_to_vlsh (fd); + vcl_session_handle_t vclsh = ldp_fd_to_vclsh (fd); ssize_t size; if ((errno = -ldp_init ())) return -1; - if (vlsh != VLS_INVALID_HANDLE) + if (vclsh != INVALID_SESSION_ID) { - size = vls_sendto (vlsh, (void *) buf, n, flags, NULL); 
+ size = vppcom_session_sendto (vclsh, (void *) buf, n, flags, NULL); if (size < VPPCOM_OK) { errno = -size; @@ -1362,14 +1564,14 @@ ssize_t sendfile (int out_fd, int in_fd, off_t * offset, size_t len) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size = 0; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (out_fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (out_fd); + if (vclsh != INVALID_SESSION_ID) { int rv; ssize_t results = 0; @@ -1379,11 +1581,14 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) u8 eagain = 0; u32 flags, flags_len = sizeof (flags); - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_FLAGS, &flags, + &flags_len); if (PREDICT_FALSE (rv != VPPCOM_OK)) { - LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!", - out_fd, vlsh, rv, vppcom_retval_str (rv)); + LDBG (0, + "ERROR: out fd %d: vppcom_session_attr: vclsh %u, returned %d (%s)!", + out_fd, vclsh, rv, vppcom_retval_str (rv)); vec_reset_length (ldpw->io_buffer); errno = -rv; @@ -1405,11 +1610,12 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) do { - size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0); + size = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_NWRITE, 0, 0); if (size < 0) { - LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %d (%s)!", - out_fd, vlsh, size, vppcom_retval_str (size)); + LDBG (0, + "ERROR: fd %d: vppcom_session_attr: vclsh %u returned %d (%s)!", + out_fd, vclsh, size, vppcom_retval_str (size)); vec_reset_length (ldpw->io_buffer); errno = -size; size = -1; @@ -1442,7 +1648,7 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) goto update_offset; } - size = vls_write (vlsh, ldpw->io_buffer, nbytes); + size = vppcom_session_write (vclsh, ldpw->io_buffer, nbytes); if (size < 0) { if (size == VPPCOM_EAGAIN) @@ -1512,16 +1718,16 @@ sendfile64 (int out_fd, 
int in_fd, off_t * offset, size_t len) ssize_t recv (int fd, void *buf, size_t n, int flags) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - size = vls_recvfrom (vlsh, buf, n, flags, NULL); + size = vppcom_session_recvfrom (vclsh, buf, n, flags, NULL); if (size < 0) { errno = -size; @@ -1537,7 +1743,7 @@ recv (int fd, void *buf, size_t n, int flags) } static int -ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags, +ldp_vls_sendo (vcl_session_handle_t vclsh, const void *buf, size_t n, int flags, __CONST_SOCKADDR_ARG addr, socklen_t addr_len) { vppcom_endpt_t *ep = 0; @@ -1568,11 +1774,11 @@ ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags, } } - return vls_sendto (vlsh, (void *) buf, n, flags, ep); + return vppcom_session_sendto (vclsh, (void *) buf, n, flags, ep); } static int -ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, +ldp_vls_recvfrom (vcl_session_handle_t vclsh, void *__restrict buf, size_t n, int flags, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) { @@ -1584,7 +1790,7 @@ ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, if (addr) { ep.ip = src_addr; - size = vls_recvfrom (vlsh, buf, n, flags, &ep); + size = vppcom_session_recvfrom (vclsh, buf, n, flags, &ep); if (size > 0) { @@ -1594,7 +1800,7 @@ ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, } } else - size = vls_recvfrom (vlsh, buf, n, flags, NULL); + size = vppcom_session_recvfrom (vclsh, buf, n, flags, NULL); return size; } @@ -1603,16 +1809,16 @@ ssize_t sendto (int fd, const void *buf, size_t n, int flags, __CONST_SOCKADDR_ARG addr, socklen_t addr_len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != 
INVALID_SESSION_ID) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len); + size = ldp_vls_sendo (vclsh, buf, n, flags, addr, addr_len); if (size < 0) { errno = -size; @@ -1631,16 +1837,16 @@ ssize_t recvfrom (int fd, void *__restrict buf, size_t n, int flags, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len); + size = ldp_vls_recvfrom (vclsh, buf, n, flags, addr, addr_len); if (size < 0) { errno = -size; @@ -1658,14 +1864,14 @@ recvfrom (int fd, void *__restrict buf, size_t n, int flags, ssize_t sendmsg (int fd, const struct msghdr * msg, int flags) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { struct iovec *iov = msg->msg_iov; ssize_t total = 0; @@ -1673,7 +1879,7 @@ sendmsg (int fd, const struct msghdr * msg, int flags) for (i = 0; i < msg->msg_iovlen; ++i) { - rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags, + rv = ldp_vls_sendo (vclsh, iov[i].iov_base, iov[i].iov_len, flags, msg->msg_name, msg->msg_namelen); if (rv < 0) break; @@ -1752,26 +1958,26 @@ sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) ssize_t recvmsg (int fd, struct msghdr * msg, int flags) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; ssize_t size; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { struct iovec *iov = msg->msg_iov; ssize_t max_deq, total = 0; 
int i, rv; - max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); + max_deq = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_NREAD, 0, 0); if (!max_deq) return 0; for (i = 0; i < msg->msg_iovlen; i++) { - rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags, + rv = ldp_vls_recvfrom (vclsh, iov[i].iov_base, iov[i].iov_len, flags, (i == 0 ? msg->msg_name : NULL), (i == 0 ? &msg->msg_namelen : NULL)); if (rv <= 0) @@ -1809,7 +2015,7 @@ recvmmsg (int fd, struct mmsghdr *vmessages, { ssize_t size; const char *func_str; - u32 sh = ldp_fd_to_vlsh (fd); + u32 sh = ldp_fd_to_vclsh (fd); if ((errno = -ldp_init ())) return -1; @@ -1856,14 +2062,14 @@ int getsockopt (int fd, int level, int optname, void *__restrict optval, socklen_t * __restrict optlen) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { rv = -EOPNOTSUPP; @@ -1873,26 +2079,26 @@ getsockopt (int fd, int level, int optname, switch (optname) { case TCP_NODELAY: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_TCP_NODELAY, + optval, optlen); break; case TCP_MAXSEG: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_TCP_USER_MSS, + optval, optlen); break; case TCP_KEEPIDLE: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE, + optval, optlen); break; case TCP_KEEPINTVL: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL, + optval, optlen); break; case TCP_INFO: if (optval && optlen && (*optlen == sizeof (struct tcp_info))) { - LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, " - "optlen %d: #LDP-NOP#", fd, 
vlsh, optval, *optlen); + LDBG (1, "fd %d: vclsh %u SOL_TCP, TCP_INFO, optval %p, " + "optlen %d: #LDP-NOP#", fd, vclsh, optval, *optlen); memset (optval, 0, *optlen); rv = VPPCOM_OK; } @@ -1906,7 +2112,7 @@ getsockopt (int fd, int level, int optname, break; default: LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, " - "optname %d unsupported!", fd, vlsh, optname); + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -1914,11 +2120,13 @@ getsockopt (int fd, int level, int optname, switch (optname) { case IPV6_V6ONLY: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_V6ONLY, optval, + optlen); break; default: - LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u " - "optname %d unsupported!", fd, vlsh, optname); + LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vclsh %u " + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -1926,35 +2134,47 @@ getsockopt (int fd, int level, int optname, switch (optname) { case SO_ACCEPTCONN: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_LISTEN, optval, + optlen); break; case SO_KEEPALIVE: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, + optlen); break; case SO_PROTOCOL: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_PROTOCOL, optval, + optlen); *(int *) optval = *(int *) optval ? 
SOCK_DGRAM : SOCK_STREAM; break; case SO_SNDBUF: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_TX_FIFO_LEN, + optval, optlen); break; case SO_RCVBUF: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN, - optval, optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_RX_FIFO_LEN, + optval, optlen); break; case SO_REUSEADDR: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_REUSEADDR, optval, + optlen); break; case SO_BROADCAST: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_BROADCAST, optval, + optlen); break; case SO_ERROR: - rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen); + rv = + vppcom_session_attr (vclsh, VPPCOM_ATTR_GET_ERROR, optval, + optlen); break; default: - LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u " - "optname %d unsupported!", fd, vlsh, optname); + LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vclsh %u " + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -1980,14 +2200,14 @@ int setsockopt (int fd, int level, int optname, const void *optval, socklen_t optlen) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { rv = -EOPNOTSUPP; @@ -1997,20 +2217,20 @@ setsockopt (int fd, int level, int optname, switch (optname) { case TCP_NODELAY: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_TCP_NODELAY, + (void *) optval, &optlen); break; case TCP_MAXSEG: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_TCP_USER_MSS, + (void *) optval, &optlen); break; case 
TCP_KEEPIDLE: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE, + (void *) optval, &optlen); break; case TCP_KEEPINTVL: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL, + (void *) optval, &optlen); break; case TCP_CONGESTION: case TCP_CORK: @@ -2018,8 +2238,8 @@ setsockopt (int fd, int level, int optname, rv = 0; break; default: - LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u" - "optname %d unsupported!", fd, vlsh, optname); + LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vclsh %u" + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -2027,12 +2247,12 @@ setsockopt (int fd, int level, int optname, switch (optname) { case IPV6_V6ONLY: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_V6ONLY, + (void *) optval, &optlen); break; default: - LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u" - "optname %d unsupported!", fd, vlsh, optname); + LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vclsh %u" + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -2040,20 +2260,20 @@ setsockopt (int fd, int level, int optname, switch (optname) { case SO_KEEPALIVE: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_KEEPALIVE, + (void *) optval, &optlen); break; case SO_REUSEADDR: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_REUSEADDR, + (void *) optval, &optlen); break; case SO_BROADCAST: - rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST, - (void *) optval, &optlen); + rv = vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_BROADCAST, + (void *) optval, &optlen); break; default: - LDBG (0, "ERROR: fd %d: setsockopt 
SOL_SOCKET: vlsh %u " - "optname %d unsupported!", fd, vlsh, optname); + LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vclsh %u " + "optname %d unsupported!", fd, vclsh, optname); break; } break; @@ -2078,18 +2298,20 @@ setsockopt (int fd, int level, int optname, int listen (int fd, int n) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; + vcl_worker_t *wrk = vcl_worker_get_current (); int rv; if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n); + LDBG (0, "fd %d: calling vppcom_session_listen: vclsh %u, n %d", fd, + vclsh, n); - rv = vls_listen (vlsh, n); + rv = vppcom_session_listen (vclsh, n); if (rv != VPPCOM_OK) { errno = -rv; @@ -2103,6 +2325,10 @@ listen (int fd, int n) } LDBG (1, "fd %d: returning %d", fd, rv); +/*Update listen info in vcl worker*/ + wrk->listen_fd = fd; + wrk->listen_queue_size = n; + wrk->listen_session_index = vppcom_session_index (vclsh); return rv; } @@ -2110,14 +2336,14 @@ static inline int ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len, int flags) { - vls_handle_t listen_vlsh, accept_vlsh; + vcl_session_handle_t listen_vclsh, accept_vclsh; int rv; if ((errno = -ldp_init ())) return -1; - listen_vlsh = ldp_fd_to_vlsh (listen_fd); - if (listen_vlsh != VLS_INVALID_HANDLE) + listen_vclsh = ldp_fd_to_vclsh (listen_fd); + if (listen_vclsh != INVALID_SESSION_ID) { vppcom_endpt_t ep; u8 src_addr[sizeof (struct sockaddr_in6)]; @@ -2125,12 +2351,12 @@ ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr, ep.ip = src_addr; LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u," - " ep %p, flags 0x%x", listen_fd, listen_vlsh, ep, flags); + " ep %p, flags 0x%x", listen_fd, listen_vclsh, ep, flags); - accept_vlsh = vls_accept (listen_vlsh, &ep, flags); - if (accept_vlsh < 0) + accept_vclsh = vppcom_session_accept 
(listen_vclsh, &ep, flags); + if (accept_vclsh < 0) { - errno = -accept_vlsh; + errno = -accept_vclsh; rv = -1; } else @@ -2138,13 +2364,13 @@ ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr, rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); if (rv != VPPCOM_OK) { - (void) vls_close (accept_vlsh); + (void) vppcom_session_close (accept_vclsh); errno = -rv; rv = -1; } else { - rv = ldp_vlsh_to_fd (accept_vlsh); + rv = ldp_vclsh_to_fd (accept_vclsh); } } } @@ -2177,25 +2403,26 @@ accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) int shutdown (int fd, int how) { - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv = 0, flags; u32 flags_len = sizeof (flags); if ((errno = -ldp_init ())) return -1; - vlsh = ldp_fd_to_vlsh (fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh (fd); + if (vclsh != INVALID_SESSION_ID) { - LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how); + LDBG (0, "called shutdown: fd %u vclsh %u how %d", fd, vclsh, how); - if (vls_attr (vlsh, VPPCOM_ATTR_SET_SHUT, &how, &flags_len)) + if (vppcom_session_attr (vclsh, VPPCOM_ATTR_SET_SHUT, &how, &flags_len)) { close (fd); return -1; } - if (vls_attr (vlsh, VPPCOM_ATTR_GET_SHUT, &flags, &flags_len)) + if (vppcom_session_attr + (vclsh, VPPCOM_ATTR_GET_SHUT, &flags, &flags_len)) { close (fd); return -1; @@ -2217,7 +2444,7 @@ int epoll_create1 (int flags) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); - vls_handle_t vlsh; + vcl_session_handle_t vclsh; int rv; if ((errno = -ldp_init ())) @@ -2238,17 +2465,17 @@ epoll_create1 (int flags) return rv; } - vlsh = vls_epoll_create (); - if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE)) + vclsh = vppcom_epoll_create (); + if (PREDICT_FALSE (vclsh == INVALID_SESSION_ID)) { - errno = -vlsh; + errno = -vclsh; rv = -1; } else { - rv = ldp_vlsh_to_fd (vlsh); + rv = ldp_vclsh_to_fd (vclsh); } - LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh); + LDBG (0, "epoll_create epfd %u vclsh %u", rv, vclsh); return rv; } @@ 
-2261,14 +2488,14 @@ epoll_create (int size) int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) { - vls_handle_t vep_vlsh, vlsh; + vcl_session_handle_t vep_vclsh, vclsh; int rv; if ((errno = -ldp_init ())) return -1; - vep_vlsh = ldp_fd_to_vlsh (epfd); - if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE)) + vep_vclsh = ldp_fd_to_vclsh (epfd); + if (PREDICT_FALSE (vep_vclsh == INVALID_SESSION_ID)) { /* The LDP epoll_create1 always creates VCL epfd's. * The app should never have a kernel base epoll fd unless it @@ -2282,17 +2509,18 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) goto done; } - vlsh = ldp_fd_to_vlsh (fd); + vclsh = ldp_fd_to_vclsh (fd); - LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd, - vlsh, op); + LDBG (0, "epfd %d ep_vclsh %d, fd %u vclsh %d, op %u", epfd, vep_vclsh, fd, + vclsh, op); - if (vlsh != VLS_INVALID_HANDLE) + if (vclsh != INVALID_SESSION_ID) { - LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u," - " event %p", epfd, vep_vlsh, vlsh, event); + LDBG (1, + "epfd %d: calling vppcom_epoll_ctl: ep_vclsh %d op %d, vclsh %u," + " event %p", epfd, vep_vclsh, vclsh, event); - rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event); + rv = vppcom_epoll_ctl (vep_vclsh, op, vclsh, event); if (rv != VPPCOM_OK) { errno = -rv; @@ -2304,11 +2532,12 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) int libc_epfd; u32 size = sizeof (epfd); - libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + libc_epfd = + vppcom_session_attr (vep_vclsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (!libc_epfd) { - LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " - "EPOLL_CLOEXEC", epfd, vep_vlsh); + LDBG (1, "epfd %d, vep_vclsh %d calling libc_epoll_create1: " + "EPOLL_CLOEXEC", epfd, vep_vclsh); libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); if (libc_epfd < 0) @@ -2317,8 +2546,9 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) goto done; } - rv = 
vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, - &size); + rv = + vppcom_session_attr (vep_vclsh, VPPCOM_ATTR_SET_LIBC_EPFD, + &libc_epfd, &size); if (rv < 0) { errno = -rv; @@ -2350,7 +2580,7 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); double time_to_wait = (double) 0, max_time; int libc_epfd, rv = 0; - vls_handle_t ep_vlsh; + vcl_session_handle_t ep_vclsh; if ((errno = -ldp_init ())) return -1; @@ -2364,10 +2594,10 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, if (epfd == ldpw->vcl_mq_epfd) return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); - ep_vlsh = ldp_fd_to_vlsh (epfd); - if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) + ep_vclsh = ldp_fd_to_vclsh (epfd); + if (PREDICT_FALSE (ep_vclsh == INVALID_SESSION_ID)) { - LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); + LDBG (0, "epfd %d: bad ep_vclsh %d!", epfd, ep_vclsh); errno = EBADFD; return -1; } @@ -2377,7 +2607,7 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, time_to_wait = ((timeout >= 0) ? 
(double) timeout / 1000 : 0); max_time = clib_time_now (&ldpw->clib_time) + time_to_wait; - libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + libc_epfd = vppcom_session_attr (ep_vclsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (PREDICT_FALSE (libc_epfd < 0)) { errno = -libc_epfd; @@ -2386,13 +2616,13 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, } LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, " - "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh, + "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vclsh, libc_epfd, events, maxevents, timeout, sigmask, time_to_wait); do { if (!ldpw->epoll_wait_vcl) { - rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); + rv = vppcom_epoll_wait (ep_vclsh, events, maxevents, 0); if (rv > 0) { ldpw->epoll_wait_vcl = 1; @@ -2427,7 +2657,7 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); int libc_epfd, rv = 0, num_ev; - vls_handle_t ep_vlsh; + vcl_session_handle_t ep_vclsh; if ((errno = -ldp_init ())) return -1; @@ -2441,21 +2671,21 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, if (epfd == ldpw->vcl_mq_epfd) return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); - ep_vlsh = ldp_fd_to_vlsh (epfd); - if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) + ep_vclsh = ldp_fd_to_vclsh (epfd); + if (PREDICT_FALSE (ep_vclsh == INVALID_SESSION_ID)) { - LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); + LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vclsh); errno = EBADFD; return -1; } - libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + libc_epfd = vppcom_session_attr (ep_vclsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); if (PREDICT_FALSE (!libc_epfd)) { u32 size = sizeof (epfd); LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " - "EPOLL_CLOEXEC", epfd, ep_vlsh); + "EPOLL_CLOEXEC", epfd, ep_vclsh); libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); 
if (libc_epfd < 0) { @@ -2463,7 +2693,7 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, goto done; } - rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size); + rv = vppcom_session_attr (ep_vclsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size); if (rv < 0) { errno = -rv; @@ -2494,7 +2724,7 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, ldpw->mq_epfd_added = 1; } - rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); + rv = vppcom_epoll_wait (ep_vclsh, events, maxevents, 0); if (rv > 0) goto done; else if (rv < 0) @@ -2518,7 +2748,7 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, events[i].events = events[rv].events; events[i].data.u64 = events[rv].data.u64; } - num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0); + num_ev = vppcom_epoll_wait (ep_vclsh, &events[rv], maxevents - rv, 0); if (PREDICT_TRUE (num_ev > 0)) rv += num_ev; break; @@ -2533,7 +2763,7 @@ int epoll_pwait (int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t * sigmask) { - if (vls_use_eventfd ()) + if (vcm->cfg.use_mq_eventfd) return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, sigmask); else @@ -2543,7 +2773,7 @@ epoll_pwait (int epfd, struct epoll_event *events, int epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout) { - if (vls_use_eventfd ()) + if (vcm->cfg.use_mq_eventfd) return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL); else return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL); @@ -2554,7 +2784,7 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) { ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); int rv, i, n_revents = 0; - vls_handle_t vlsh; + vcl_session_handle_t vclsh; vcl_poll_t *vp; double max_time; @@ -2571,13 +2801,13 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) if (fds[i].fd < 0) continue; - vlsh = ldp_fd_to_vlsh (fds[i].fd); - if (vlsh != VLS_INVALID_HANDLE) + vclsh = ldp_fd_to_vclsh 
(fds[i].fd); + if (vclsh != INVALID_SESSION_ID) { fds[i].fd = -fds[i].fd; vec_add2 (ldpw->vcl_poll, vp, 1); vp->fds_ndx = i; - vp->sh = vlsh_to_sh (vlsh); + vp->sh = vclsh; vp->events = fds[i].events; #ifdef __USE_XOPEN2K if (fds[i].events & POLLRDNORM) diff --git a/src/vcl/ldp.c.orig b/src/vcl/ldp.c.orig new file mode 100644 index 000000000..cda4425e5 --- /dev/null +++ b/src/vcl/ldp.c.orig @@ -0,0 +1,2720 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define HAVE_CONSTRUCTOR_ATTRIBUTE +#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE +#define CONSTRUCTOR_ATTRIBUTE \ + __attribute__ ((constructor)) +#else +#define CONSTRUCTOR_ATTRIBUTE +#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */ + +#define HAVE_DESTRUCTOR_ATTRIBUTE +#ifdef HAVE_DESTRUCTOR_ATTRIBUTE +#define DESTRUCTOR_ATTRIBUTE \ + __attribute__ ((destructor)) +#else +#define DESTRUCTOR_ATTRIBUTE +#endif + +#define LDP_MAX_NWORKERS 32 + +typedef struct ldp_worker_ctx_ +{ + u8 *io_buffer; + clib_time_t clib_time; + + /* + * Select state + */ + clib_bitmap_t *rd_bitmap; + clib_bitmap_t *wr_bitmap; + clib_bitmap_t *ex_bitmap; + clib_bitmap_t *si_rd_bitmap; + clib_bitmap_t *si_wr_bitmap; + clib_bitmap_t *si_ex_bitmap; + clib_bitmap_t *libc_rd_bitmap; + clib_bitmap_t *libc_wr_bitmap; + clib_bitmap_t *libc_ex_bitmap; + + /* + * Poll state + */ + vcl_poll_t *vcl_poll; + struct pollfd *libc_poll; + u16 *libc_poll_idxs; + + /* + * Epoll state + */ + u8 epoll_wait_vcl; + u8 mq_epfd_added; + int vcl_mq_epfd; + +} ldp_worker_ctx_t; + +/* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure + * they are the same size */ +STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask), + "ldp bitmap size mismatch"); +STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask), + "ldp bitmap size mismatch"); + +typedef struct +{ + ldp_worker_ctx_t *workers; + int init; + char app_name[LDP_APP_NAME_MAX]; + u32 vlsh_bit_val; + u32 vlsh_bit_mask; + u32 debug; + u8 transparent_tls; + + /** vcl needs next epoll_create to go to libc_epoll */ + u8 vcl_needs_real_epoll; +} ldp_main_t; + +#define LDP_DEBUG ldp->debug + +#define LDBG(_lvl, _fmt, _args...) 
\ + if (ldp->debug > _lvl) \ + { \ + int errno_saved = errno; \ + clib_warning ("ldp<%d>: " _fmt, getpid(), ##_args); \ + errno = errno_saved; \ + } + +static ldp_main_t ldp_main = { + .vlsh_bit_val = (1 << LDP_SID_BIT_MIN), + .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, + .debug = LDP_DEBUG_INIT, + .transparent_tls = 0, +}; + +static ldp_main_t *ldp = &ldp_main; + +static inline ldp_worker_ctx_t * +ldp_worker_get_current (void) +{ + return (ldp->workers + vppcom_worker_index ()); +} + +/* + * RETURN: 0 on success or -1 on error. + * */ +static inline void +ldp_set_app_name (char *app_name) +{ + snprintf (ldp->app_name, LDP_APP_NAME_MAX, + "ldp-%d-%s", getpid (), app_name); +} + +static inline char * +ldp_get_app_name () +{ + if (ldp->app_name[0] == '\0') + ldp_set_app_name ("app"); + + return ldp->app_name; +} + +static inline int +ldp_vlsh_to_fd (vls_handle_t vlsh) +{ + return (vlsh + ldp->vlsh_bit_val); +} + +static inline vls_handle_t +ldp_fd_to_vlsh (int fd) +{ + if (fd < ldp->vlsh_bit_val) + return VLS_INVALID_HANDLE; + + return (fd - ldp->vlsh_bit_val); +} + +static void +ldp_alloc_workers (void) +{ + if (ldp->workers) + return; + pool_alloc (ldp->workers, LDP_MAX_NWORKERS); +} + +static inline int +ldp_init (void) +{ + ldp_worker_ctx_t *ldpw; + int rv; + + if (PREDICT_TRUE (ldp->init)) + return 0; + + ldp->init = 1; + ldp->vcl_needs_real_epoll = 1; + rv = vls_app_create (ldp_get_app_name ()); + if (rv != VPPCOM_OK) + { + ldp->vcl_needs_real_epoll = 0; + if (rv == VPPCOM_EEXIST) + return 0; + LDBG (2, "\nERROR: ldp_init: vppcom_app_create()" + " failed! 
rv = %d (%s)\n", rv, vppcom_retval_str (rv)); + ldp->init = 0; + return rv; + } + ldp->vcl_needs_real_epoll = 0; + ldp_alloc_workers (); + ldpw = ldp_worker_get_current (); + + char *env_var_str = getenv (LDP_ENV_DEBUG); + if (env_var_str) + { + u32 tmp; + if (sscanf (env_var_str, "%u", &tmp) != 1) + clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in" + " the env var " LDP_ENV_DEBUG " (%s)!", getpid (), + env_var_str); + else + { + ldp->debug = tmp; + LDBG (0, "configured LDP debug level (%u) from env var " + LDP_ENV_DEBUG "!", ldp->debug); + } + } + + env_var_str = getenv (LDP_ENV_APP_NAME); + if (env_var_str) + { + ldp_set_app_name (env_var_str); + LDBG (0, "configured LDP app name (%s) from the env var " + LDP_ENV_APP_NAME "!", ldp->app_name); + } + + env_var_str = getenv (LDP_ENV_SID_BIT); + if (env_var_str) + { + u32 sb; + if (sscanf (env_var_str, "%u", &sb) != 1) + { + LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var " + LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str, + ldp->vlsh_bit_val, ldp->vlsh_bit_val); + } + else if (sb < LDP_SID_BIT_MIN) + { + ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN); + ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + + LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " + LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN" + " (%d)! sid bit value %d (0x%x)", sb, env_var_str, + LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val); + } + else if (sb > LDP_SID_BIT_MAX) + { + ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX); + ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + + LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var " + LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX" + " (%d)! sid bit value %d (0x%x)", sb, env_var_str, + LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val); + } + else + { + ldp->vlsh_bit_val = (1 << sb); + ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1; + + LDBG (0, "configured LDP sid bit (%u) from " + LDP_ENV_SID_BIT "! 
sid bit value %d (0x%x)", sb, + ldp->vlsh_bit_val, ldp->vlsh_bit_val); + } + + /* Make sure there are enough bits in the fd set for vcl sessions */ + if (ldp->vlsh_bit_val > FD_SETSIZE / 2) + { + LDBG (0, "ERROR: LDP vlsh bit value %d > FD_SETSIZE/2 %d!", + ldp->vlsh_bit_val, FD_SETSIZE / 2); + ldp->init = 0; + return -1; + } + } + env_var_str = getenv (LDP_ENV_TLS_TRANS); + if (env_var_str) + { + ldp->transparent_tls = 1; + } + + /* *INDENT-OFF* */ + pool_foreach (ldpw, ldp->workers, ({ + clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time)); + })); + /* *INDENT-ON* */ + + LDBG (0, "LDP initialization: done!"); + + return 0; +} + +int +close (int fd) +{ + vls_handle_t vlsh; + int rv, epfd; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + if (epfd > 0) + { + LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd); + + rv = libc_close (epfd); + if (rv < 0) + { + u32 size = sizeof (epfd); + epfd = 0; + + (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size); + } + } + else if (PREDICT_FALSE (epfd < 0)) + { + errno = -epfd; + rv = -1; + goto done; + } + + LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh); + + rv = vls_close (vlsh); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + LDBG (0, "fd %d: calling libc_close", fd); + rv = libc_close (fd); + } + +done: + return rv; +} + +ssize_t +read (int fd, void *buf, size_t nbytes) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + size = vls_read (vlsh, buf, nbytes); + if (size < 0) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_read (fd, buf, nbytes); + } + + return size; +} + +ssize_t +readv (int fd, const struct iovec * iov, int iovcnt) +{ + int rv = 0, i, total = 0; + vls_handle_t vlsh; + ssize_t size = 0; + + if ((errno = 
-ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + for (i = 0; i < iovcnt; ++i) + { + rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len); + if (rv <= 0) + break; + else + { + total += rv; + if (rv < iov[i].iov_len) + break; + } + } + if (rv < 0 && total == 0) + { + errno = -rv; + size = -1; + } + else + size = total; + } + else + { + size = libc_readv (fd, iov, iovcnt); + } + + return size; +} + +ssize_t +write (int fd, const void *buf, size_t nbytes) +{ + vls_handle_t vlsh; + ssize_t size = 0; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + size = vls_write_msg (vlsh, (void *) buf, nbytes); + if (size < 0) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_write (fd, buf, nbytes); + } + + return size; +} + +ssize_t +writev (int fd, const struct iovec * iov, int iovcnt) +{ + ssize_t size = 0, total = 0; + vls_handle_t vlsh; + int i, rv = 0; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + for (i = 0; i < iovcnt; ++i) + { + rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len); + if (rv < 0) + break; + else + { + total += rv; + if (rv < iov[i].iov_len) + break; + } + } + + if (rv < 0 && total == 0) + { + errno = -rv; + size = -1; + } + else + size = total; + } + else + { + size = libc_writev (fd, iov, iovcnt); + } + + return size; +} + +static int +fcntl_internal (int fd, int cmd, va_list ap) +{ + vls_handle_t vlsh; + int rv = 0; + + vlsh = ldp_fd_to_vlsh (fd); + LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd); + if (vlsh != VLS_INVALID_HANDLE) + { + int flags = va_arg (ap, int); + u32 size; + + size = sizeof (flags); + rv = -EOPNOTSUPP; + switch (cmd) + { + case F_SETFL: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + break; + + case F_GETFL: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size); + if (rv == VPPCOM_OK) + rv = flags; + 
break; + case F_SETFD: + /* TODO handle this */ + LDBG (0, "F_SETFD ignored flags %u", flags); + rv = 0; + break; + default: + rv = -EOPNOTSUPP; + break; + } + if (rv < 0) + { + errno = -rv; + rv = -1; + } + } + else + { +#ifdef HAVE_FCNTL64 + rv = libc_vfcntl64 (fd, cmd, ap); +#else + rv = libc_vfcntl (fd, cmd, ap); +#endif + } + + return rv; +} + +int +fcntl (int fd, int cmd, ...) +{ + va_list ap; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + va_start (ap, cmd); + rv = fcntl_internal (fd, cmd, ap); + va_end (ap); + + return rv; +} + +int +fcntl64 (int fd, int cmd, ...) +{ + va_list ap; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + va_start (ap, cmd); + rv = fcntl_internal (fd, cmd, ap); + va_end (ap); + return rv; +} + +int +ioctl (int fd, unsigned long int cmd, ...) +{ + vls_handle_t vlsh; + va_list ap; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + va_start (ap, cmd); + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + switch (cmd) + { + case FIONREAD: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); + break; + + case FIONBIO: + { + u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0; + u32 size = sizeof (flags); + + /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than + * non-blocking, the flags should be read here and merged + * with O_NONBLOCK. 
+ */ + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + } + break; + + default: + rv = -EOPNOTSUPP; + break; + } + if (rv < 0) + { + errno = -rv; + rv = -1; + } + } + else + { + rv = libc_vioctl (fd, cmd, ap); + } + + va_end (ap); + return rv; +} + +always_inline void +ldp_select_init_maps (fd_set * __restrict original, + clib_bitmap_t ** resultb, clib_bitmap_t ** libcb, + clib_bitmap_t ** vclb, int nfds, u32 minbits, + u32 n_bytes, uword * si_bits, uword * libc_bits) +{ + uword si_bits_set, libc_bits_set; + vls_handle_t vlsh; + int fd; + + clib_bitmap_validate (*vclb, minbits); + clib_bitmap_validate (*libcb, minbits); + clib_bitmap_validate (*resultb, minbits); + clib_memcpy_fast (*resultb, original, n_bytes); + memset (original, 0, n_bytes); + + /* *INDENT-OFF* */ + clib_bitmap_foreach (fd, *resultb, ({ + if (fd > nfds) + break; + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh == VLS_INVALID_HANDLE) + clib_bitmap_set_no_check (*libcb, fd, 1); + else + *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1); + })); + /* *INDENT-ON* */ + + si_bits_set = clib_bitmap_last_set (*vclb) + 1; + *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits; + clib_bitmap_validate (*resultb, *si_bits); + + libc_bits_set = clib_bitmap_last_set (*libcb) + 1; + *libc_bits = (libc_bits_set > *libc_bits) ? 
libc_bits_set : *libc_bits; +} + +always_inline int +ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb) +{ + vls_handle_t vlsh; + uword si; + int fd; + + if (!libcb) + return 0; + + /* *INDENT-OFF* */ + clib_bitmap_foreach (si, vclb, ({ + vlsh = vls_session_index_to_vlsh (si); + ASSERT (vlsh != VLS_INVALID_HANDLE); + fd = ldp_vlsh_to_fd (vlsh); + if (PREDICT_FALSE (fd < 0)) + { + errno = EBADFD; + return -1; + } + FD_SET (fd, libcb); + })); + /* *INDENT-ON* */ + + return 0; +} + +always_inline void +ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb) +{ + uword fd; + + if (!libcb) + return; + + /* *INDENT-OFF* */ + clib_bitmap_foreach (fd, result, ({ + FD_SET ((int)fd, libcb); + })); + /* *INDENT-ON* */ +} + +int +ldp_pselect (int nfds, fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, + const struct timespec *__restrict timeout, + const __sigset_t * __restrict sigmask) +{ + u32 minbits = clib_max (nfds, BITS (uword)), n_bytes; + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + struct timespec libc_tspec = { 0 }; + f64 time_out, vcl_timeout = 0; + uword si_bits, libc_bits; + int rv, bits_set = 0; + + if (nfds < 0) + { + errno = EINVAL; + return -1; + } + + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); + + if (timeout) + { + time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ? + (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9; + + /* select as fine grained sleep */ + if (!nfds) + { + time_out += clib_time_now (&ldpw->clib_time); + while (clib_time_now (&ldpw->clib_time) < time_out) + ; + return 0; + } + } + else if (!nfds) + { + errno = EINVAL; + return -1; + } + else + time_out = -1; + + if (nfds <= ldp->vlsh_bit_val) + { + rv = libc_pselect (nfds, readfds, writefds, exceptfds, + timeout, sigmask); + goto done; + } + + si_bits = libc_bits = 0; + n_bytes = nfds / 8 + ((nfds % 8) ? 
1 : 0); + + if (readfds) + ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap, + &ldpw->si_rd_bitmap, nfds, minbits, n_bytes, + &si_bits, &libc_bits); + if (writefds) + ldp_select_init_maps (writefds, &ldpw->wr_bitmap, + &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds, + minbits, n_bytes, &si_bits, &libc_bits); + if (exceptfds) + ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap, + &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds, + minbits, n_bytes, &si_bits, &libc_bits); + + if (PREDICT_FALSE (!si_bits && !libc_bits)) + { + errno = EINVAL; + rv = -1; + goto done; + } + + if (!si_bits) + libc_tspec = timeout ? *timeout : libc_tspec; + + do + { + if (si_bits) + { + if (readfds) + clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap, + vec_len (ldpw->si_rd_bitmap) * + sizeof (clib_bitmap_t)); + if (writefds) + clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap, + vec_len (ldpw->si_wr_bitmap) * + sizeof (clib_bitmap_t)); + if (exceptfds) + clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap, + vec_len (ldpw->si_ex_bitmap) * + sizeof (clib_bitmap_t)); + + rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL, + writefds ? ldpw->wr_bitmap : NULL, + exceptfds ? 
ldpw->ex_bitmap : NULL, vcl_timeout); + if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + else if (rv > 0) + { + if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds)) + { + rv = -1; + goto done; + } + + if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds)) + { + rv = -1; + goto done; + } + + if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds)) + { + rv = -1; + goto done; + } + bits_set = rv; + } + } + if (libc_bits) + { + if (readfds) + clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap, + vec_len (ldpw->libc_rd_bitmap) * + sizeof (clib_bitmap_t)); + if (writefds) + clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap, + vec_len (ldpw->libc_wr_bitmap) * + sizeof (clib_bitmap_t)); + if (exceptfds) + clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap, + vec_len (ldpw->libc_ex_bitmap) * + sizeof (clib_bitmap_t)); + + rv = libc_pselect (libc_bits, + readfds ? (fd_set *) ldpw->rd_bitmap : NULL, + writefds ? (fd_set *) ldpw->wr_bitmap : NULL, + exceptfds ? 
(fd_set *) ldpw->ex_bitmap : NULL, + &libc_tspec, sigmask); + if (rv > 0) + { + ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds); + ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds); + ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds); + bits_set += rv; + } + } + + if (bits_set) + { + rv = bits_set; + goto done; + } + } + while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out)); + rv = 0; + +done: + /* TBD: set timeout to amount of time left */ + clib_bitmap_zero (ldpw->rd_bitmap); + clib_bitmap_zero (ldpw->si_rd_bitmap); + clib_bitmap_zero (ldpw->libc_rd_bitmap); + clib_bitmap_zero (ldpw->wr_bitmap); + clib_bitmap_zero (ldpw->si_wr_bitmap); + clib_bitmap_zero (ldpw->libc_wr_bitmap); + clib_bitmap_zero (ldpw->ex_bitmap); + clib_bitmap_zero (ldpw->si_ex_bitmap); + clib_bitmap_zero (ldpw->libc_ex_bitmap); + + return rv; +} + +int +select (int nfds, fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, struct timeval *__restrict timeout) +{ + struct timespec tspec; + + if (timeout) + { + tspec.tv_sec = timeout->tv_sec; + tspec.tv_nsec = timeout->tv_usec * 1000; + } + return ldp_pselect (nfds, readfds, writefds, exceptfds, + timeout ? &tspec : NULL, NULL); +} + +#ifdef __USE_XOPEN2K +int +pselect (int nfds, fd_set * __restrict readfds, + fd_set * __restrict writefds, + fd_set * __restrict exceptfds, + const struct timespec *__restrict timeout, + const __sigset_t * __restrict sigmask) +{ + return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, 0); +} +#endif + +/* If transparent TLS mode is turned on, then ldp will load key and cert. 
+ */ +static int +load_tls_cert (vls_handle_t vlsh) +{ + char *env_var_str = getenv (LDP_ENV_TLS_CERT); + char inbuf[4096]; + char *tls_cert; + int cert_size; + FILE *fp; + + if (env_var_str) + { + fp = fopen (env_var_str, "r"); + if (fp == NULL) + { + LDBG (0, "ERROR: failed to open cert file %s \n", env_var_str); + return -1; + } + cert_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp); + tls_cert = inbuf; + vppcom_session_tls_add_cert (vlsh_to_session_index (vlsh), tls_cert, + cert_size); + fclose (fp); + } + else + { + LDBG (0, "ERROR: failed to read LDP environment %s\n", + LDP_ENV_TLS_CERT); + return -1; + } + return 0; +} + +static int +load_tls_key (vls_handle_t vlsh) +{ + char *env_var_str = getenv (LDP_ENV_TLS_KEY); + char inbuf[4096]; + char *tls_key; + int key_size; + FILE *fp; + + if (env_var_str) + { + fp = fopen (env_var_str, "r"); + if (fp == NULL) + { + LDBG (0, "ERROR: failed to open key file %s \n", env_var_str); + return -1; + } + key_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp); + tls_key = inbuf; + vppcom_session_tls_add_key (vlsh_to_session_index (vlsh), tls_key, + key_size); + fclose (fp); + } + else + { + LDBG (0, "ERROR: failed to read LDP environment %s\n", LDP_ENV_TLS_KEY); + return -1; + } + return 0; +} + +int +socket (int domain, int type, int protocol) +{ + int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); + u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0; + vls_handle_t vlsh; + + if ((errno = -ldp_init ())) + return -1; + + if (((domain == AF_INET) || (domain == AF_INET6)) && + ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM))) + { + u8 proto; + if (ldp->transparent_tls) + { + proto = VPPCOM_PROTO_TLS; + } + else + proto = ((sock_type == SOCK_DGRAM) ? 
+ VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP); + + LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u", + proto, vppcom_proto_str (proto), is_nonblocking); + + vlsh = vls_create (proto, is_nonblocking); + if (vlsh < 0) + { + errno = -vlsh; + rv = -1; + } + else + { + if (ldp->transparent_tls) + { + if (load_tls_cert (vlsh) < 0 || load_tls_key (vlsh) < 0) + { + return -1; + } + } + rv = ldp_vlsh_to_fd (vlsh); + } + } + else + { + LDBG (0, "calling libc_socket"); + rv = libc_socket (domain, type, protocol); + } + + return rv; +} + +/* + * Create two new sockets, of type TYPE in domain DOMAIN and using + * protocol PROTOCOL, which are connected to each other, and put file + * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero, + * one will be chosen automatically. + * Returns 0 on success, -1 for errors. + * */ +int +socketpair (int domain, int type, int protocol, int fds[2]) +{ + int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); + + if ((errno = -ldp_init ())) + return -1; + + if (((domain == AF_INET) || (domain == AF_INET6)) && + ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM))) + { + LDBG (0, "LDP-TBD"); + errno = ENOSYS; + rv = -1; + } + else + { + LDBG (1, "calling libc_socketpair"); + rv = libc_socketpair (domain, type, protocol, fds); + } + + return rv; +} + +int +bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + vppcom_endpt_t ep; + + switch (addr->sa_family) + { + case AF_INET: + if (len != sizeof (struct sockaddr_in)) + { + LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!", + fd, vlsh, len); + errno = EINVAL; + rv = -1; + goto done; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; + break; + + case AF_INET6: + if (len != sizeof (struct 
sockaddr_in6)) + { + LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!", + fd, vlsh, len); + errno = EINVAL; + rv = -1; + goto done; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; + break; + + default: + LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!", + fd, vlsh, addr->sa_family); + errno = EAFNOSUPPORT; + rv = -1; + goto done; + } + LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh, + addr, len); + + rv = vls_bind (vlsh, &ep); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len); + rv = libc_bind (fd, addr, len); + } + +done: + LDBG (1, "fd %d: returning %d", fd, rv); + + return rv; +} + +static inline int +ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len, + vppcom_endpt_t * ep) +{ + int rv = 0; + int sa_len, copy_len; + + if ((errno = -ldp_init ())) + return -1; + + if (addr && len && ep) + { + addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6; + switch (addr->sa_family) + { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_port = ep->port; + if (*len > sizeof (struct sockaddr_in)) + *len = sizeof (struct sockaddr_in); + sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr); + copy_len = *len - sa_len; + if (copy_len > 0) + memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip, + copy_len); + break; + + case AF_INET6: + ((struct sockaddr_in6 *) addr)->sin6_port = ep->port; + if (*len > sizeof (struct sockaddr_in6)) + *len = sizeof (struct sockaddr_in6); + sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr); + copy_len = *len - sa_len; + if (copy_len > 0) + memcpy (((struct sockaddr_in6 *) addr)->sin6_addr. 
+ __in6_u.__u6_addr8, ep->ip, copy_len); + break; + + default: + /* Not possible */ + rv = -EAFNOSUPPORT; + break; + } + } + return rv; +} + +int +getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + vppcom_endpt_t ep; + u8 addr_buf[sizeof (struct in6_addr)]; + u32 size = sizeof (ep); + + ep.ip = addr_buf; + + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + else + { + rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + } + else + { + rv = libc_getsockname (fd, addr, len); + } + + return rv; +} + +int +connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + if (!addr) + { + LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len); + errno = EINVAL; + rv = -1; + goto done; + } + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + vppcom_endpt_t ep; + + switch (addr->sa_family) + { + case AF_INET: + if (len != sizeof (struct sockaddr_in)) + { + LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!", + fd, vlsh, len); + errno = EINVAL; + rv = -1; + goto done; + } + ep.is_ip4 = VPPCOM_IS_IP4; + ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr; + ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port; + break; + + case AF_INET6: + if (len != sizeof (struct sockaddr_in6)) + { + LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!", + fd, vlsh, len); + errno = EINVAL; + rv = -1; + goto done; + } + ep.is_ip4 = VPPCOM_IS_IP6; + ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; + ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port; + break; + + default: + LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!", + 
fd, vlsh, addr->sa_family); + errno = EAFNOSUPPORT; + rv = -1; + goto done; + } + LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd, + vlsh, addr, len); + + rv = vls_connect (vlsh, &ep); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u", + fd, addr, len); + + rv = libc_connect (fd, addr, len); + } + +done: + LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv); + return rv; +} + +int +getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + vppcom_endpt_t ep; + u8 addr_buf[sizeof (struct in6_addr)]; + u32 size = sizeof (ep); + + ep.ip = addr_buf; + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + else + { + rv = ldp_copy_ep_to_sockaddr (addr, len, &ep); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + } + else + { + rv = libc_getpeername (fd, addr, len); + } + + return rv; +} + +ssize_t +send (int fd, const void *buf, size_t n, int flags) +{ + vls_handle_t vlsh = ldp_fd_to_vlsh (fd); + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + if (vlsh != VLS_INVALID_HANDLE) + { + size = vls_sendto (vlsh, (void *) buf, n, flags, NULL); + if (size < VPPCOM_OK) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_send (fd, buf, n, flags); + } + + return size; +} + +ssize_t +sendfile (int out_fd, int in_fd, off_t * offset, size_t len) +{ + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + vls_handle_t vlsh; + ssize_t size = 0; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (out_fd); + if (vlsh != VLS_INVALID_HANDLE) + { + int rv; + ssize_t results = 0; + size_t n_bytes_left = len; + size_t bytes_to_read; + int nbytes; + u8 eagain = 0; + u32 flags, flags_len = sizeof (flags); + + rv 
= vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len); + if (PREDICT_FALSE (rv != VPPCOM_OK)) + { + LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!", + out_fd, vlsh, rv, vppcom_retval_str (rv)); + + vec_reset_length (ldpw->io_buffer); + errno = -rv; + size = -1; + goto done; + } + + if (offset) + { + off_t off = lseek (in_fd, *offset, SEEK_SET); + if (PREDICT_FALSE (off == -1)) + { + size = -1; + goto done; + } + + ASSERT (off == *offset); + } + + do + { + size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0); + if (size < 0) + { + LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %d (%s)!", + out_fd, vlsh, size, vppcom_retval_str (size)); + vec_reset_length (ldpw->io_buffer); + errno = -size; + size = -1; + goto done; + } + + bytes_to_read = size; + if (bytes_to_read == 0) + { + if (flags & O_NONBLOCK) + { + if (!results) + eagain = 1; + goto update_offset; + } + else + continue; + } + bytes_to_read = clib_min (n_bytes_left, bytes_to_read); + vec_validate (ldpw->io_buffer, bytes_to_read); + nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read); + if (nbytes < 0) + { + if (results == 0) + { + vec_reset_length (ldpw->io_buffer); + size = -1; + goto done; + } + goto update_offset; + } + + size = vls_write (vlsh, ldpw->io_buffer, nbytes); + if (size < 0) + { + if (size == VPPCOM_EAGAIN) + { + if (flags & O_NONBLOCK) + { + if (!results) + eagain = 1; + goto update_offset; + } + else + continue; + } + if (results == 0) + { + vec_reset_length (ldpw->io_buffer); + errno = -size; + size = -1; + goto done; + } + goto update_offset; + } + + results += nbytes; + ASSERT (n_bytes_left >= nbytes); + n_bytes_left = n_bytes_left - nbytes; + } + while (n_bytes_left > 0); + + update_offset: + vec_reset_length (ldpw->io_buffer); + if (offset) + { + off_t off = lseek (in_fd, *offset, SEEK_SET); + if (PREDICT_FALSE (off == -1)) + { + size = -1; + goto done; + } + + ASSERT (off == *offset); + *offset += results + 1; + } + if (eagain) + { + errno = EAGAIN; + 
size = -1; + } + else + size = results; + } + else + { + size = libc_sendfile (out_fd, in_fd, offset, len); + } + +done: + return size; +} + +ssize_t +sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len) +{ + return sendfile (out_fd, in_fd, offset, len); +} + +ssize_t +recv (int fd, void *buf, size_t n, int flags) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + size = vls_recvfrom (vlsh, buf, n, flags, NULL); + if (size < 0) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_recv (fd, buf, n, flags); + } + + return size; +} + +static int +ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags, + __CONST_SOCKADDR_ARG addr, socklen_t addr_len) +{ + vppcom_endpt_t *ep = 0; + vppcom_endpt_t _ep; + + if (addr) + { + ep = &_ep; + switch (addr->sa_family) + { + case AF_INET: + ep->is_ip4 = VPPCOM_IS_IP4; + ep->ip = + (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr; + ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port; + break; + + case AF_INET6: + ep->is_ip4 = VPPCOM_IS_IP6; + ep->ip = + (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr; + ep->port = + (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port; + break; + + default: + return EAFNOSUPPORT; + } + } + + return vls_sendto (vlsh, (void *) buf, n, flags, ep); +} + +static int +ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, + int flags, __SOCKADDR_ARG addr, + socklen_t * __restrict addr_len) +{ + u8 src_addr[sizeof (struct sockaddr_in6)]; + vppcom_endpt_t ep; + ssize_t size; + int rv; + + if (addr) + { + ep.ip = src_addr; + size = vls_recvfrom (vlsh, buf, n, flags, &ep); + + if (size > 0) + { + rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); + if (rv < 0) + size = rv; + } + } + else + size = vls_recvfrom (vlsh, buf, n, flags, NULL); + + return size; +} + +ssize_t +sendto (int fd, const void *buf, 
size_t n, int flags, + __CONST_SOCKADDR_ARG addr, socklen_t addr_len) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != INVALID_SESSION_ID) + { + size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len); + if (size < 0) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_sendto (fd, buf, n, flags, addr, addr_len); + } + + return size; +} + +ssize_t +recvfrom (int fd, void *__restrict buf, size_t n, int flags, + __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len); + if (size < 0) + { + errno = -size; + size = -1; + } + } + else + { + size = libc_recvfrom (fd, buf, n, flags, addr, addr_len); + } + + return size; +} + +ssize_t +sendmsg (int fd, const struct msghdr * msg, int flags) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + struct iovec *iov = msg->msg_iov; + ssize_t total = 0; + int i, rv; + + for (i = 0; i < msg->msg_iovlen; ++i) + { + rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags, + msg->msg_name, msg->msg_namelen); + if (rv < 0) + break; + else + { + total += rv; + if (rv < iov[i].iov_len) + break; + } + } + + if (rv < 0 && total == 0) + { + errno = -rv; + size = -1; + } + else + size = total; + } + else + { + size = libc_sendmsg (fd, msg, flags); + } + + return size; +} + +#ifdef USE_GNU +int +sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) +{ + ssize_t size; + const char *func_str; + u32 sh = ldp_fd_to_vlsh (fd); + + if ((errno = -ldp_init ())) + return -1; + + if (sh != INVALID_SESSION_ID) + { + clib_warning ("LDP<%d>: LDP-TBD", getpid ()); + errno = ENOSYS; + size = -1; + } + else + { + 
func_str = "libc_sendmmsg"; + + if (LDP_DEBUG > 2) + clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): " + "vmessages %p, vlen %u, flags 0x%x", + getpid (), fd, fd, func_str, vmessages, vlen, flags); + + size = libc_sendmmsg (fd, vmessages, vlen, flags); + } + + if (LDP_DEBUG > 2) + { + if (size < 0) + { + int errno_val = errno; + perror (func_str); + clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! " + "rv %d, errno = %d", getpid (), fd, fd, + func_str, size, errno_val); + errno = errno_val; + } + else + clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)", + getpid (), fd, fd, size, size); + } + return size; +} +#endif + +ssize_t +recvmsg (int fd, struct msghdr * msg, int flags) +{ + vls_handle_t vlsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + struct iovec *iov = msg->msg_iov; + ssize_t max_deq, total = 0; + int i, rv; + + max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0); + if (!max_deq) + return 0; + + for (i = 0; i < msg->msg_iovlen; i++) + { + rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags, + (i == 0 ? msg->msg_name : NULL), + (i == 0 ? 
&msg->msg_namelen : NULL)); + if (rv <= 0) + break; + else + { + total += rv; + if (rv < iov[i].iov_len) + break; + } + if (total >= max_deq) + break; + } + + if (rv < 0 && total == 0) + { + errno = -rv; + size = -1; + } + else + size = total; + } + else + { + size = libc_recvmsg (fd, msg, flags); + } + + return size; +} + +#ifdef USE_GNU +int +recvmmsg (int fd, struct mmsghdr *vmessages, + unsigned int vlen, int flags, struct timespec *tmo) +{ + ssize_t size; + const char *func_str; + u32 sh = ldp_fd_to_vlsh (fd); + + if ((errno = -ldp_init ())) + return -1; + + if (sh != INVALID_SESSION_ID) + { + clib_warning ("LDP<%d>: LDP-TBD", getpid ()); + errno = ENOSYS; + size = -1; + } + else + { + func_str = "libc_recvmmsg"; + + if (LDP_DEBUG > 2) + clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): " + "vmessages %p, vlen %u, flags 0x%x, tmo %p", + getpid (), fd, fd, func_str, vmessages, vlen, + flags, tmo); + + size = libc_recvmmsg (fd, vmessages, vlen, flags, tmo); + } + + if (LDP_DEBUG > 2) + { + if (size < 0) + { + int errno_val = errno; + perror (func_str); + clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! 
" + "rv %d, errno = %d", getpid (), fd, fd, + func_str, size, errno_val); + errno = errno_val; + } + else + clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)", + getpid (), fd, fd, size, size); + } + return size; +} +#endif + +int +getsockopt (int fd, int level, int optname, + void *__restrict optval, socklen_t * __restrict optlen) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + rv = -EOPNOTSUPP; + + switch (level) + { + case SOL_TCP: + switch (optname) + { + case TCP_NODELAY: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY, + optval, optlen); + break; + case TCP_MAXSEG: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS, + optval, optlen); + break; + case TCP_KEEPIDLE: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE, + optval, optlen); + break; + case TCP_KEEPINTVL: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL, + optval, optlen); + break; + case TCP_INFO: + if (optval && optlen && (*optlen == sizeof (struct tcp_info))) + { + LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, " + "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen); + memset (optval, 0, *optlen); + rv = VPPCOM_OK; + } + else + rv = -EFAULT; + break; + case TCP_CONGESTION: + *optlen = strlen ("cubic"); + strncpy (optval, "cubic", *optlen + 1); + rv = 0; + break; + default: + LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, " + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + case SOL_IPV6: + switch (optname) + { + case IPV6_V6ONLY: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen); + break; + default: + LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u " + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + case SOL_SOCKET: + switch (optname) + { + case SO_ACCEPTCONN: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen); + break; + case SO_KEEPALIVE: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, 
optval, optlen); + break; + case SO_PROTOCOL: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen); + *(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM; + break; + case SO_SNDBUF: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN, + optval, optlen); + break; + case SO_RCVBUF: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN, + optval, optlen); + break; + case SO_REUSEADDR: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen); + break; + case SO_BROADCAST: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen); + break; + case SO_ERROR: + rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen); + break; + default: + LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u " + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + default: + break; + } + + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + rv = libc_getsockopt (fd, level, optname, optval, optlen); + } + + return rv; +} + +int +setsockopt (int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + rv = -EOPNOTSUPP; + + switch (level) + { + case SOL_TCP: + switch (optname) + { + case TCP_NODELAY: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY, + (void *) optval, &optlen); + break; + case TCP_MAXSEG: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS, + (void *) optval, &optlen); + break; + case TCP_KEEPIDLE: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE, + (void *) optval, &optlen); + break; + case TCP_KEEPINTVL: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL, + (void *) optval, &optlen); + break; + case TCP_CONGESTION: + case TCP_CORK: + /* Ignore */ + rv = 0; + break; + default: + LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u" + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + case SOL_IPV6: + switch 
(optname) + { + case IPV6_V6ONLY: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY, + (void *) optval, &optlen); + break; + default: + LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u" + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + case SOL_SOCKET: + switch (optname) + { + case SO_KEEPALIVE: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE, + (void *) optval, &optlen); + break; + case SO_REUSEADDR: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR, + (void *) optval, &optlen); + break; + case SO_BROADCAST: + rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST, + (void *) optval, &optlen); + break; + default: + LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u " + "optname %d unsupported!", fd, vlsh, optname); + break; + } + break; + default: + break; + } + + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + rv = libc_setsockopt (fd, level, optname, optval, optlen); + } + + return rv; +} + +int +listen (int fd, int n) +{ + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n); + + rv = vls_listen (vlsh, n); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n); + rv = libc_listen (fd, n); + } + + LDBG (1, "fd %d: returning %d", fd, rv); + return rv; +} + +static inline int +ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr, + socklen_t * __restrict addr_len, int flags) +{ + vls_handle_t listen_vlsh, accept_vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + listen_vlsh = ldp_fd_to_vlsh (listen_fd); + if (listen_vlsh != VLS_INVALID_HANDLE) + { + vppcom_endpt_t ep; + u8 src_addr[sizeof (struct sockaddr_in6)]; + memset (&ep, 0, sizeof (ep)); + ep.ip = src_addr; + + LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u," + " ep %p, flags 0x%x", listen_fd, listen_vlsh, 
ep, flags); + + accept_vlsh = vls_accept (listen_vlsh, &ep, flags); + if (accept_vlsh < 0) + { + errno = -accept_vlsh; + rv = -1; + } + else + { + rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep); + if (rv != VPPCOM_OK) + { + (void) vls_close (accept_vlsh); + errno = -rv; + rv = -1; + } + else + { + rv = ldp_vlsh_to_fd (accept_vlsh); + } + } + } + else + { + LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p," + " flags 0x%x", listen_fd, addr, addr_len, flags); + + rv = libc_accept4 (listen_fd, addr, addr_len, flags); + } + + LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv); + + return rv; +} + +int +accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len, + int flags) +{ + return ldp_accept4 (fd, addr, addr_len, flags); +} + +int +accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) +{ + return ldp_accept4 (fd, addr, addr_len, 0); +} + +int +shutdown (int fd, int how) +{ + vls_handle_t vlsh; + int rv = 0, flags; + u32 flags_len = sizeof (flags); + + if ((errno = -ldp_init ())) + return -1; + + vlsh = ldp_fd_to_vlsh (fd); + if (vlsh != VLS_INVALID_HANDLE) + { + LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how); + + if (vls_attr (vlsh, VPPCOM_ATTR_SET_SHUT, &how, &flags_len)) + { + close (fd); + return -1; + } + + if (vls_attr (vlsh, VPPCOM_ATTR_GET_SHUT, &flags, &flags_len)) + { + close (fd); + return -1; + } + + if (flags == SHUT_RDWR) + rv = close (fd); + } + else + { + LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how); + rv = libc_shutdown (fd, how); + } + + return rv; +} + +int +epoll_create1 (int flags) +{ + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + vls_handle_t vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + if (ldp->vcl_needs_real_epoll) + { + /* Make sure workers have been allocated */ + if (!ldp->workers) + { + ldp_alloc_workers (); + ldpw = ldp_worker_get_current (); + } + rv = libc_epoll_create1 (flags); + ldp->vcl_needs_real_epoll = 0; + 
ldpw->vcl_mq_epfd = rv; + LDBG (0, "created vcl epfd %u", rv); + return rv; + } + + vlsh = vls_epoll_create (); + if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE)) + { + errno = -vlsh; + rv = -1; + } + else + { + rv = ldp_vlsh_to_fd (vlsh); + } + LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh); + return rv; +} + +int +epoll_create (int size) +{ + return epoll_create1 (0); +} + +int +epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) +{ + vls_handle_t vep_vlsh, vlsh; + int rv; + + if ((errno = -ldp_init ())) + return -1; + + vep_vlsh = ldp_fd_to_vlsh (epfd); + if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE)) + { + /* The LDP epoll_create1 always creates VCL epfd's. + * The app should never have a kernel base epoll fd unless it + * was acquired outside of the LD_PRELOAD process context. + * In any case, if we get one, punt it to libc_epoll_ctl. + */ + LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d" + " event %p", epfd, op, fd, event); + + rv = libc_epoll_ctl (epfd, op, fd, event); + goto done; + } + + vlsh = ldp_fd_to_vlsh (fd); + + LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd, + vlsh, op); + + if (vlsh != VLS_INVALID_HANDLE) + { + LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u," + " event %p", epfd, vep_vlsh, vlsh, event); + + rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; + } + } + else + { + int libc_epfd; + u32 size = sizeof (epfd); + + libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + if (!libc_epfd) + { + LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " + "EPOLL_CLOEXEC", epfd, vep_vlsh); + + libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); + if (libc_epfd < 0) + { + rv = libc_epfd; + goto done; + } + + rv = vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, + &size); + if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + } + else if (PREDICT_FALSE (libc_epfd < 0)) + { + errno = -epfd; + rv 
= -1; + goto done; + } + + LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d," + " event %p", epfd, libc_epfd, op, fd, event); + + rv = libc_epoll_ctl (libc_epfd, op, fd, event); + } + +done: + return rv; +} + +static inline int +ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, + int timeout, const sigset_t * sigmask) +{ + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + double time_to_wait = (double) 0, max_time; + int libc_epfd, rv = 0; + vls_handle_t ep_vlsh; + + if ((errno = -ldp_init ())) + return -1; + + if (PREDICT_FALSE (!events || (timeout < -1))) + { + errno = EFAULT; + return -1; + } + + if (epfd == ldpw->vcl_mq_epfd) + return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); + + ep_vlsh = ldp_fd_to_vlsh (epfd); + if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) + { + LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); + errno = EBADFD; + return -1; + } + + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); + time_to_wait = ((timeout >= 0) ? 
(double) timeout / 1000 : 0); + max_time = clib_time_now (&ldpw->clib_time) + time_to_wait; + + libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + if (PREDICT_FALSE (libc_epfd < 0)) + { + errno = -libc_epfd; + rv = -1; + goto done; + } + + LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, " + "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh, + libc_epfd, events, maxevents, timeout, sigmask, time_to_wait); + do + { + if (!ldpw->epoll_wait_vcl) + { + rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); + if (rv > 0) + { + ldpw->epoll_wait_vcl = 1; + goto done; + } + else if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + } + else + ldpw->epoll_wait_vcl = 0; + + if (libc_epfd > 0) + { + rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask); + if (rv != 0) + goto done; + } + } + while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time)); + +done: + return rv; +} + +static inline int +ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t * sigmask) +{ + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + int libc_epfd, rv = 0, num_ev; + vls_handle_t ep_vlsh; + + if ((errno = -ldp_init ())) + return -1; + + if (PREDICT_FALSE (!events || (timeout < -1))) + { + errno = EFAULT; + return -1; + } + + if (epfd == ldpw->vcl_mq_epfd) + return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); + + ep_vlsh = ldp_fd_to_vlsh (epfd); + if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE)) + { + LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh); + errno = EBADFD; + return -1; + } + + libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); + if (PREDICT_FALSE (!libc_epfd)) + { + u32 size = sizeof (epfd); + + LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: " + "EPOLL_CLOEXEC", epfd, ep_vlsh); + libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); + if (libc_epfd < 0) + { + rv = libc_epfd; + goto done; + } + + rv = vls_attr 
(ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size); + if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + } + if (PREDICT_FALSE (libc_epfd <= 0)) + { + errno = -libc_epfd; + rv = -1; + goto done; + } + + if (PREDICT_FALSE (!ldpw->mq_epfd_added)) + { + struct epoll_event e = { 0 }; + e.events = EPOLLIN; + e.data.fd = ldpw->vcl_mq_epfd; + if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) < + 0) + { + LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d", + epfd, ldpw->vcl_mq_epfd, libc_epfd); + rv = -1; + goto done; + } + ldpw->mq_epfd_added = 1; + } + + rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0); + if (rv > 0) + goto done; + else if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + + rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask); + if (rv <= 0) + goto done; + for (int i = 0; i < rv; i++) + { + if (events[i].data.fd == ldpw->vcl_mq_epfd) + { + /* We should remove mq epoll fd from events. */ + rv--; + if (i != rv) + { + events[i].events = events[rv].events; + events[i].data.u64 = events[rv].data.u64; + } + num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0); + if (PREDICT_TRUE (num_ev > 0)) + rv += num_ev; + break; + } + } + +done: + return rv; +} + +int +epoll_pwait (int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t * sigmask) +{ + if (vls_use_eventfd ()) + return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, + sigmask); + else + return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask); +} + +int +epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout) +{ + if (vls_use_eventfd ()) + return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL); + else + return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL); +} + +int +poll (struct pollfd *fds, nfds_t nfds, int timeout) +{ + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + int rv, i, n_revents = 0; + vls_handle_t vlsh; 
+ vcl_poll_t *vp; + double max_time; + + LDBG (3, "fds %p, nfds %d, timeout %d", fds, nfds, timeout); + + if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0)) + clib_time_init (&ldpw->clib_time); + + max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0; + max_time += clib_time_now (&ldpw->clib_time); + + for (i = 0; i < nfds; i++) + { + if (fds[i].fd < 0) + continue; + + vlsh = ldp_fd_to_vlsh (fds[i].fd); + if (vlsh != VLS_INVALID_HANDLE) + { + fds[i].fd = -fds[i].fd; + vec_add2 (ldpw->vcl_poll, vp, 1); + vp->fds_ndx = i; + vp->sh = vlsh_to_sh (vlsh); + vp->events = fds[i].events; +#ifdef __USE_XOPEN2K + if (fds[i].events & POLLRDNORM) + vp->events |= POLLIN; + if (fds[i].events & POLLWRNORM) + vp->events |= POLLOUT; +#endif + vp->revents = fds[i].revents; + } + else + { + vec_add1 (ldpw->libc_poll, fds[i]); + vec_add1 (ldpw->libc_poll_idxs, i); + } + } + + do + { + if (vec_len (ldpw->vcl_poll)) + { + rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0); + if (rv < 0) + { + errno = -rv; + rv = -1; + goto done; + } + else + n_revents += rv; + } + + if (vec_len (ldpw->libc_poll)) + { + rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0); + if (rv < 0) + goto done; + else + n_revents += rv; + } + + if (n_revents) + { + rv = n_revents; + goto done; + } + } + while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time)); + rv = 0; + +done: + vec_foreach (vp, ldpw->vcl_poll) + { + fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd; + fds[vp->fds_ndx].revents = vp->revents; +#ifdef __USE_XOPEN2K + if ((fds[vp->fds_ndx].revents & POLLIN) && + (fds[vp->fds_ndx].events & POLLRDNORM)) + fds[vp->fds_ndx].revents |= POLLRDNORM; + if ((fds[vp->fds_ndx].revents & POLLOUT) && + (fds[vp->fds_ndx].events & POLLWRNORM)) + fds[vp->fds_ndx].revents |= POLLWRNORM; +#endif + } + vec_reset_length (ldpw->vcl_poll); + + for (i = 0; i < vec_len (ldpw->libc_poll); i++) + { + fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents; + } + vec_reset_length 
(ldpw->libc_poll_idxs); + vec_reset_length (ldpw->libc_poll); + + return rv; +} + +#ifdef USE_GNU +int +ppoll (struct pollfd *fds, nfds_t nfds, + const struct timespec *timeout, const sigset_t * sigmask) +{ + if ((errno = -ldp_init ())) + return -1; + + clib_warning ("LDP<%d>: LDP-TBD", getpid ()); + errno = ENOSYS; + + + return -1; +} +#endif + +void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void); + +void DESTRUCTOR_ATTRIBUTE ldp_destructor (void); + +/* + * This function is called when the library is loaded + */ +void +ldp_constructor (void) +{ + swrap_constructor (); + if (ldp_init () != 0) + { + fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n", + getpid ()); + _exit (1); + } + else if (LDP_DEBUG > 0) + clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ()); +} + +/* + * This function is called when the library is unloaded + */ +void +ldp_destructor (void) +{ + /* + swrap_destructor (); + if (ldp->init) + ldp->init = 0; + */ + + /* Don't use clib_warning() here because that calls writev() + * which will call ldp_init(). + */ + if (LDP_DEBUG > 0) + fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n", + __func__, __LINE__, getpid ()); +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/ldp.c.rej b/src/vcl/ldp.c.rej new file mode 100644 index 000000000..0ce81dba2 --- /dev/null +++ b/src/vcl/ldp.c.rej @@ -0,0 +1,87 @@ +--- src/vcl/ldp.c ++++ src/vcl/ldp.c +@@ -687,9 +880,9 @@ fcntl (int fd, int cmd, ...) 
+ + va_start (ap, cmd); + +- vlsh = ldp_fd_to_vlsh (fd); +- LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd); +- if (vlsh != VLS_INVALID_HANDLE) ++ vclsh = ldp_fd_to_vclsh (fd); ++ LDBG (0, "fd %u vclsh %d, cmd %u", fd, vclsh, cmd); ++ if (vclsh != INVALID_SESSION_ID) + { + int flags = va_arg (ap, int); + u32 size; +@@ -1766,7 +1972,7 @@ sendto (int fd, const void *buf, size_t n, int flags, + } + } + +- size = vls_sendto (vlsh, (void *) buf, n, flags, ep); ++ size = vppcom_session_sendto (vclsh, (void *) buf, n, flags, ep); + if (size < 0) + { + errno = -size; +@@ -1786,14 +1992,14 @@ ssize_t + recvfrom (int fd, void *__restrict buf, size_t n, int flags, + __SOCKADDR_ARG addr, socklen_t * __restrict addr_len) + { +- vls_handle_t sid; ++ vcl_session_handle_t sid; + ssize_t size, rv; + + if ((errno = -ldp_init ())) + return -1; + +- sid = ldp_fd_to_vlsh (fd); +- if (sid != VLS_INVALID_HANDLE) ++ sid = ldp_fd_to_vclsh (fd); ++ if (sid != INVALID_SESSION_ID) + { + vppcom_endpt_t ep; + u8 src_addr[sizeof (struct sockaddr_in6)]; +@@ -1801,7 +2007,7 @@ recvfrom (int fd, void *__restrict buf, size_t n, int flags, + if (addr) + { + ep.ip = src_addr; +- size = vls_recvfrom (sid, buf, n, flags, &ep); ++ size = vppcom_session_recvfrom (sid, buf, n, flags, &ep); + + if (size > 0) + { +@@ -1811,7 +2017,7 @@ recvfrom (int fd, void *__restrict buf, size_t n, int flags, + } + } + else +- size = vls_recvfrom (sid, buf, n, flags, NULL); ++ size = vppcom_session_recvfrom (sid, buf, n, flags, NULL); + + if (size < 0) + { +@@ -1902,14 +2108,14 @@ sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags) + ssize_t + recvmsg (int fd, struct msghdr * message, int flags) + { +- vls_handle_t vlsh; ++ vcl_session_handle_t vclsh; + ssize_t size; + + if ((errno = -ldp_init ())) + return -1; + +- vlsh = ldp_fd_to_vlsh (fd); +- if (vlsh != VLS_INVALID_HANDLE) ++ vclsh = ldp_fd_to_vclsh (fd); ++ if (vclsh != INVALID_SESSION_ID) + { + LDBG (0, "LDP-TBD"); + errno = ENOSYS; +@@ 
-1930,7 +2136,7 @@ recvmmsg (int fd, struct mmsghdr *vmessages, + { + ssize_t size; + const char *func_str; +- u32 sh = ldp_fd_to_vlsh (fd); ++ u32 sh = ldp_fd_to_vclsh (fd); + + if ((errno = -ldp_init ())) + return -1; diff --git a/src/vcl/ldp.h b/src/vcl/ldp.h index 8d78ead08..0a03f442d 100644 --- a/src/vcl/ldp.h +++ b/src/vcl/ldp.h @@ -34,7 +34,7 @@ #define LDP_ENV_TLS_KEY "LDP_TLS_KEY_FILE" #define LDP_ENV_TLS_TRANS "LDP_TRANSPARENT_TLS" -#define LDP_SID_BIT_MIN 5 +#define LDP_SID_BIT_MIN 16 #define LDP_SID_BIT_MAX 30 #define LDP_APP_NAME_MAX 256 diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index 593e63f3b..818c03f9a 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -244,7 +244,12 @@ typedef struct vcl_worker_ /* Session pool */ vcl_session_t *sessions; - /** Worker/thread index in current process */ + u32 listen_session_index; + + u32 listen_fd; + + u32 listen_queue_size; +/** Worker/thread index in current process */ u32 wrk_index; /** Worker index in vpp*/ diff --git a/src/vcl/vcl_private.h.orig b/src/vcl/vcl_private.h.orig new file mode 100644 index 000000000..12af09fbf --- /dev/null +++ b/src/vcl/vcl_private.h.orig @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2018-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SRC_VCL_VCL_PRIVATE_H_ +#define SRC_VCL_VCL_PRIVATE_H_ + +#include +#include +#include + +#if (CLIB_DEBUG > 0) +/* Set VPPCOM_DEBUG_INIT 2 for connection debug, + * 3 for read/write debug output + * or + * export VCL_DEBUG=<#> to set dynamically. + */ +#define VPPCOM_DEBUG_INIT 1 +#else +#define VPPCOM_DEBUG_INIT 0 +#endif + +#define VPPCOM_DEBUG vcm->debug + +extern __thread uword __vcl_worker_index; + +static inline void +vcl_set_worker_index (uword wrk_index) +{ + __vcl_worker_index = wrk_index; +} + +static inline uword +vcl_get_worker_index (void) +{ + return __vcl_worker_index; +} + +/* + * VPPCOM Private definitions and functions. + */ +typedef enum +{ + STATE_APP_START, + STATE_APP_CONN_VPP, + STATE_APP_ENABLED, + STATE_APP_ATTACHED, + STATE_APP_ADDING_WORKER, + STATE_APP_ADDING_TLS_DATA, + STATE_APP_FAILED, + STATE_APP_READY +} app_state_t; + +typedef enum +{ + STATE_CLOSED = 0, + STATE_CONNECT = 0x01, + STATE_LISTEN = 0x02, + STATE_ACCEPT = 0x04, + STATE_VPP_CLOSING = 0x08, + STATE_DISCONNECT = 0x10, + STATE_DETACHED = 0x20, + STATE_UPDATED = 0x40, + STATE_LISTEN_NO_MQ = 0x80, +} vcl_session_state_t; + +#define SERVER_STATE_OPEN (STATE_ACCEPT|STATE_VPP_CLOSING) +#define CLIENT_STATE_OPEN (STATE_CONNECT|STATE_VPP_CLOSING) +#define STATE_OPEN (SERVER_STATE_OPEN | CLIENT_STATE_OPEN) + +typedef struct epoll_event vppcom_epoll_event_t; + +typedef struct +{ + u32 next_sh; + u32 prev_sh; + u32 vep_sh; + vppcom_epoll_event_t ev; +#define VEP_DEFAULT_ET_MASK (EPOLLIN|EPOLLOUT) +#define VEP_UNSUPPORTED_EVENTS (EPOLLONESHOT|EPOLLEXCLUSIVE) + u32 et_mask; +} vppcom_epoll_t; + +/* Select uses the vcl_si_set as if a clib_bitmap. 
Make sure they are the + * same size */ +STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (vcl_si_set), + "vppcom bitmap size mismatch"); + +typedef struct +{ + u8 is_ip4; + ip46_address_t ip46; +} vppcom_ip46_t; + +#define VCL_ACCEPTED_F_CLOSED (1 << 0) +#define VCL_ACCEPTED_F_RESET (1 << 1) + +typedef struct vcl_session_msg +{ + union + { + session_accepted_msg_t accepted_msg; + }; + u32 flags; +} vcl_session_msg_t; + +typedef enum +{ + VCL_SESS_ATTR_SERVER, + VCL_SESS_ATTR_CUT_THRU, + VCL_SESS_ATTR_VEP, + VCL_SESS_ATTR_VEP_SESSION, + VCL_SESS_ATTR_LISTEN, // SOL_SOCKET,SO_ACCEPTCONN + VCL_SESS_ATTR_NONBLOCK, // fcntl,O_NONBLOCK + VCL_SESS_ATTR_REUSEADDR, // SOL_SOCKET,SO_REUSEADDR + VCL_SESS_ATTR_REUSEPORT, // SOL_SOCKET,SO_REUSEPORT + VCL_SESS_ATTR_BROADCAST, // SOL_SOCKET,SO_BROADCAST + VCL_SESS_ATTR_V6ONLY, // SOL_TCP,IPV6_V6ONLY + VCL_SESS_ATTR_KEEPALIVE, // SOL_SOCKET,SO_KEEPALIVE + VCL_SESS_ATTR_TCP_NODELAY, // SOL_TCP,TCP_NODELAY + VCL_SESS_ATTR_TCP_KEEPIDLE, // SOL_TCP,TCP_KEEPIDLE + VCL_SESS_ATTR_TCP_KEEPINTVL, // SOL_TCP,TCP_KEEPINTVL + VCL_SESS_ATTR_SHUT_RD, + VCL_SESS_ATTR_SHUT_WR, + VCL_SESS_ATTR_MAX +} vppcom_session_attr_t; + +#define VCL_SESS_ATTR_SET(ATTR, VAL) \ +do { \ + (ATTR) |= 1 << (VAL); \ + } while (0) + +#define VCL_SESS_ATTR_CLR(ATTR, VAL) \ +do { \ + (ATTR) &= ~(1 << (VAL)); \ + } while (0) + +#define VCL_SESS_ATTR_TEST(ATTR, VAL) \ + ((ATTR) & (1 << (VAL)) ? 
1 : 0) + +typedef enum vcl_session_flags_ +{ + VCL_SESSION_F_CONNECTED = 1 << 0, +} __clib_packed vcl_session_flags_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); +#define _(type, name) type name; + foreach_app_session_field +#undef _ + u32 sndbuf_size; // VPP-TBD: Hack until support setsockopt(SO_SNDBUF) + u32 rcvbuf_size; // VPP-TBD: Hack until support setsockopt(SO_RCVBUF) + u32 user_mss; // VPP-TBD: Hack until support setsockopt(TCP_MAXSEG) + u64 vpp_handle; + u32 vpp_thread_index; + + svm_fifo_t *ct_rx_fifo; + svm_fifo_t *ct_tx_fifo; + + /* Socket configuration state */ + u8 is_vep; + u8 is_vep_session; + vcl_session_flags_t flags; + /* VCL session index of the listening session (if any) */ + u32 listener_index; + /* Accepted sessions on this listener */ + int n_accepted_sessions; + u8 has_rx_evt; + u32 attr; + u64 parent_handle; + vppcom_epoll_t vep; + int libc_epfd; + svm_msg_q_t *our_evt_q; + vcl_session_msg_t *accept_evts_fifo; +#if VCL_ELOG + elog_track_t elog_track; +#endif +} vcl_session_t; + +typedef struct vppcom_cfg_t_ +{ + uword heapsize; + u32 max_workers; + u32 vpp_api_q_length; + uword segment_baseva; + uword segment_size; + uword add_segment_size; + u32 preallocated_fifo_pairs; + u32 rx_fifo_size; + u32 tx_fifo_size; + u32 event_queue_size; + u32 listen_queue_size; + u8 app_proxy_transport_tcp; + u8 app_proxy_transport_udp; + u8 app_scope_local; + u8 app_scope_global; + u8 *namespace_id; + u64 namespace_secret; + u8 use_mq_eventfd; + f64 app_timeout; + f64 session_timeout; + f64 accept_timeout; + u32 event_ring_size; + char *event_log_path; + u8 *vpp_api_filename; + u8 *vpp_api_socket_name; + u8 *vpp_api_chroot; + u32 tls_engine; +} vppcom_cfg_t; + +void vppcom_cfg (vppcom_cfg_t * vcl_cfg); + +typedef struct vcl_cut_through_registration_ +{ + svm_msg_q_t *mq; + svm_msg_q_t *peer_mq; + u32 sid; + u32 epoll_evt_conn_index; /*< mq evt connection index part of + the mqs evtfd epoll (if used) */ +} vcl_cut_through_registration_t; 
+ +typedef struct vcl_mq_evt_conn_ +{ + svm_msg_q_t *mq; + int mq_fd; +} vcl_mq_evt_conn_t; + +typedef struct vcl_worker_ +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* Session pool */ + vcl_session_t *sessions; + + /** Worker/thread index in current process */ + u32 wrk_index; + + /** Worker index in vpp*/ + u32 vpp_wrk_index; + + /** API client handle */ + u32 my_client_index; + + /** State of the connection, shared between msg RX thread and main thread */ + volatile app_state_t wrk_state; + + /** VPP binary api input queue */ + svm_queue_t *vl_input_queue; + + /** VPP mq to be used for exchanging control messages */ + svm_msg_q_t *ctrl_mq; + + /** Message queues epoll fd. Initialized only if using mqs with eventfds */ + int mqs_epfd; + + /** Pool of event message queue event connections */ + vcl_mq_evt_conn_t *mq_evt_conns; + + /** Per worker buffer for receiving mq epoll events */ + struct epoll_event *mq_events; + + /** Hash table for disconnect processing */ + uword *session_index_by_vpp_handles; + + /** Select bitmaps */ + clib_bitmap_t *rd_bitmap; + clib_bitmap_t *wr_bitmap; + clib_bitmap_t *ex_bitmap; + + /** Our event message queue */ + svm_msg_q_t *app_event_queue; + + /** VPP workers event message queues */ + svm_msg_q_t **vpp_event_queues; + + /** For deadman timers */ + clib_time_t clib_time; + + /** Vector acting as buffer for mq messages */ + svm_msg_q_msg_t *mq_msg_vector; + + /** Vector of unhandled events */ + session_event_t *unhandled_evts_vector; + + u32 *pending_session_wrk_updates; + + /** Used also as a thread stop key buffer */ + pthread_t thread_id; + + /** Current pid, may be different from main_pid if forked child */ + pid_t current_pid; + + u32 forked_child; + + socket_client_main_t bapi_sock_ctx; + memory_client_main_t bapi_shm_ctx; + api_main_t bapi_api_ctx; +} vcl_worker_t; + +typedef struct vppcom_main_t_ +{ + u8 is_init; + u32 debug; + pthread_t main_cpu; + + /** Main process pid */ + pid_t main_pid; + + /** App's index in 
vpp. It's used by vpp to identify the app */ + u32 app_index; + + /** State of the connection, shared between msg RX thread and main thread */ + volatile app_state_t app_state; + + u8 *app_name; + + /** VCL configuration */ + vppcom_cfg_t cfg; + + volatile u32 forking; + + /** Workers */ + vcl_worker_t *workers; + + /** Lock to protect worker registrations */ + clib_spinlock_t workers_lock; + + /** Lock to protect segment hash table */ + clib_rwlock_t segment_table_lock; + + /** Mapped segments table */ + uword *segment_table; + + /** Control mq obtained from attach */ + svm_msg_q_t *ctrl_mq; + + fifo_segment_main_t segment_main; + +#ifdef VCL_ELOG + /* VPP Event-logger */ + elog_main_t elog_main; + elog_track_t elog_track; +#endif + + /* VNET_API_ERROR_FOO -> "Foo" hash table */ + uword *error_string_by_error_number; + +} vppcom_main_t; + +extern vppcom_main_t *vcm; + +#define VCL_INVALID_SESSION_INDEX ((u32)~0) +#define VCL_INVALID_SESSION_HANDLE ((u64)~0) +#define VCL_INVALID_SEGMENT_INDEX ((u32)~0) +#define VCL_INVALID_SEGMENT_HANDLE ((u64)~0) + +static inline vcl_session_t * +vcl_session_alloc (vcl_worker_t * wrk) +{ + vcl_session_t *s; + pool_get (wrk->sessions, s); + memset (s, 0, sizeof (*s)); + s->session_index = s - wrk->sessions; + s->listener_index = VCL_INVALID_SESSION_INDEX; + return s; +} + +static inline void +vcl_session_free (vcl_worker_t * wrk, vcl_session_t * s) +{ + /* Debug level set to 1 to avoid debug messages while ldp is cleaning up */ + VDBG (1, "session %u [0x%llx] removed", s->session_index, s->vpp_handle); + pool_put (wrk->sessions, s); +} + +static inline vcl_session_t * +vcl_session_get (vcl_worker_t * wrk, u32 session_index) +{ + if (pool_is_free_index (wrk->sessions, session_index)) + return 0; + return pool_elt_at_index (wrk->sessions, session_index); +} + +static inline vcl_session_handle_t +vcl_session_handle_from_index (u32 session_index) +{ /* Pack the current worker index (bits 24 and up) and session_index (low 24 bits) into one handle */ + ASSERT (session_index < (1 << 24)); /* index must fit the 24 bits kept by the 0xFFFFFF mask when the handle is parsed */ + return (vcl_get_worker_index () << 24 |
session_index); +} + +static inline vcl_session_handle_t +vcl_session_handle (vcl_session_t * s) +{ + return vcl_session_handle_from_index (s->session_index); +} + +static inline void +vcl_session_handle_parse (u32 handle, u32 * wrk_index, u32 * session_index) +{ + *wrk_index = handle >> 24; + *session_index = handle & 0xFFFFFF; +} + +static inline vcl_session_t * +vcl_session_get_w_handle (vcl_worker_t * wrk, u32 session_handle) +{ + u32 session_index, wrk_index; + vcl_session_handle_parse (session_handle, &wrk_index, &session_index); + ASSERT (wrk_index == wrk->wrk_index); + return vcl_session_get (wrk, session_index); +} + +static inline vcl_session_t * +vcl_session_get_w_vpp_handle (vcl_worker_t * wrk, u64 vpp_handle) +{ + uword *p; + if ((p = hash_get (wrk->session_index_by_vpp_handles, vpp_handle))) + return vcl_session_get (wrk, (u32) p[0]); + return 0; +} + +static inline u32 +vcl_session_index_from_vpp_handle (vcl_worker_t * wrk, u64 vpp_handle) +{ + uword *p; + if ((p = hash_get (wrk->session_index_by_vpp_handles, vpp_handle))) + return p[0]; + return VCL_INVALID_SESSION_INDEX; +} + +static inline void +vcl_session_table_add_vpp_handle (vcl_worker_t * wrk, u64 handle, u32 value) +{ + hash_set (wrk->session_index_by_vpp_handles, handle, value); +} + +static inline void +vcl_session_table_del_vpp_handle (vcl_worker_t * wrk, u64 vpp_handle) +{ + hash_unset (wrk->session_index_by_vpp_handles, vpp_handle); +} + +static inline uword * +vcl_session_table_lookup_vpp_handle (vcl_worker_t * wrk, u64 handle) +{ + return hash_get (wrk->session_index_by_vpp_handles, handle); +} + +static inline void +vcl_session_table_add_listener (vcl_worker_t * wrk, u64 listener_handle, + u32 value) +{ + hash_set (wrk->session_index_by_vpp_handles, listener_handle, value); +} + +static inline void +vcl_session_table_del_listener (vcl_worker_t * wrk, u64 listener_handle) +{ + hash_unset (wrk->session_index_by_vpp_handles, listener_handle); +} + +static inline int 
+vcl_session_is_connectable_listener (vcl_worker_t * wrk, + vcl_session_t * session) +{ + /* Tell if we session_handle is a QUIC session. + * We can be in the following cases : + * Listen session <- QUIC session <- Stream session + * QUIC session <- Stream session + */ + vcl_session_t *ls; + if (session->session_type != VPPCOM_PROTO_QUIC) + return 0; + if (session->listener_index == VCL_INVALID_SESSION_INDEX) + return !(session->session_state & STATE_LISTEN); + ls = vcl_session_get_w_handle (wrk, session->listener_index); + if (!ls) + return VPPCOM_EBADFD; + return ls->session_state & STATE_LISTEN; +} + +static inline vcl_session_t * +vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 handle) +{ + uword *p; + vcl_session_t *session; + + p = hash_get (wrk->session_index_by_vpp_handles, handle); + if (!p) + { + VDBG (0, "could not find listen session: unknown vpp listener handle" + " %llx", handle); + return 0; + } + session = vcl_session_get (wrk, p[0]); + if (!session) + { + VDBG (1, "invalid listen session index (%u)", p[0]); + return 0; + } + + ASSERT ((session->session_state & (STATE_LISTEN | STATE_LISTEN_NO_MQ)) || + vcl_session_is_connectable_listener (wrk, session)); + return session; +} + +const char *vppcom_session_state_str (vcl_session_state_t state); + +static inline u8 +vcl_session_is_ct (vcl_session_t * s) +{ + return (s->ct_tx_fifo != 0); +} + +static inline u8 +vcl_session_is_cl (vcl_session_t * s) +{ + if (s->session_type == VPPCOM_PROTO_UDP) + return 1; + return 0; +} + +static inline u8 +vcl_session_is_open (vcl_session_t * s) +{ + return ((s->session_state & STATE_OPEN) + || (s->session_state == STATE_LISTEN + && s->session_type == VPPCOM_PROTO_UDP)); +} + +static inline u8 +vcl_session_is_closing (vcl_session_t * s) +{ + return (s->session_state == STATE_VPP_CLOSING + || s->session_state == STATE_DISCONNECT); +} + +static inline int +vcl_session_closing_error (vcl_session_t * s) +{ + /* Return 0 on closing sockets */ + return 
s->session_state == STATE_DISCONNECT ? VPPCOM_ECONNRESET : 0; +} + +static inline int +vcl_session_closed_error (vcl_session_t * s) +{ + return s->session_state == STATE_DISCONNECT + ? VPPCOM_ECONNRESET : VPPCOM_ENOTCONN; +} + +static inline void +vcl_ip_copy_from_ep (ip46_address_t * ip, vppcom_endpt_t * ep) +{ + if (ep->is_ip4) + clib_memcpy_fast (&ip->ip4, ep->ip, sizeof (ip4_address_t)); + else + clib_memcpy_fast (&ip->ip6, ep->ip, sizeof (ip6_address_t)); +} + +static inline void +vcl_ip_copy_to_ep (ip46_address_t * ip, vppcom_endpt_t * ep, u8 is_ip4) +{ + ep->is_ip4 = is_ip4; + if (is_ip4) + clib_memcpy_fast (ep->ip, &ip->ip4, sizeof (ip4_address_t)); + else + clib_memcpy_fast (ep->ip, &ip->ip6, sizeof (ip6_address_t)); +} + +/* + * Helpers + */ +int vcl_wait_for_app_state_change (app_state_t app_state); +vcl_mq_evt_conn_t *vcl_mq_evt_conn_alloc (vcl_worker_t * wrk); +u32 vcl_mq_evt_conn_index (vcl_worker_t * wrk, vcl_mq_evt_conn_t * mqc); +vcl_mq_evt_conn_t *vcl_mq_evt_conn_get (vcl_worker_t * wrk, u32 mq_conn_idx); +int vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq); +int vcl_mq_epoll_del_evfd (vcl_worker_t * wrk, u32 mqc_index); + +vcl_worker_t *vcl_worker_alloc_and_init (void); +void vcl_worker_cleanup (vcl_worker_t * wrk, u8 notify_vpp); +int vcl_worker_register_with_vpp (void); +int vcl_worker_set_bapi (void); +svm_msg_q_t *vcl_worker_ctrl_mq (vcl_worker_t * wrk); + +void vcl_flush_mq_events (void); +void vcl_cleanup_bapi (void); +int vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * session, + vcl_session_handle_t sh, u8 do_disconnect); + +void vcl_segment_table_add (u64 segment_handle, u32 svm_segment_index); +u32 vcl_segment_table_lookup (u64 segment_handle); +void vcl_segment_table_del (u64 segment_handle); + +int vcl_session_read_ready (vcl_session_t * session); +int vcl_session_write_ready (vcl_session_t * session); + +static inline vcl_worker_t * +vcl_worker_get (u32 wrk_index) +{ + return pool_elt_at_index (vcm->workers, 
wrk_index); +} + +static inline vcl_worker_t * +vcl_worker_get_if_valid (u32 wrk_index) +{ + if (pool_is_free_index (vcm->workers, wrk_index)) + return 0; + return pool_elt_at_index (vcm->workers, wrk_index); +} + +static inline vcl_worker_t * +vcl_worker_get_current (void) +{ + return vcl_worker_get (vcl_get_worker_index ()); +} + +static inline u8 +vcl_n_workers (void) +{ + return pool_elts (vcm->workers); +} + +static inline svm_msg_q_t * +vcl_session_vpp_evt_q (vcl_worker_t * wrk, vcl_session_t * s) +{ + return wrk->vpp_event_queues[s->vpp_thread_index]; +} + +void vcl_send_session_worker_update (vcl_worker_t * wrk, vcl_session_t * s, + u32 wrk_index); +/* + * VCL Binary API + */ +int vppcom_connect_to_vpp (char *app_name); +void vppcom_disconnect_from_vpp (void); +void vppcom_init_error_string_table (void); +void vppcom_send_session_enable_disable (u8 is_enable); +void vppcom_app_send_attach (void); +void vppcom_app_send_detach (void); +void vcl_send_session_unlisten (vcl_worker_t * wrk, vcl_session_t * s); +void vppcom_send_disconnect_session (u64 vpp_handle); +void vppcom_api_hookup (void); +void vppcom_send_application_tls_cert_add (vcl_session_t * session, + char *cert, u32 cert_len); +void vppcom_send_application_tls_key_add (vcl_session_t * session, char *key, + u32 key_len); +void vcl_send_app_worker_add_del (u8 is_add); +void vcl_send_child_worker_del (vcl_worker_t * wrk); + +int vcl_segment_attach (u64 segment_handle, char *name, + ssvm_segment_type_t type, int fd); +void vcl_segment_detach (u64 segment_handle); + +u32 vcl_max_nsid_len (void); + +void vls_init (); +#endif /* SRC_VCL_VCL_PRIVATE_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- 2.17.1