about summary refs log tree commit diff stats
path: root/src/vcl/ldp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vcl/ldp.c')
-rw-r--r-- src/vcl/ldp.c 454
1 files changed, 328 insertions, 126 deletions
diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c
index 5f1eeb9d540..bd3457fa8fd 100644
--- a/src/vcl/ldp.c
+++ b/src/vcl/ldp.c
@@ -12,6 +12,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+#ifdef HAVE_GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
@@ -21,6 +26,7 @@
#include <stdarg.h>
#include <sys/resource.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <vcl/ldp_socket_wrapper.h>
#include <vcl/ldp.h>
@@ -51,6 +57,20 @@
#define LDP_MAX_NWORKERS 32
+/*
+ * Yield the plain sockaddr pointer carried by a socket-call address
+ * argument. With HAVE_GNU_SOURCE the argument is read through its
+ * __sockaddr__ member (presumably glibc's transparent-union
+ * __SOCKADDR_ARG -- confirm); otherwise the argument is used as is.
+ *
+ * The expansion is a parenthesized expression with no trailing ';' and it
+ * refers only to its own parameter, so it works for any argument name.
+ */
+#ifdef HAVE_GNU_SOURCE
+#define SOCKADDR_GET_SA(__addr) ((__addr).__sockaddr__)
+#else
+#define SOCKADDR_GET_SA(__addr) (__addr)
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ORIGINAL_DST
+/* from <linux/netfilter_ipv4.h> */
+#define SO_ORIGINAL_DST 80
+#endif
typedef struct ldp_worker_ctx_
{
u8 *io_buffer;
@@ -143,15 +163,14 @@ ldp_worker_get_current (void)
+/*
+ * Store the LDP application name in ldp->app_name, formatted as
+ * "<app_name>-ldp-<pid>". snprintf bounds the write to LDP_APP_NAME_MAX
+ * bytes, so a long app_name is silently truncated.
+ */
static inline void
ldp_set_app_name (char *app_name)
{
- snprintf (ldp->app_name, LDP_APP_NAME_MAX,
- "ldp-%d-%s", getpid (), app_name);
+ snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ());
}
+/*
+ * Return ldp->app_name, building it on first use (while it is still an
+ * empty string) from the short name of the running program.
+ *
+ * NOTE(review): program_invocation_short_name is a glibc extension that is
+ * only declared when _GNU_SOURCE is defined; this file defines _GNU_SOURCE
+ * only under HAVE_GNU_SOURCE -- confirm the build always provides one of
+ * them.
+ */
static inline char *
ldp_get_app_name ()
{
if (ldp->app_name[0] == '\0')
- ldp_set_app_name ("app");
+ ldp_set_app_name (program_invocation_short_name);
return ldp->app_name;
}
@@ -176,34 +195,12 @@ ldp_alloc_workers (void)
{
if (ldp->workers)
return;
- pool_alloc (ldp->workers, LDP_MAX_NWORKERS);
+ ldp->workers = vec_new (ldp_worker_ctx_t, LDP_MAX_NWORKERS);
}
-static int
-ldp_init (void)
+static void
+ldp_init_cfg (void)
{
- ldp_worker_ctx_t *ldpw;
- int rv;
-
- ASSERT (!ldp->init);
-
- ldp->init = 1;
- ldp->vcl_needs_real_epoll = 1;
- rv = vls_app_create (ldp_get_app_name ());
- if (rv != VPPCOM_OK)
- {
- ldp->vcl_needs_real_epoll = 0;
- if (rv == VPPCOM_EEXIST)
- return 0;
- LDBG (2, "\nERROR: ldp_init: vppcom_app_create()"
- " failed! rv = %d (%s)\n", rv, vppcom_retval_str (rv));
- ldp->init = 0;
- return rv;
- }
- ldp->vcl_needs_real_epoll = 0;
- ldp_alloc_workers ();
- ldpw = ldp_worker_get_current ();
-
char *env_var_str = getenv (LDP_ENV_DEBUG);
if (env_var_str)
{
@@ -271,10 +268,11 @@ ldp_init (void)
/* Make sure there are enough bits in the fd set for vcl sessions */
if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
{
- LDBG (0, "ERROR: LDP vlsh bit value %d > FD_SETSIZE/2 %d!",
+ /* Only valid for select/pselect, so just WARNING and not exit */
+ LDBG (0,
+ "WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, "
+ "select/pselect not supported now!",
ldp->vlsh_bit_val, FD_SETSIZE / 2);
- ldp->init = 0;
- return -1;
}
}
env_var_str = getenv (LDP_ENV_TLS_TRANS);
@@ -282,12 +280,41 @@ ldp_init (void)
{
ldp->transparent_tls = 1;
}
+}
+
+static int
+ldp_init (void)
+{
+ ldp_worker_ctx_t *ldpw;
+ int rv;
+
+ if (ldp->init)
+ {
+ LDBG (0, "LDP is initialized already");
+ return 0;
+ }
- /* *INDENT-OFF* */
- pool_foreach (ldpw, ldp->workers) {
+ ldp_init_cfg ();
+ ldp->init = 1;
+ ldp->vcl_needs_real_epoll = 1;
+ rv = vls_app_create (ldp_get_app_name ());
+ if (rv != VPPCOM_OK)
+ {
+ ldp->vcl_needs_real_epoll = 0;
+ if (rv == VPPCOM_EEXIST)
+ return 0;
+ LDBG (2,
+ "\nERROR: ldp_init: vppcom_app_create()"
+ " failed! rv = %d (%s)\n",
+ rv, vppcom_retval_str (rv));
+ ldp->init = 0;
+ return rv;
+ }
+ ldp->vcl_needs_real_epoll = 0;
+ ldp_alloc_workers ();
+
+ vec_foreach (ldpw, ldp->workers)
clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
- }
- /* *INDENT-ON* */
LDBG (0, "LDP initialization: done!");
@@ -315,16 +342,16 @@ close (int fd)
epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
if (epfd > 0)
{
+ ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ u32 size = sizeof (epfd);
+
LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
- rv = libc_close (epfd);
- if (rv < 0)
- {
- u32 size = sizeof (epfd);
- epfd = 0;
+ libc_close (epfd);
+ ldpw->mq_epfd_added = 0;
- (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
- }
+ epfd = 0;
+ (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
}
else if (PREDICT_FALSE (epfd < 0))
{
@@ -586,10 +613,16 @@ ioctl (int fd, unsigned long int cmd, ...)
case FIONREAD:
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
break;
-
+ case TIOCOUTQ:
+ {
+ u32 *buf = va_arg (ap, void *);
+ u32 *buflen = va_arg (ap, u32 *);
+ rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITEQ, buf, buflen);
+ }
+ break;
case FIONBIO:
{
- u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0;
+ u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0;
u32 size = sizeof (flags);
/* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
@@ -635,7 +668,6 @@ ldp_select_init_maps (fd_set * __restrict original,
clib_memcpy_fast (*resultb, original, n_bytes);
memset (original, 0, n_bytes);
- /* *INDENT-OFF* */
clib_bitmap_foreach (fd, *resultb) {
if (fd > nfds)
break;
@@ -645,7 +677,6 @@ ldp_select_init_maps (fd_set * __restrict original,
else
*vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
}
- /* *INDENT-ON* */
si_bits_set = clib_bitmap_last_set (*vclb) + 1;
*si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
@@ -665,7 +696,6 @@ ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
if (!libcb)
return 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (si, vclb) {
vlsh = vls_session_index_to_vlsh (si);
ASSERT (vlsh != VLS_INVALID_HANDLE);
@@ -677,7 +707,6 @@ ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
}
FD_SET (fd, libcb);
}
- /* *INDENT-ON* */
return 0;
}
@@ -690,10 +719,8 @@ ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
if (!libcb)
return;
- /* *INDENT-OFF* */
clib_bitmap_foreach (fd, result)
FD_SET ((int)fd, libcb);
- /* *INDENT-ON* */
}
int
@@ -1050,8 +1077,9 @@ socketpair (int domain, int type, int protocol, int fds[2])
}
int
-bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
+bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1122,11 +1150,10 @@ done:
}
static inline int
-ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
- vppcom_endpt_t * ep)
+ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len,
+ vppcom_endpt_t *ep)
{
- int rv = 0;
- int sa_len, copy_len;
+ int rv = 0, sa_len, copy_len;
ldp_init_check ();
@@ -1167,8 +1194,9 @@ ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
}
int
-getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
+getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1201,15 +1229,16 @@ getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
}
else
{
- rv = libc_getsockname (fd, addr, len);
+ rv = libc_getsockname (fd, _addr, len);
}
return rv;
}
int
-connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
+connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1289,8 +1318,9 @@ done:
}
int
-getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
+getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1538,13 +1568,17 @@ __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
return recv (fd, buf, n, flags);
}
-static int
-ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
- __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
+static inline int
+ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
+ vppcom_endpt_tlv_t *app_tlvs, int flags,
+ __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vppcom_endpt_t *ep = 0;
vppcom_endpt_t _ep;
+ _ep.app_tlvs = app_tlvs;
+
if (addr)
{
ep = &_ep;
@@ -1574,11 +1608,11 @@ ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
}
static int
-ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
- int flags, __SOCKADDR_ARG addr,
- socklen_t * __restrict addr_len)
+ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags,
+ __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len)
{
u8 src_addr[sizeof (struct sockaddr_in6)];
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vppcom_endpt_t ep;
ssize_t size;
int rv;
@@ -1603,8 +1637,9 @@ ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
ssize_t
sendto (int fd, const void *buf, size_t n, int flags,
- __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
+ __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
ssize_t size;
@@ -1613,7 +1648,7 @@ sendto (int fd, const void *buf, size_t n, int flags,
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len);
+ size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
if (size < 0)
{
errno = -size;
@@ -1655,6 +1690,98 @@ recvfrom (int fd, void *__restrict buf, size_t n, int flags,
return size;
}
+/*
+ * Translate the control messages attached to an outgoing msghdr into
+ * app TLVs appended to the byte vector behind *app_tlvs.
+ *
+ * Supported control messages:
+ *   SOL_UDP / UDP_SEGMENT -> VCL_UDP_SEGMENT, 16-bit segment size
+ *   SOL_IP  / IP_PKTINFO  -> VCL_IP_PKTINFO, ipi_spec_dst address
+ * Anything else is logged at debug level 1 and skipped, as is a supported
+ * control message whose cmsg_len is too short to hold its payload.
+ *
+ * vlsh      session handle (currently unused)
+ * msg       message whose ancillary data is parsed; not modified
+ * app_tlvs  in/out vector of TLVs; updated on return because appending may
+ *           move the vector. The caller frees it.
+ *
+ * Always returns 0.
+ */
+static int
+ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg,
+                vppcom_endpt_tlv_t **app_tlvs)
+{
+  uint8_t *ad, *at = (uint8_t *) *app_tlvs;
+  vppcom_endpt_tlv_t *adh;
+  struct in_pktinfo *pi;
+  struct cmsghdr *cmsg;
+
+  for (cmsg = CMSG_FIRSTHDR (msg); cmsg != NULL;
+       cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg))
+    {
+      switch (cmsg->cmsg_level)
+        {
+        case SOL_UDP:
+          switch (cmsg->cmsg_type)
+            {
+            case UDP_SEGMENT:
+              /* Do not read a payload the application did not supply */
+              if (cmsg->cmsg_len < CMSG_LEN (sizeof (uint16_t)))
+                {
+                  LDBG (1, "UDP_SEGMENT cmsg too short, ignored");
+                  break;
+                }
+              vec_add2 (at, adh, sizeof (*adh));
+              adh->data_type = VCL_UDP_SEGMENT;
+              adh->data_len = sizeof (uint16_t);
+              vec_add2 (at, ad, sizeof (uint16_t));
+              /* CMSG_DATA and the byte vector carry no alignment guarantee
+               * for uint16_t, so copy bytes instead of casting pointers */
+              clib_memcpy_fast (ad, CMSG_DATA (cmsg), sizeof (uint16_t));
+              break;
+            default:
+              LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type);
+              break;
+            }
+          break;
+        case SOL_IP:
+          switch (cmsg->cmsg_type)
+            {
+            case IP_PKTINFO:
+              if (cmsg->cmsg_len < CMSG_LEN (sizeof (struct in_pktinfo)))
+                {
+                  LDBG (1, "IP_PKTINFO cmsg too short, ignored");
+                  break;
+                }
+              vec_add2 (at, adh, sizeof (*adh));
+              adh->data_type = VCL_IP_PKTINFO;
+              adh->data_len = sizeof (struct in_addr);
+              vec_add2 (at, ad, sizeof (struct in_addr));
+              pi = (void *) CMSG_DATA (cmsg);
+              clib_memcpy_fast (ad, &pi->ipi_spec_dst,
+                                sizeof (struct in_addr));
+              break;
+            default:
+              LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type);
+              break;
+            }
+          break;
+        default:
+          LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level);
+          break;
+        }
+    }
+  /* Hand the (possibly moved) vector back to the caller */
+  *app_tlvs = (vppcom_endpt_tlv_t *) at;
+  return 0;
+}
+
+
+/*
+ * Fill the control buffer of a received msghdr with an IP_PKTINFO control
+ * message carrying the session's local address, when the session has
+ * IP_PKTINFO enabled.
+ *
+ * The first control header is zeroed up front. Nothing is written when the
+ * control buffer is missing or too small for a header, and the IP_PKTINFO
+ * message is only emitted when the buffer can hold it completely.
+ *
+ * vlsh  session handle to query
+ * msg   message whose msg_control buffer is written
+ *
+ * Always returns 0.
+ *
+ * NOTE(review): the two vls_attr() results below are interpreted in opposite
+ * ways (a zero return leaves early for GET_IP_PKTINFO but means "fill the
+ * cmsg" for GET_LCL_ADDR) -- confirm which one is intended.
+ * NOTE(review): msg->msg_controllen is not updated to the length actually
+ * written -- confirm callers do not rely on it.
+ */
+static int
+ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg)
+{
+  u32 optval, optlen = sizeof (optval);
+  struct cmsghdr *cmsg;
+
+  /* CMSG_FIRSTHDR yields NULL when msg_controllen cannot hold a header */
+  cmsg = CMSG_FIRSTHDR (msg);
+  if (!cmsg)
+    return 0;
+  memset (cmsg, 0, sizeof (*cmsg));
+
+  if (!vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen))
+    return 0;
+
+  /* Never write past the control buffer supplied by the application */
+  if (optval && msg->msg_controllen >= CMSG_LEN (sizeof (struct in_pktinfo)))
+    {
+      vppcom_endpt_t ep;
+      /* Sized for the larger address family, like the endpoint buffer in
+       * ldp_vls_recvfrom; assumes GET_LCL_ADDR may return an IPv6 address
+       * -- TODO confirm. Only the first in_addr bytes are used below. */
+      u8 addr_buf[sizeof (struct in6_addr)];
+      u32 size = sizeof (ep);
+
+      ep.ip = addr_buf;
+
+      if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size))
+        {
+          struct in_pktinfo pi = {};
+
+          clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr));
+          cmsg->cmsg_level = SOL_IP;
+          cmsg->cmsg_type = IP_PKTINFO;
+          cmsg->cmsg_len = CMSG_LEN (sizeof (pi));
+          clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi));
+        }
+    }
+
+  return 0;
+}
+
+
ssize_t
sendmsg (int fd, const struct msghdr * msg, int flags)
{
@@ -1666,14 +1793,17 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
+ vppcom_endpt_tlv_t *app_tlvs = 0;
struct iovec *iov = msg->msg_iov;
ssize_t total = 0;
- int i, rv;
+ int i, rv = 0;
+
+ ldp_parse_cmsg (vlsh, msg, &app_tlvs);
for (i = 0; i < msg->msg_iovlen; ++i)
{
- rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
- msg->msg_name, msg->msg_namelen);
+ rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs,
+ flags, msg->msg_name, msg->msg_namelen);
if (rv < 0)
break;
else
@@ -1684,6 +1814,8 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
}
}
+ vec_free (app_tlvs);
+
if (rv < 0 && total == 0)
{
errno = -rv;
@@ -1700,7 +1832,7 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
return size;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
int
sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
{
@@ -1733,7 +1865,6 @@ sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
if (size < 0)
{
int errno_val = errno;
- perror (func_str);
clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
"rv %d, errno = %d", getpid (), fd, fd,
func_str, size, errno_val);
@@ -1760,7 +1891,7 @@ recvmsg (int fd, struct msghdr * msg, int flags)
{
struct iovec *iov = msg->msg_iov;
ssize_t max_deq, total = 0;
- int i, rv;
+ int i, rv = 0;
max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
if (!max_deq)
@@ -1789,7 +1920,11 @@ recvmsg (int fd, struct msghdr * msg, int flags)
size = -1;
}
else
- size = total;
+ {
+ if (msg->msg_controllen)
+ ldp_make_cmsg (vlsh, msg);
+ size = total;
+ }
}
else
{
@@ -1799,52 +1934,60 @@ recvmsg (int fd, struct msghdr * msg, int flags)
return size;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
+/*
+ * LD_PRELOAD override of recvmmsg().
+ *
+ * For an fd that maps to a VLS handle, fills up to vlen entries of vmessages
+ * by calling recvmsg() once per entry, retrying after usleep (1) until the
+ * deadline derived from tmo has passed. Returns the number of messages
+ * received, or the result of the last recvmsg() call when none were.
+ * Any other fd is passed straight to libc_recvmmsg().
+ */
int
recvmmsg (int fd, struct mmsghdr *vmessages,
unsigned int vlen, int flags, struct timespec *tmo)
{
- ssize_t size;
- const char *func_str;
- u32 sh = ldp_fd_to_vlsh (fd);
+ ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ u32 sh;
ldp_init_check ();
+ sh = ldp_fd_to_vlsh (fd);
+
if (sh != VLS_INVALID_HANDLE)
{
- clib_warning ("LDP<%d>: LDP-TBD", getpid ());
- errno = ENOSYS;
- size = -1;
- }
- else
- {
- func_str = "libc_recvmmsg";
+ struct mmsghdr *mh;
+ ssize_t rv = 0;
+ u32 nvecs = 0;
+ f64 time_out;
- if (LDP_DEBUG > 2)
- clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
- "vmessages %p, vlen %u, flags 0x%x, tmo %p",
- getpid (), fd, fd, func_str, vmessages, vlen,
- flags, tmo);
-
- size = libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
- }
-
- if (LDP_DEBUG > 2)
- {
- if (size < 0)
+ /* The per-worker clock is initialized on first use */
+ if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
+ clib_time_init (&ldpw->clib_time);
+ /* Convert the relative timeout into an absolute deadline in seconds */
+ if (tmo)
{
- int errno_val = errno;
- perror (func_str);
- clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
- "rv %d, errno = %d", getpid (), fd, fd,
- func_str, size, errno_val);
- errno = errno_val;
+ time_out = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9;
+ time_out += clib_time_now (&ldpw->clib_time);
}
else
- clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
- getpid (), fd, fd, size, size);
+ {
+ /* NOTE(review): ~0 is the int -1, so this stores -1.0 rather than a
+ * large value. The ">= time_out" test below then presumably succeeds on
+ * the first unsuccessful recvmsg (), which would make a NULL tmo
+ * non-blocking instead of waiting indefinitely -- confirm intent. */
+ time_out = (f64) ~0;
+ }
+
+ while (nvecs < vlen)
+ {
+ mh = &vmessages[nvecs];
+ rv = recvmsg (fd, &mh->msg_hdr, flags);
+ if (rv > 0)
+ {
+ mh->msg_len = rv;
+ nvecs += 1;
+ continue;
+ }
+
+ /* Nothing received: give up once the deadline has passed, else retry */
+ if (!time_out || clib_time_now (&ldpw->clib_time) >= time_out)
+ break;
+
+ usleep (1);
+ }
+
+ return nvecs > 0 ? nvecs : rv;
+ }
+ else
+ {
+ return libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
}
- return size;
}
#endif
@@ -1905,6 +2048,21 @@ getsockopt (int fd, int level, int optname,
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case SO_ORIGINAL_DST:
+ rv =
+ vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
+ "optname %d unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
case SOL_IPV6:
switch (optname)
{
@@ -2067,6 +2225,21 @@ setsockopt (int fd, int level, int optname,
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case IP_PKTINFO:
+ rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval,
+ &optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d"
+ "unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
default:
break;
}
@@ -2116,9 +2289,10 @@ listen (int fd, int n)
}
static inline int
-ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr,
- socklen_t * __restrict addr_len, int flags)
+ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr,
+ socklen_t *__restrict addr_len, int flags)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t listen_vlsh, accept_vlsh;
int rv;
@@ -2265,8 +2439,10 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
* was acquired outside of the LD_PRELOAD process context.
* In any case, if we get one, punt it to libc_epoll_ctl.
*/
- LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
- " event %p", epfd, op, fd, event);
+ LDBG (1,
+ "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
+ " events 0x%x",
+ epfd, op, fd, event ? event->events : 0);
rv = libc_epoll_ctl (epfd, op, fd, event);
goto done;
@@ -2279,8 +2455,10 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
if (vlsh != VLS_INVALID_HANDLE)
{
- LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
- " event %p", epfd, vep_vlsh, op, vlsh, event);
+ LDBG (1,
+ "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
+ " events 0x%x",
+ epfd, vep_vlsh, op, vlsh, event ? event->events : 0);
rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
if (rv != VPPCOM_OK)
@@ -2337,7 +2515,7 @@ static inline int
ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
int timeout, const sigset_t * sigmask)
{
- ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ ldp_worker_ctx_t *ldpw;
double time_to_wait = (double) 0, max_time;
int libc_epfd, rv = 0;
vls_handle_t ep_vlsh;
@@ -2350,6 +2528,10 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
return -1;
}
+ if (PREDICT_FALSE (vppcom_worker_index () == ~0))
+ vls_register_vcl_worker ();
+
+ ldpw = ldp_worker_get_current ();
if (epfd == ldpw->vcl_mq_epfd)
return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
@@ -2414,8 +2596,9 @@ static inline int
ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
int maxevents, int timeout, const sigset_t * sigmask)
{
+ int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0;
+ struct epoll_event *libc_evts;
ldp_worker_ctx_t *ldpw;
- int libc_epfd, rv = 0, num_ev;
vls_handle_t ep_vlsh;
ldp_init_check ();
@@ -2491,7 +2674,12 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
/* Request to only drain unhandled to prevent libc_epoll_wait starved */
rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
if (rv > 0)
- goto done;
+ {
+ timeout = 0;
+ if (rv >= maxevents)
+ goto done;
+ maxevents -= rv;
+ }
else if (PREDICT_FALSE (rv < 0))
{
errno = -rv;
@@ -2499,27 +2687,41 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
goto done;
}
- rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
- if (rv <= 0)
- goto done;
- for (int i = 0; i < rv; i++)
+epoll_again:
+
+ libc_evts = &events[rv];
+ libc_num_ev =
+ libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask);
+ if (libc_num_ev <= 0)
{
- if (events[i].data.fd == ldpw->vcl_mq_epfd)
+ rv = rv >= 0 ? rv : -1;
+ goto done;
+ }
+
+ for (int i = 0; i < libc_num_ev; i++)
+ {
+ if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd)
{
/* We should remove mq epoll fd from events. */
- rv--;
- if (i != rv)
+ libc_num_ev--;
+ if (i != libc_num_ev)
{
- events[i].events = events[rv].events;
- events[i].data.u64 = events[rv].data.u64;
+ libc_evts[i].events = libc_evts[libc_num_ev].events;
+ libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64;
}
- num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
+ num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev],
+ maxevents - libc_num_ev, 0);
if (PREDICT_TRUE (num_ev > 0))
rv += num_ev;
+ /* Woken up by vcl but no events generated. Accept it once */
+ if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1)
+ goto epoll_again;
break;
}
}
+ rv += libc_num_ev;
+
done:
return rv;
}
@@ -2648,7 +2850,7 @@ done:
return rv;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
int
ppoll (struct pollfd *fds, nfds_t nfds,
const struct timespec *timeout, const sigset_t * sigmask)