summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/socket.c
diff options
context:
space:
mode:
authorDamjan Marion <dmarion@me.com>2023-01-30 11:48:38 +0100
committerOle Tr�an <otroan@employees.org>2023-02-06 10:17:40 +0000
commit085757bb4930511928daa97f972cdca021e7a813 (patch)
tree5edf076e28156d9834fa37029028f2f5c5f568b9 /src/vppinfra/socket.c
parent0df06b6e95b6a3261c8e9c261c7a4a661d6ea25a (diff)
vppinfra: refactor clib_socket_init, add linux netns support
Type: improvement Change-Id: Ida2d044bccf0bc8914b4fe7d383f827400fa6a52 Signed-off-by: Damjan Marion <dmarion@me.com>
Diffstat (limited to 'src/vppinfra/socket.c')
-rw-r--r--src/vppinfra/socket.c567
1 files changed, 370 insertions, 197 deletions
diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c
index a3024ae7a8c..e61c369f050 100644
--- a/src/vppinfra/socket.c
+++ b/src/vppinfra/socket.c
@@ -93,108 +93,6 @@ find_free_port (word sock)
return port < 1 << 16 ? port : -1;
}
-/* Convert a config string to a struct sockaddr and length for use
- with bind or connect. */
-static clib_error_t *
-socket_config (char *config,
- void *addr, socklen_t * addr_len, u32 ip4_default_address)
-{
- clib_error_t *error = 0;
-
- if (!config)
- config = "";
-
- /* Anything that begins with a / is a local PF_LOCAL socket. */
- if (config[0] == '/')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
- *addr_len = sizeof (su[0]);
- }
-
- /* Treat everything that starts with @ as an abstract socket. */
- else if (config[0] == '@')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
-
- *addr_len = sizeof (su->sun_family) + strlen (config);
- su->sun_path[0] = '\0';
- }
-
- /* Hostname or hostname:port or port. */
- else
- {
- char *host_name;
- int port = -1;
- struct sockaddr_in *sa = addr;
-
- host_name = 0;
- port = -1;
- if (config[0] != 0)
- {
- unformat_input_t i;
-
- unformat_init_string (&i, config, strlen (config));
- if (unformat (&i, "%s:%d", &host_name, &port)
- || unformat (&i, "%s:0x%x", &host_name, &port))
- ;
- else if (unformat (&i, "%s", &host_name))
- ;
- else
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, &i);
- unformat_free (&i);
-
- if (error)
- goto done;
- }
-
- sa->sin_family = PF_INET;
- *addr_len = sizeof (sa[0]);
- if (port != -1)
- sa->sin_port = htons (port);
- else
- sa->sin_port = 0;
-
- if (host_name)
- {
- struct in_addr host_addr;
-
- /* Recognize localhost to avoid host lookup in most common cast. */
- if (!strcmp (host_name, "localhost"))
- sa->sin_addr.s_addr = htonl (INADDR_LOOPBACK);
-
- else if (inet_aton (host_name, &host_addr))
- sa->sin_addr = host_addr;
-
- else if (host_name && strlen (host_name) > 0)
- {
- struct hostent *host = gethostbyname (host_name);
- if (!host)
- error = clib_error_return (0, "unknown host `%s'", config);
- else
- clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0],
- host->h_length);
- }
-
- else
- sa->sin_addr.s_addr = htonl (ip4_default_address);
-
- vec_free (host_name);
- if (error)
- goto done;
- }
- }
-
-done:
- return error;
-}
-
static clib_error_t *
default_socket_write (clib_socket_t * s)
{
@@ -253,7 +151,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
u8 *buf;
/* RX side of socket is down once end of file is reached. */
- if (sock->flags & CLIB_SOCKET_F_RX_END_OF_FILE)
+ if (sock->rx_end_of_file)
return 0;
fd = sock->fd;
@@ -275,7 +173,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
/* Other side closed the socket. */
if (n_read == 0)
- sock->flags |= CLIB_SOCKET_F_RX_END_OF_FILE;
+ sock->rx_end_of_file = 1;
non_fatal:
vec_inc_len (sock->rx_buffer, n_read - n_bytes);
@@ -328,7 +226,7 @@ static clib_error_t *
default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
int fds[], int num_fds)
{
-#ifdef __linux__
+#ifdef CLIB_LINUX
char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
CMSG_SPACE (sizeof (struct ucred))];
struct ucred *cr = 0;
@@ -363,7 +261,7 @@ default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
{
if (cmsg->cmsg_level == SOL_SOCKET)
{
-#ifdef __linux__
+#ifdef CLIB_LINUX
if (cmsg->cmsg_type == SCM_CREDENTIALS)
{
cr = (struct ucred *) CMSG_DATA (cmsg);
@@ -399,158 +297,433 @@ socket_init_funcs (clib_socket_t * s)
s->recvmsg_func = default_socket_recvmsg;
}
+static const struct
+{
+ char *prefix;
+ sa_family_t family;
+ clib_socket_type_t type;
+ u16 skip_prefix : 1;
+ u16 is_local : 1;
+} clib_socket_type_data[] = {
+ { .prefix = "unix:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 1,
+ .is_local = 1 },
+ { .prefix = "tcp:",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 1 },
+ { .prefix = "abstract:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
+ .skip_prefix = 1,
+ .is_local = 1 },
+ { .prefix = "/",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+ { .prefix = "",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 0,
+ .is_local = 0 },
+ { .prefix = "",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+};
+
+static u8 *
+_clib_socket_get_string (char **p, int is_hostname)
+{
+ u8 *s = 0;
+ while (**p)
+ {
+ switch (**p)
+ {
+ case '_':
+ if (is_hostname)
+ return s;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '/':
+ case '-':
+ case '.':
+ vec_add1 (s, **p);
+ (*p)++;
+ break;
+ break;
+ default:
+ return s;
+ }
+ }
+ return s;
+}
+
+__clib_export int
+clib_socket_prefix_is_valid (char *s)
+{
+ for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
+ d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
+ if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
+ return 1;
+ return 0;
+}
+
__clib_export clib_error_t *
-clib_socket_init (clib_socket_t * s)
+clib_socket_init (clib_socket_t *s)
{
- union
- {
- struct sockaddr sa;
- struct sockaddr_un su;
- } addr;
+ struct sockaddr_un su = { .sun_family = AF_UNIX };
+ struct sockaddr_in si = { .sin_family = AF_INET };
+ struct sockaddr *sa = 0;
+ typeof (clib_socket_type_data[0]) *data = 0;
socklen_t addr_len = 0;
- int socket_type, rv;
- clib_error_t *error = 0;
- word port;
+ int rv;
+ char *p;
+ clib_error_t *err = 0;
+ u8 *name = 0;
+ u16 port = 0;
+#if CLIB_LINUX
+ int netns_fd = -1;
+#endif
- error = socket_config (s->config, &addr.sa, &addr_len,
- (s->flags & CLIB_SOCKET_F_IS_SERVER
- ? INADDR_LOOPBACK : INADDR_ANY));
- if (error)
- goto done;
+ s->fd = -1;
- socket_init_funcs (s);
+ if (!s->config)
+ s->config = "";
+
+ for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
+ {
+ typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
+
+ if (d->is_local == 0 && s->local_only)
+ continue;
+
+ if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
+ {
+ data = d;
+ break;
+ }
+ }
+
+ if (data == 0)
+ return clib_error_return (0, "unsupported socket config '%s'", s->config);
+
+ s->type = data->type;
+ p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
- socket_type = s->flags & CLIB_SOCKET_F_SEQPACKET ?
- SOCK_SEQPACKET : SOCK_STREAM;
+ name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
+ vec_add1 (name, 0);
- s->fd = socket (addr.sa.sa_family, socket_type, 0);
- if (s->fd < 0)
+ /* parse port type for INET sockets */
+ if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
{
- error = clib_error_return_unix (0, "socket (fd %d, '%s')",
- s->fd, s->config);
+ char *old_p = p + 1;
+ long long ll = strtoll (old_p, &p, 0);
+
+ if (p == old_p)
+ {
+ err = clib_error_return (0, "invalid port");
+ goto done;
+ }
+
+ if (ll > CLIB_U16_MAX || ll < 1)
+ {
+ err = clib_error_return (0, "port out of range");
+ goto done;
+ }
+ port = ll;
+ }
+
+ while (p[0] == ',')
+ {
+ p++;
+ if (0)
+ ;
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_name=", 11) == 0)
+ {
+ p += 11;
+ u8 *str = _clib_socket_get_string (&p, 0);
+ u8 *pathname = format (0, "/var/run/netns/%v%c", str, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (str);
+ vec_free (pathname);
+ if (err)
+ goto done;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_pid=", 10) == 0)
+ {
+ char *old_p = p = p + 10;
+ u32 pid = (u32) strtol (old_p, &p, 0);
+
+ if (p == old_p)
+ err = clib_error_return (0, "invalid pid");
+ else
+ {
+ u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (pathname);
+ }
+ if (err)
+ goto done;
+ }
+#endif
+ else
+ break;
+ }
+
+ if (p[0] != 0)
+ {
+ err = clib_error_return (0, "unknown input `%s'", p);
goto done;
}
- port = 0;
- if (addr.sa.sa_family == PF_INET)
- port = ((struct sockaddr_in *) &addr)->sin_port;
+#if CLIB_LINUX
+ /* change netns if requested */
+ if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
+ {
+ int fd = open ("/proc/self/ns/net", O_RDONLY);
+
+ if (setns (netns_fd, CLONE_NEWNET) < 0)
+ {
+ close (fd);
+ err = clib_error_return_unix (0, "setns(%d)", netns_fd);
+ goto done;
+ }
+ netns_fd = fd;
+ }
+#endif
- if (s->flags & CLIB_SOCKET_F_IS_SERVER)
+ if (s->type == CLIB_SOCKET_TYPE_INET)
{
- uword need_bind = 1;
+ addr_len = sizeof (si);
+ si.sin_port = htons (port);
+
+ if (name)
+ {
+ struct in_addr host_addr;
+ vec_add1 (name, 0);
+
+ /* Recognize localhost to avoid host lookup in most common cast. */
+ if (!strcmp ((char *) name, "localhost"))
+ si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+
+ else if (inet_aton ((char *) name, &host_addr))
+ si.sin_addr = host_addr;
+
+ else if (strlen ((char *) name) > 0)
+ {
+ struct hostent *host = gethostbyname ((char *) name);
+ if (!host)
+ err = clib_error_return (0, "unknown host `%s'", name);
+ else
+ clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
+ host->h_length);
+ }
+
+ else
+ si.sin_addr.s_addr =
+ htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
+
+ if (err)
+ goto done;
+ }
+ sa = (struct sockaddr *) &si;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_UNIX)
+ {
+ struct stat st = { 0 };
+ char *path = (char *) &su.sun_path;
- if (addr.sa.sa_family == PF_INET)
+ if (vec_len (name) > sizeof (su.sun_path) - 1)
{
- if (port == 0)
+ err = clib_error_return (0, "File path '%v' too long", name);
+ goto done;
+ }
+
+ clib_memcpy (path, s->config, vec_len (name));
+ addr_len = sizeof (su);
+ sa = (struct sockaddr *) &su;
+
+ rv = stat (path, &st);
+ if (!s->is_server && rv < 0)
+ {
+ err = clib_error_return_unix (0, "stat ('%s')", path);
+ goto done;
+ }
+
+ if (s->is_server && rv == 0)
+ {
+ if (S_ISSOCK (st.st_mode))
{
- port = find_free_port (s->fd);
- if (port < 0)
+ int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ int ret = connect (client_fd, (const struct sockaddr *) &su,
+ sizeof (su));
+ typeof (errno) connect_errno = errno;
+ close (client_fd);
+
+ if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
+ {
+ err = clib_error_return (0, "Active listener on '%s'", path);
+ goto done;
+ }
+
+ if (unlink (path) < 0)
{
- error = clib_error_return (0, "no free port (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "unlink ('%s')", path);
goto done;
}
- need_bind = 0;
}
+ else
+ {
+ err = clib_error_return (0, "File '%s' already exists", path);
+ goto done;
+ }
+ }
+ }
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
+ {
+ if (vec_len (name) > sizeof (su.sun_path) - 2)
+ {
+ err = clib_error_return (0, "Socket name '%v' too long", name);
+ goto done;
+ }
+
+ clib_memcpy (&su.sun_path[1], name, vec_len (name));
+ addr_len = sizeof (su.sun_family) + vec_len (name);
+ sa = (struct sockaddr *) &su;
+ s->allow_group_write = 0;
+ }
+#endif
+ else
+ {
+ err = clib_error_return_unix (0, "unknown socket family");
+ goto done;
+ }
+
+ socket_init_funcs (s);
+
+ if ((s->fd = socket (sa->sa_family,
+ s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
+ {
+ err =
+ clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
+ goto done;
+ }
+
+ if (s->is_server)
+ {
+ uword need_bind = 1;
+
+ if (sa->sa_family == AF_INET && si.sin_port == 0)
+ {
+ word port = find_free_port (s->fd);
+ if (port < 0)
+ {
+ err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
+ si.sin_port = port;
+ need_bind = 0;
}
- if (addr.sa.sa_family == PF_LOCAL &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
- unlink (((struct sockaddr_un *) &addr)->sun_path);
-
- /* Make address available for multiple users. */
- {
- int v = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof (v)) < 0)
- clib_unix_warning ("setsockopt SO_REUSEADDR fails");
- }
-
-#if __linux__
- if (addr.sa.sa_family == PF_LOCAL && s->flags & CLIB_SOCKET_F_PASSCRED)
+
+ if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
+ sizeof (int)) < 0)
+ clib_unix_warning ("setsockopt SO_REUSEADDR fails");
+
+#if CLIB_LINUX
+ if (sa->sa_family == AF_UNIX && s->passcred)
{
- int x = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &x, sizeof (x)) < 0)
+ if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
+ sizeof (int)) < 0)
{
- error = clib_error_return_unix (0, "setsockopt (SO_PASSCRED, "
- "fd %d, '%s')", s->fd,
- s->config);
+ err = clib_error_return_unix (0,
+ "setsockopt (SO_PASSCRED, "
+ "fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
}
#endif
- if (need_bind && bind (s->fd, &addr.sa, addr_len) < 0)
+ if (need_bind && bind (s->fd, sa, addr_len) < 0)
{
- error = clib_error_return_unix (0, "bind (fd %d, '%s')",
- s->fd, s->config);
+ err =
+ clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd, s->config);
goto done;
}
if (listen (s->fd, 5) < 0)
{
- error = clib_error_return_unix (0, "listen (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
- if (addr.sa.sa_family == PF_LOCAL &&
- s->flags & CLIB_SOCKET_F_ALLOW_GROUP_WRITE &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
+
+ if (s->local_only && s->allow_group_write)
{
- struct stat st = { 0 };
- if (stat (((struct sockaddr_un *) &addr)->sun_path, &st) < 0)
+ if (fchmod (s->fd, S_IWGRP) < 0)
{
- error = clib_error_return_unix (0, "stat (fd %d, '%s')",
- s->fd, s->config);
- goto done;
- }
- st.st_mode |= S_IWGRP;
- if (chmod (((struct sockaddr_un *) &addr)->sun_path, st.st_mode) <
- 0)
- {
- error =
- clib_error_return_unix (0, "chmod (fd %d, '%s', mode %o)",
- s->fd, s->config, st.st_mode);
+ err = clib_error_return_unix (
+ 0, "fchmod (fd %d, '%s', mode S_IWGRP)", s->fd, s->config);
goto done;
}
}
}
else
{
- if ((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT)
- && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
+ if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
- while ((rv = connect (s->fd, &addr.sa, addr_len)) < 0
- && errno == EAGAIN)
+ while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
;
- if (rv < 0 && !((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- errno == EINPROGRESS))
+ if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
{
- error = clib_error_return_unix (0, "connect (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
/* Connect was blocking so set fd to non-blocking now unless
* blocking mode explicitly requested. */
- if (!(s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- !(s->flags & CLIB_SOCKET_F_BLOCKING) &&
+ if (!s->non_blocking_connect && !s->is_blocking &&
fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
}
- return error;
-
done:
- if (s->fd > 0)
- close (s->fd);
- return error;
+ if (err && s->fd > -1)
+ {
+ close (s->fd);
+ s->fd = -1;
+ }
+#if CLIB_LINUX
+ if (netns_fd != -1)
+ {
+ setns (CLONE_NEWNET, netns_fd);
+ close (netns_fd);
+ }
+#endif
+ vec_free (name);
+ return err;
}
__clib_export clib_error_t *