about summary refs log tree commit diff stats
path: root/netlink/librtnl/rtnl.c
diff options
context:
space:
mode:
Diffstat (limited to 'netlink/librtnl/rtnl.c')
-rw-r--r-- netlink/librtnl/rtnl.c 582
1 files changed, 582 insertions, 0 deletions
diff --git a/netlink/librtnl/rtnl.c b/netlink/librtnl/rtnl.c
new file mode 100644
index 0000000..7f017a8
--- /dev/null
+++ b/netlink/librtnl/rtnl.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <librtnl/rtnl.h>
+#include <librtnl/netns.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <float.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+
+typedef enum { /* Event types signaled to the rtnl process node; the event data is an rtnl_ns_t pointer. */
+ RTNL_E_OPEN, /* Signaled by rtnl_stream_open() for a newly allocated stream. */
+ RTNL_E_CLOSE, /* Signaled by rtnl_stream_close() to tear a stream down. */
+ RTNL_E_READ, /* Signaled by rtnl_read_cb() when the netlink socket is readable. */
+} rtnl_event_t;
+
+typedef enum { /* Top-level life cycle of a stream. */
+ RTNL_S_INIT, /* Set by rtnl_stream_open(); the open event has not been processed yet. */
+ RTNL_S_SYNC, /* Set by rtnl_process_open(); dumps are in progress, see rtnl_sync_state_t. */
+ RTNL_S_READY, /* Set by rtnl_sync_done() once the last dump has completed. */
+} rtnl_state_t;
+
+typedef enum { /* Progress of the initial dump sequence while in RTNL_S_SYNC. */
+ RTNL_SS_OPENING, /* Netlink socket not open: initial value, and restored by rtnl_sync_reset(). */
+ RTNL_SS_LINK, /* RTM_GETLINK dump has been requested. */
+ RTNL_SS_ADDR, /* RTM_GETADDR dump has been requested. */
+ RTNL_SS_ROUTE4, /* RTM_GETROUTE dump for AF_INET has been requested. */
+ RTNL_SS_ROUTE6, /* RTM_GETROUTE dump for AF_INET6 has been requested. */
+ RTNL_SS_NEIGH, /* RTM_GETNEIGH dump has been requested. */
+} rtnl_sync_state_t;
+
+typedef struct { /* Per-stream (per network namespace) state, stored in rtnl_main.streams. */
+ rtnl_stream_t stream; /* Copy of the template given to rtnl_stream_open(). */
+ rtnl_state_t state; /* Life-cycle state. */
+ rtnl_sync_state_t sync_state; /* Dump progress; rtnl_sync_done() sets it to 0 when entering RTNL_S_READY. */
+ int ns_fd; /* Descriptor of the namespace file opened by rtnl_stream_open(); passed to setns(). */
+ int rtnl_socket; /* NETLINK_ROUTE socket created by rtnl_thread_fn(). */
+ u32 unix_index; /* Value returned by unix_file_add() for rtnl_socket. */
+ u32 rtnl_seq; /* Sequence number, pre-incremented for every dump request. */
+ f64 timeout; /* Absolute time at which rtnl_process() runs the timeout handler; DBL_MAX when disarmed. */
+} rtnl_ns_t;
+
+typedef struct { /* Module-wide state. */
+ f64 now; /* Time sampled with vlib_time_now() by rtnl_process() after each wake-up. */
+ rtnl_ns_t *streams; /* Pool of streams; the pool index is the handle returned by rtnl_stream_open(). */
+} rtnl_main_t;
+
+static rtnl_main_t rtnl_main; /* Single instance of the module state. */
+static vlib_node_registration_t rtnl_process_node; /* Forward declaration; defined by VLIB_REGISTER_NODE in this file. */
+
+#define RTNL_BUFFSIZ 16384 /* Size in bytes of the receive buffer used by rtnl_process_read(). */
+#define RTNL_DUMP_TIMEOUT 1 /* Added to rm->now to bound the wait for a dump reply; presumably seconds - confirm against vlib_time_now(). */
+
+/*
+ * Arm the stream timer: store the absolute time at which rtnl_process()
+ * should invoke the timeout handler for this stream.
+ */
+static_always_inline void
+rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when)
+{
+  f64 *deadline = &ns->timeout;
+
+  *deadline = when;
+}
+
+/*
+ * Disarm the stream timer. DBL_MAX is the "no deadline" marker, so the
+ * stream never compares as expired in rtnl_process().
+ */
+static_always_inline void
+rtnl_cancel_timeout(rtnl_ns_t *ns)
+{
+  f64 *deadline = &ns->timeout;
+
+  *deadline = DBL_MAX;
+}
+
+/*
+ * Read callback installed on the netlink socket by rtnl_socket_open().
+ * It reads nothing itself: it forwards an RTNL_E_READ event carrying the
+ * stream pointer to the rtnl process node. Always returns 0 (no error).
+ */
+static clib_error_t *rtnl_read_cb(struct unix_file * f)
+{
+  rtnl_ns_t *stream_ns = (rtnl_ns_t *) f->private_data;
+
+  vlib_process_signal_event_pointer(vlib_get_main(), rtnl_process_node.index,
+                                    RTNL_E_READ, stream_ns);
+  return 0;
+}
+
+/*
+ * Send a dump request (NLM_F_DUMP | NLM_F_REQUEST) on the stream socket.
+ *
+ * ns:   stream whose socket is used; its sequence number is incremented
+ *       and stamped into the request.
+ * type: netlink message type (e.g. RTM_GETLINK).
+ * req:  family-specific request payload, sent right after the header.
+ * len:  size of the payload in bytes.
+ *
+ * Returns 0 on success, -1 when sendmsg() fails.
+ */
+int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len)
+{
+  struct sockaddr_nl kernel;
+  struct nlmsghdr header;
+  struct iovec parts[2];
+  struct msghdr message;
+
+  memset(&kernel, 0, sizeof(kernel));
+  kernel.nl_family = AF_NETLINK;
+
+  memset(&header, 0, sizeof(header));
+  header.nlmsg_len = NLMSG_LENGTH(len);
+  header.nlmsg_type = type;
+  header.nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST;
+  header.nlmsg_pid = 0;
+  header.nlmsg_seq = ++ns->rtnl_seq;
+
+  /* Header and payload are gathered from two separate buffers. */
+  parts[0].iov_base = &header;
+  parts[0].iov_len = sizeof(header);
+  parts[1].iov_base = req;
+  parts[1].iov_len = len;
+
+  memset(&message, 0, sizeof(message));
+  message.msg_name = &kernel;
+  message.msg_namelen = sizeof(kernel);
+  message.msg_iov = parts;
+  message.msg_iovlen = 2;
+
+  return (sendmsg(ns->rtnl_socket, &message, 0) < 0) ? -1 : 0;
+}
+
+/*
+ * Unregister the stream socket from the unix file pool and close it.
+ * The caller must only invoke this while the socket is open.
+ */
+static void rtnl_socket_close(rtnl_ns_t *ns)
+{
+  unix_file_t *registered = &unix_main.file_pool[ns->unix_index];
+
+  unix_file_del(&unix_main, registered);
+  close(ns->rtnl_socket);
+}
+
+struct rtnl_thread_exec { /* Arguments handed to rtnl_exec_in_thread_fn(). */
+ int fd; /* Namespace descriptor passed to setns(). */
+ void *(*fn)(void *); /* Function to run once the thread has switched namespace. */
+ void *arg; /* Argument passed to fn. */
+ void **ret; /* Where the value returned by fn is stored. */
+};
+
+/*
+ * Thread entry point: switch the thread to the namespace designated by
+ * the job descriptor, then run the user function there.
+ *
+ * Returns NULL on success (the user function result is stored through
+ * job->ret), or -errno encoded as a pointer when setns() fails.
+ */
+static void *rtnl_exec_in_thread_fn(void *p)
+{
+  struct rtnl_thread_exec *job = (struct rtnl_thread_exec *) p;
+
+  if (setns(job->fd, 0) != 0)
+    return (void *) ((uword) (-errno));
+
+  *job->ret = job->fn(job->arg);
+  return NULL;
+}
+
+/*
+ * Run fn(arg) in a short-lived thread that first enters the network
+ * namespace designated by fd, and wait for it to finish.
+ *
+ * fd:  open descriptor on a namespace file.
+ * fn:  function to execute inside the namespace.
+ * arg: argument passed to fn.
+ * ret: receives the value returned by fn.
+ *
+ * Returns 0 on success, or a negative error number when the thread could
+ * not be created or joined, or when setns() failed inside the thread.
+ */
+static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret)
+{
+  pthread_t thread;
+  void *thread_ret;
+  int rc;
+  struct rtnl_thread_exec ex = {
+    .fd = fd,
+    .fn = fn,
+    .arg = arg,
+    .ret = ret
+  };
+
+  /* pthread functions report failure through their return value and do
+   * not set errno, so the returned error number is what gets negated. */
+  rc = pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex);
+  if (rc)
+    return -rc;
+
+  rc = pthread_join(thread, &thread_ret);
+  if (rc)
+    return -rc;
+
+  /* A non-NULL thread result is -errno from setns(), encoded as a pointer. */
+  if (thread_ret)
+    return (int) ((uword)thread_ret);
+
+  return 0;
+}
+
+/*
+ * Run fn(arg) inside the network namespace attached to an open stream.
+ *
+ * stream_index: handle returned by rtnl_stream_open().
+ * fn, arg, ret: see rtnl_exec_in_namespace_byfd().
+ *
+ * Returns -EBADR when stream_index does not designate a live stream,
+ * otherwise the result of rtnl_exec_in_namespace_byfd().
+ */
+int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret)
+{
+  rtnl_ns_t *target;
+
+  if (pool_is_free_index(rtnl_main.streams, stream_index))
+    return -EBADR;
+
+  target = pool_elt_at_index(rtnl_main.streams, stream_index);
+  return rtnl_exec_in_namespace_byfd(target->ns_fd, fn, arg, ret);
+}
+
+/*
+ * Run fn(arg) inside a network namespace designated by name.
+ *
+ * nsname: namespace name looked up under /var/run/netns/; NULL or an
+ *         empty string selects the namespace of the calling process.
+ * fn, arg, ret: see rtnl_exec_in_namespace_byfd().
+ *
+ * Returns -errno when the namespace file cannot be opened, otherwise the
+ * result of rtnl_exec_in_namespace_byfd().
+ */
+int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret)
+{
+  u8 *s;
+  int fd;
+  int r;
+
+  /* format() builds a vector that is not NUL-terminated; append the
+   * terminator explicitly ("%c", 0) before using it as a C string. */
+  if (nsname && strlen(nsname)) {
+    s = format(0, "/var/run/netns/%s%c", nsname, 0);
+  } else {
+    s = format(0, "/proc/self/ns/net%c", 0);
+  }
+
+  fd = open((char *)s, O_RDONLY);
+  /* Capture errno before vec_free(), which may overwrite it. */
+  r = (fd < 0) ? -errno : 0;
+  vec_free(s);
+  if (fd < 0)
+    return r;
+
+  r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret);
+  close(fd);
+  return r;
+}
+
+/*
+ * Runs in the helper thread spawned by rtnl_socket_open(): enter the
+ * stream namespace and create the NETLINK_ROUTE socket there, storing it
+ * in ns->rtnl_socket.
+ *
+ * Returns NULL on success, (void *) -1 when setns() fails and
+ * (void *) -2 when the socket cannot be created.
+ */
+static void *rtnl_thread_fn(void *p)
+{
+  rtnl_ns_t *ns = (rtnl_ns_t *) p;
+  int sock;
+
+  if (setns(ns->ns_fd, 0) != 0) {
+    clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno);
+    return (void *) -1;
+  }
+
+  sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+  ns->rtnl_socket = sock;
+  if (sock == -1) {
+    clib_warning("Cannot open socket");
+    return (void *) -2;
+  }
+
+  return NULL;
+}
+
+/*
+ * Open the rtnetlink socket of a stream and hook it into the unix file
+ * poller.
+ *
+ * The socket is created by a helper thread (rtnl_thread_fn) that enters
+ * the target namespace; it is then bound to the link, address, route,
+ * neighbor and notify multicast groups and registered with rtnl_read_cb
+ * as its read callback.
+ *
+ * Returns 0 on success, -1 if the thread cannot be created, -2 if it
+ * cannot be joined, -3 if the socket could not be opened or bound.
+ */
+static int rtnl_socket_open(rtnl_ns_t *ns)
+{
+  pthread_t opener;
+  void *opener_status;
+  struct sockaddr_nl local;
+  unix_file_t registration = {0};
+
+  if (pthread_create(&opener, NULL, rtnl_thread_fn, ns) != 0) {
+    clib_warning("Can't create opening thread");
+    return -1;
+  }
+
+  if (pthread_join(opener, &opener_status) != 0) {
+    clib_warning("Can't join opening thread");
+    return -2;
+  }
+
+  if (opener_status != NULL) {
+    clib_warning("Could not open netlink socket");
+    return -3;
+  }
+
+  memset(&local, 0, sizeof(local));
+  local.nl_family = AF_NETLINK;
+  local.nl_pad = 0;
+  local.nl_pid = 0;
+  local.nl_groups =
+    RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_IFADDR |
+    RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_NEIGH |
+    RTMGRP_NOTIFY;
+
+  if (bind(ns->rtnl_socket, (struct sockaddr*) &local, sizeof(local)) != 0) {
+    close(ns->rtnl_socket);
+    return -3;
+  }
+
+  registration.read_function = rtnl_read_cb;
+  registration.file_descriptor = ns->rtnl_socket;
+  registration.private_data = (uword) ns;
+  ns->unix_index = unix_file_add (&unix_main, &registration);
+  return 0;
+}
+
+/*
+ * Extract the error code carried by an NLMSG_ERROR message.
+ *
+ * ns:    stream the message was received on (unused here).
+ * hdr:   header of the received message.
+ * error: receives the error field of the nlmsgerr payload.
+ *
+ * Returns 0 on success, -1 when the message is too short to hold a
+ * complete struct nlmsgerr.
+ */
+static int
+rtnl_rcv_error(rtnl_ns_t *ns, struct nlmsghdr *hdr, int *error)
+{
+  struct nlmsgerr *err = NLMSG_DATA(hdr);
+
+  /* Compare the total length against header + payload rather than
+   * subtracting the header size: the subtraction is unsigned and would
+   * wrap to a huge value for a length shorter than the header, letting a
+   * truncated message through. */
+  if (hdr->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+    return -1;
+
+  *error = err->error;
+  return 0;
+}
+
+/*
+ * Abort the current synchronization: close the netlink socket and go
+ * back to RTNL_SS_OPENING. Does nothing when already in that state,
+ * where no socket is open.
+ */
+static void
+rtnl_sync_reset(rtnl_ns_t *ns)
+{
+  if (ns->sync_state != RTNL_SS_OPENING) {
+    rtnl_socket_close(ns);
+    ns->sync_state = RTNL_SS_OPENING;
+  }
+}
+
+/*
+ * Called when NLMSG_DONE terminates a dump: start the next dump of the
+ * synchronization sequence (link -> addr -> IPv4 routes -> IPv6 routes
+ * -> neighbors), or switch the stream to RTNL_S_READY after the last one.
+ *
+ * If the next request cannot be sent, the synchronization is reset and
+ * retried one time unit later.
+ */
+static void
+rtnl_sync_done(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  union {
+    struct ifaddrmsg addr;
+    struct rtmsg route;
+    struct ndmsg neigh;
+  } request;
+  size_t request_len;
+  int request_type;
+  rtnl_sync_state_t following;
+
+  /* Every request starts zeroed; only the family needs to be filled in. */
+  memset(&request, 0, sizeof(request));
+
+  switch (ns->sync_state) {
+    case RTNL_SS_LINK:
+      request.addr.ifa_family = AF_UNSPEC;
+      request_type = RTM_GETADDR;
+      request_len = sizeof(request.addr);
+      following = RTNL_SS_ADDR;
+      break;
+    case RTNL_SS_ADDR:
+      request.route.rtm_family = AF_INET;
+      request.route.rtm_table = RT_TABLE_UNSPEC;
+      request_type = RTM_GETROUTE;
+      request_len = sizeof(request.route);
+      following = RTNL_SS_ROUTE4;
+      break;
+    case RTNL_SS_ROUTE4:
+      request.route.rtm_family = AF_INET6;
+      request.route.rtm_table = RT_TABLE_UNSPEC;
+      request_type = RTM_GETROUTE;
+      request_len = sizeof(request.route);
+      following = RTNL_SS_ROUTE6;
+      break;
+    case RTNL_SS_ROUTE6:
+      request.neigh.ndm_family = AF_UNSPEC;
+      request_type = RTM_GETNEIGH;
+      request_len = sizeof(request.neigh);
+      following = RTNL_SS_NEIGH;
+      break;
+    case RTNL_SS_NEIGH:
+      /* Last dump finished: the stream is synchronized. */
+      ns->state = RTNL_S_READY;
+      ns->sync_state = 0;
+      rtnl_cancel_timeout(ns);
+      return;
+    case RTNL_SS_OPENING:
+      //Cannot happen here
+      return;
+    default:
+      return;
+  }
+
+  if (rtnl_dump_request(ns, request_type, &request, request_len)) {
+    rtnl_sync_reset(ns);
+    rtnl_schedule_timeout(ns, rm->now + 1);
+    return;
+  }
+
+  rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
+  ns->sync_state = following;
+}
+
+/*
+ * Timer handler for a stream in RTNL_S_SYNC.
+ *
+ * In RTNL_SS_OPENING the timer drives the (re)connection: open the
+ * socket and request the link dump. In any other sync state the timer
+ * means the pending dump did not complete in time, so the
+ * synchronization is reset and restarted shortly after.
+ */
+static void
+rtnl_sync_timeout(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  struct ifinfomsg imsg;
+
+  switch (ns->sync_state) {
+    case RTNL_SS_OPENING:
+      if (rtnl_socket_open(ns)) {
+        rtnl_schedule_timeout(ns, rm->now + 10);
+        return;
+      }
+      memset(&imsg, 0, sizeof(imsg));
+      imsg.ifi_family = AF_UNSPEC;
+      if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) {
+        /* The socket was just opened but sync_state is still
+         * RTNL_SS_OPENING, for which rtnl_sync_reset() does nothing:
+         * close the socket directly, stay in RTNL_SS_OPENING and retry
+         * later instead of advancing to RTNL_SS_LINK. */
+        rtnl_socket_close(ns);
+        rtnl_schedule_timeout(ns, rm->now + 10);
+        return;
+      }
+      ns->sync_state = RTNL_SS_LINK;
+      rtnl_schedule_timeout(ns, rm->now + 2);
+      break;
+    case RTNL_SS_LINK:
+    case RTNL_SS_ADDR:
+    case RTNL_SS_ROUTE4:
+    case RTNL_SS_ROUTE6:
+    case RTNL_SS_NEIGH:
+      //Timeout happened while synchronizing
+      rtnl_sync_reset(ns);
+      rtnl_schedule_timeout(ns, rm->now + 1);
+      break;
+    default:
+      break;
+  }
+}
+
+/*
+ * Handle one netlink message received on a stream.
+ *
+ * ns:  stream the message was read from.
+ * hdr: header of the message; the payload follows it.
+ *
+ * Returns 0 when the caller may continue with the next message, non-zero
+ * when it must stop reading: either the message is malformed or the
+ * synchronization was reset, which closes the socket.
+ */
+static int
+rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  int ret, error = 0;
+
+  /* NOTE(review): RTM_F_NOTIFY is declared as an rtmsg flag; confirm that
+   * testing it against nlmsg_flags is intended. */
+  if (ns->state == RTNL_S_SYNC &&
+      ((hdr->nlmsg_flags & RTM_F_NOTIFY) ||
+       (hdr->nlmsg_seq != (ns->rtnl_seq)))) {
+    clib_warning("Received notification while in sync. Restart synchronization.");
+    rtnl_sync_reset(ns);
+    rtnl_schedule_timeout(ns, rm->now);
+    /* The reset closed the socket and the whole state is dumped again on
+     * restart: drop this message and make rtnl_process_read() stop, so it
+     * does not call recv() on the closed descriptor. */
+    return -1;
+  }
+
+  switch (hdr->nlmsg_type) {
+    case NLMSG_DONE:
+      rtnl_sync_done(ns);
+      break;
+    case NLMSG_ERROR:
+      if((ret = rtnl_rcv_error(ns, hdr, &error)))
+        return ret;
+      /* An error value of 0 is a plain acknowledgement. */
+      if (error)
+        clib_warning("rtnetlink error message: %d", error);
+      break;
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+    case RTM_NEWNEIGH:
+    case RTM_DELNEIGH:
+      /* Forward to the stream owner, if a receive callback was given. */
+      if (ns->stream.recv_message)
+        ns->stream.recv_message(hdr, ns->stream.opaque);
+      break;
+    default:
+      clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type);
+      break;
+  }
+  return 0;
+}
+
+/*
+ * RTNL_E_OPEN handler: move a freshly created stream into RTNL_S_SYNC and
+ * arm its timer for "now", so the socket gets opened by the timeout
+ * handler. Ignored unless the stream is still in RTNL_S_INIT.
+ */
+static void
+rtnl_process_open(rtnl_ns_t *ns)
+{
+  if (ns->state == RTNL_S_INIT) {
+    ns->state = RTNL_S_SYNC;
+    ns->sync_state = RTNL_SS_OPENING;
+    rtnl_schedule_timeout(ns, rtnl_main.now);
+  }
+}
+
+/*
+ * RTNL_E_CLOSE handler: release the resources of a stream and return it
+ * to the pool. Ignored while the stream is still in RTNL_S_INIT.
+ */
+static void
+rtnl_process_close(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  if (ns->state == RTNL_S_INIT)
+    return;
+
+  /* While synchronizing in RTNL_SS_OPENING no socket is open (it was
+   * never opened, or rtnl_sync_reset() closed it), so unregistering and
+   * closing it would act on a stale index and descriptor. RTNL_S_READY
+   * is tested separately because rtnl_sync_done() stores 0 in sync_state
+   * once ready, which equals RTNL_SS_OPENING. */
+  if (ns->state == RTNL_S_READY || ns->sync_state != RTNL_SS_OPENING)
+    rtnl_socket_close(ns);
+
+  close(ns->ns_fd);
+  pool_put(rm->streams, ns);
+}
+
+/*
+ * RTNL_E_READ handler: drain the netlink socket without blocking and
+ * dispatch every message it contains to rtnl_ns_recv().
+ *
+ * Returns 0 once the socket has no more data, -1 on a receive error, on a
+ * malformed message, or when rtnl_ns_recv() asks to stop.
+ */
+static int
+rtnl_process_read(rtnl_ns_t *ns)
+{
+  uint8_t buff[RTNL_BUFFSIZ];
+  ssize_t len;
+  struct nlmsghdr *hdr;
+
+  while(1) {
+    len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT);
+    if (len < 0) {
+      if (errno == EINTR)
+        continue; /* Interrupted before any data arrived: retry. */
+      if (errno == EAGAIN || errno == EWOULDBLOCK)
+        return 0; /* Socket drained. */
+      clib_warning("rtnetlink recv error: %s", strerror(errno));
+      return -1;
+    }
+
+    /* Nothing was delivered: stop instead of spinning on recv(). */
+    if (len == 0)
+      return 0;
+
+    for (hdr = (struct nlmsghdr *) buff; len > 0; hdr = NLMSG_NEXT(hdr, len)) {
+      /* NLMSG_OK rejects a header that does not fit in the remaining
+       * bytes, a length larger than the remaining bytes, and a length
+       * smaller than the header itself. The last case matters because a
+       * zero length would otherwise never advance the loop. */
+      if (!NLMSG_OK(hdr, len)) {
+        /* Only read the length field when a whole header was received. */
+        int claimed = ((size_t) len >= sizeof(*hdr)) ? (int) hdr->nlmsg_len : 0;
+        clib_warning("rtnetlink buffer too small (%d Vs %d)", claimed, (int) len);
+        return -1;
+      }
+      if (rtnl_ns_recv(ns, hdr))
+        return -1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Dispatch an expired stream timer according to the stream state. Only a
+ * synchronizing stream is expected to have a timer armed; any other
+ * known state is reported with a warning.
+ */
+static void
+rtnl_process_timeout(rtnl_ns_t *ns)
+{
+  if (ns->state == RTNL_S_SYNC) {
+    rtnl_sync_timeout(ns);
+    return;
+  }
+
+  if (ns->state == RTNL_S_INIT || ns->state == RTNL_S_READY)
+    clib_warning("Should not happen");
+}
+
+/*
+ * Body of the "rtnl-process" process node.
+ *
+ * Loops forever: sleeps until an event is signaled or the earliest stream
+ * timer is due, then either runs the expired timers (clock wake-up) or
+ * dispatches the received open/close/read events, and finally recomputes
+ * the earliest deadline among all streams.
+ */
+static uword
+rtnl_process (vlib_main_t * vm,
+              vlib_node_runtime_t * node,
+              vlib_frame_t * frame)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  uword *pending = 0;
+  f64 wakeup = DBL_MAX;
+  uword what;
+  rtnl_ns_t *ns;
+
+  rm->now = vlib_time_now(vm);
+
+  for (;;) {
+    vlib_process_wait_for_event_or_clock(vm, wakeup - rm->now);
+    what = vlib_process_get_events(vm, &pending);
+    rm->now = vlib_time_now(vm);
+
+    if (what != ~0) {
+      /* Each event datum is the pointer of the stream it concerns. */
+      uword *slot;
+      vec_foreach(slot, pending) {
+        ns = (rtnl_ns_t *)slot[0];
+        if (what == RTNL_E_CLOSE)
+          rtnl_process_close(ns);
+        else if (what == RTNL_E_OPEN)
+          rtnl_process_open(ns);
+        else if (what == RTNL_E_READ)
+          rtnl_process_read(ns);
+      }
+    } else {
+      //Clock event or no event
+      pool_foreach(ns, rm->streams, {
+        if (ns->timeout < rm->now) {
+          /* Disarm before calling the handler, which may re-arm it. */
+          rtnl_cancel_timeout(ns);
+          rtnl_process_timeout(ns);
+        }
+      });
+    }
+
+    vec_reset_length (pending);
+
+    /* Next wake-up is the earliest deadline over all streams. */
+    wakeup = DBL_MAX;
+    pool_foreach(ns, rm->streams, {
+      if (ns->timeout < wakeup)
+        wakeup = ns->timeout;
+    });
+  }
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE(rtnl_process_node, static) = { /* Registers the node that all stream events are signaled to. */
+ .function = rtnl_process, /* Node body defined above. */
+ .name = "rtnl-process",
+ .type = VLIB_NODE_TYPE_PROCESS, /* Process node: rtnl_process() suspends itself waiting for events or the clock. */
+};
+
+/*
+ * Create a stream for a network namespace and ask the rtnl process node
+ * to open and synchronize it.
+ *
+ * template: stream description copied into the new stream; an empty
+ *           name selects the namespace of the calling process, otherwise
+ *           the name is looked up under /var/run/netns/.
+ *
+ * Returns the stream index to use with rtnl_stream_close() and
+ * rtnl_exec_in_namespace(), or ~0 when the namespace file cannot be
+ * opened.
+ */
+u32
+rtnl_stream_open(rtnl_stream_t *template)
+{
+  vlib_main_t *vm = vlib_get_main();
+  rtnl_main_t *rm = &rtnl_main;
+  rtnl_ns_t *ns;
+  int fd;
+  u8 *s;
+
+  /* format() builds a vector that is not NUL-terminated; append the
+   * terminator explicitly ("%c", 0) before using it as a C string. */
+  if (strlen(template->name)) {
+    s = format(0, "/var/run/netns/%s%c", template->name, 0);
+  } else {
+    s = format(0, "/proc/self/ns/net%c", 0);
+  }
+
+  fd = open((char *)s, O_RDONLY);
+  vec_free(s);
+  if (fd < 0)
+    return ~0;
+
+  pool_get(rm->streams, ns);
+  /* pool_get() does not clear the element: give every field a defined
+   * value. In particular the timer must start disarmed, because
+   * rtnl_process() scans the timeout of every stream in the pool. */
+  memset(ns, 0, sizeof(*ns));
+  ns->state = RTNL_S_INIT;
+  ns->sync_state = RTNL_SS_OPENING;
+  ns->ns_fd = fd;
+  ns->rtnl_socket = -1;
+  ns->unix_index = ~0;
+  ns->timeout = DBL_MAX;
+  ns->stream = *template;
+  vlib_process_signal_event_pointer(vm, rtnl_process_node.index, RTNL_E_OPEN, ns);
+  return ns - rm->streams;
+}
+
+/*
+ * Request the closing of a stream. The actual tear-down is performed by
+ * the rtnl process node when it handles the RTNL_E_CLOSE event.
+ *
+ * stream_index: handle returned by rtnl_stream_open(); it must designate
+ *               a live stream (checked with ASSERT).
+ */
+void
+rtnl_stream_close(u32 stream_index)
+{
+  rtnl_ns_t *closing;
+
+  ASSERT(!pool_is_free_index(rtnl_main.streams, stream_index));
+  closing = pool_elt_at_index(rtnl_main.streams, stream_index);
+  vlib_process_signal_event_pointer(vlib_get_main(), rtnl_process_node.index,
+                                    RTNL_E_CLOSE, closing);
+}
+
+/*
+ * Module init function: start with an empty stream pool.
+ * Always returns 0 (no error).
+ */
+clib_error_t *
+rtnl_init (vlib_main_t * vm)
+{
+  rtnl_main.streams = 0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (rtnl_init);