diff options
Diffstat (limited to 'netlink/librtnl/rtnl.c')
-rw-r--r-- | netlink/librtnl/rtnl.c | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/netlink/librtnl/rtnl.c b/netlink/librtnl/rtnl.c new file mode 100644 index 0000000..7f017a8 --- /dev/null +++ b/netlink/librtnl/rtnl.c @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <librtnl/rtnl.h> +#include <librtnl/netns.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/error.h> + +#include <sys/socket.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <float.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/wait.h> + +typedef enum { + RTNL_E_OPEN, + RTNL_E_CLOSE, + RTNL_E_READ, +} rtnl_event_t; + +typedef enum { + RTNL_S_INIT, + RTNL_S_SYNC, + RTNL_S_READY, +} rtnl_state_t; + +typedef enum { + RTNL_SS_OPENING, + RTNL_SS_LINK, + RTNL_SS_ADDR, + RTNL_SS_ROUTE4, + RTNL_SS_ROUTE6, + RTNL_SS_NEIGH, +} rtnl_sync_state_t; + +typedef struct { + rtnl_stream_t stream; + rtnl_state_t state; + rtnl_sync_state_t sync_state; + int ns_fd; + int rtnl_socket; + u32 unix_index; + u32 rtnl_seq; + f64 timeout; +} rtnl_ns_t; + +typedef struct { + f64 now; + rtnl_ns_t *streams; +} rtnl_main_t; + +static rtnl_main_t rtnl_main; +static vlib_node_registration_t rtnl_process_node; + +#define RTNL_BUFFSIZ 16384 +#define RTNL_DUMP_TIMEOUT 1 + +static_always_inline void +rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when) +{ + ns->timeout = when; +} + +static_always_inline void +rtnl_cancel_timeout(rtnl_ns_t *ns) +{ + ns->timeout = DBL_MAX; +} + +static clib_error_t *rtnl_read_cb(struct unix_file * f) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_ns_t *ns = (rtnl_ns_t *) f->private_data; + vlib_process_signal_event_pointer(vm, rtnl_process_node.index, RTNL_E_READ, ns); + return 0; +} + +int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr nlh = { + .nlmsg_len = NLMSG_LENGTH(len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, + .nlmsg_pid = 0, + .nlmsg_seq = ++ns->rtnl_seq, + }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, + }; + if(sendmsg(ns->rtnl_socket, &msg, 0) < 0) + return -1; + return 0; +} + +static void rtnl_socket_close(rtnl_ns_t *ns) +{ + unix_file_del(&unix_main, &unix_main.file_pool[ns->unix_index]); + close(ns->rtnl_socket); +} + +struct rtnl_thread_exec { + int fd; + void *(*fn)(void *); + void *arg; + void **ret; +}; + +static void *rtnl_exec_in_thread_fn(void *p) +{ + struct rtnl_thread_exec *ex = (struct rtnl_thread_exec *) p; + if (setns(ex->fd, 0)) + return (void *) ((uword) (-errno)); + + *ex->ret = ex->fn(ex->arg); + return NULL; +} + +static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret) +{ + pthread_t thread; + void *thread_ret; + struct rtnl_thread_exec ex = { + .fd = fd, + .fn = fn, + .arg = arg, + .ret = ret + }; + if(pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex)) + return -errno; + + if(pthread_join(thread, &thread_ret)) + return -errno; + + if (thread_ret) + return (int) ((uword)thread_ret); + + return 0; +} + +int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret) +{ + rtnl_main_t *rm = &rtnl_main; + if (pool_is_free_index(rm->streams, stream_index)) + return -EBADR; + + rtnl_ns_t *ns = pool_elt_at_index(rm->streams, stream_index); + return rtnl_exec_in_namespace_byfd(ns->ns_fd, fn, arg, ret); +} + +int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret) +{ + u8 *s; + int fd; + if (nsname && strlen(nsname)) { + s = format(0, "/var/run/netns/%s", nsname); + } else { + s = format(0, "/proc/self/ns/net"); + } + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + vec_free(s); + return -errno; + } + + int r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret); + vec_free(s); + close(fd); + return r; +} + +/* this function is run by the second thread */ +static void *rtnl_thread_fn(void *p) +{ + rtnl_ns_t *ns = (rtnl_ns_t *) p; + if (setns(ns->ns_fd, 0)) { + clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno); + return (void *) -1; + } + + if ((ns->rtnl_socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) { + clib_warning("Cannot open socket"); + return (void *) -2; + } + + return NULL; +} + +static int rtnl_socket_open(rtnl_ns_t *ns) +{ + pthread_t thread; + void *thread_ret; + if(pthread_create(&thread, NULL, rtnl_thread_fn, ns)) { + clib_warning("Can't create opening thread"); + return -1; + } + + if(pthread_join(thread, &thread_ret)) { + clib_warning("Can't join opening thread"); + return -2; + } + + if (thread_ret) { + clib_warning("Could not open netlink socket"); + return -3; + } + + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + .nl_pad = 0, + .nl_pid = 0, + .nl_groups = + RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_IFADDR | + RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_NEIGH | + RTMGRP_NOTIFY, + }; + + if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr))) { + close(ns->rtnl_socket); + return -3; + } + + unix_file_t template = {0}; + template.read_function = rtnl_read_cb; + template.file_descriptor = ns->rtnl_socket; + template.private_data = (uword) ns; + ns->unix_index = unix_file_add (&unix_main, &template); + return 0; +} + +static int +rtnl_rcv_error(rtnl_ns_t *ns, struct nlmsghdr *hdr, int *error) +{ + struct nlmsgerr *err = NLMSG_DATA(hdr); + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + if(datalen < sizeof(*err)) + return -1; + + *error = err->error; + return 0; +} + +static void +rtnl_sync_reset(rtnl_ns_t *ns) +{ + if (ns->sync_state == RTNL_SS_OPENING) + return; + + rtnl_socket_close(ns); + ns->sync_state = RTNL_SS_OPENING; +} + +static void +rtnl_sync_done(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + struct ifaddrmsg addrmsg; + struct rtmsg rtmsg; + struct ndmsg ndmsg; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + //Cannot happen here + break; + case RTNL_SS_LINK: + memset(&addrmsg, 0, sizeof(addrmsg)); + addrmsg.ifa_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_ADDR; + break; + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + memset(&rtmsg, 0, sizeof(rtmsg)); + rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6; + rtmsg.rtm_table = RT_TABLE_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6; + break; + case RTNL_SS_ROUTE6: + memset(&ndmsg, 0, sizeof(ndmsg)); + ndmsg.ndm_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_NEIGH; + break; + case RTNL_SS_NEIGH: + ns->state = RTNL_S_READY; + ns->sync_state = 0; + rtnl_cancel_timeout(ns); + break; + } +} + +static void +rtnl_sync_timeout(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + struct ifinfomsg imsg = {}; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + if (rtnl_socket_open(ns)) { + rtnl_schedule_timeout(ns, rm->now + 10); + return; + } + imsg.ifi_family = AF_UNSPEC; + if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 10); + } + ns->sync_state = RTNL_SS_LINK; + rtnl_schedule_timeout(ns, rm->now + 2); + break; + case RTNL_SS_LINK: + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + case RTNL_SS_ROUTE6: + case RTNL_SS_NEIGH: + //Timeout happened while synchronizing + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + break; + } +} + +static int +rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr) +{ + rtnl_main_t *rm = &rtnl_main; + int ret, error = 0; + + if (ns->state == RTNL_S_SYNC && + ((hdr->nlmsg_flags & RTM_F_NOTIFY) || + (hdr->nlmsg_seq != (ns->rtnl_seq)))) { + clib_warning("Received notification while in sync. Restart synchronization."); + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now); + } + + switch (hdr->nlmsg_type) { + case NLMSG_DONE: + rtnl_sync_done(ns); + break; + case NLMSG_ERROR: + if((ret = rtnl_rcv_error(ns, hdr, &error))) + return ret; + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + if (ns->stream.recv_message) + ns->stream.recv_message(hdr, ns->stream.opaque); + break; + default: + clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type); + break; + } + return 0; +} + +static void +rtnl_process_open(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state != RTNL_S_INIT) + return; + + ns->state = RTNL_S_SYNC; + ns->sync_state = RTNL_SS_OPENING; + rtnl_schedule_timeout(ns, rm->now); +} + +static void +rtnl_process_close(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state == RTNL_S_INIT) + return; + + rtnl_socket_close(ns); + close(ns->ns_fd); + pool_put(rm->streams, ns); +} + +static int +rtnl_process_read(rtnl_ns_t *ns) +{ + uint8_t buff[RTNL_BUFFSIZ]; + ssize_t len; + struct nlmsghdr *hdr; + while(1) { + if((len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT)) < 0) { + if(errno != EAGAIN) { + clib_warning("rtnetlink recv error: %s", strerror(errno)); + return -1; + } + return 0; + } + + for(hdr = (struct nlmsghdr *) buff; + len > 0; + len -= NLMSG_ALIGN(hdr->nlmsg_len), + hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) { + if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) { + clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len); + return -1; + } + if (rtnl_ns_recv(ns, hdr)) + return -1; + } + } + return 0; +} + +static void +rtnl_process_timeout(rtnl_ns_t *ns) +{ + switch (ns->state) { + case RTNL_S_SYNC: + rtnl_sync_timeout(ns); + break; + case RTNL_S_INIT: + case RTNL_S_READY: + clib_warning("Should not happen"); + break; + } +} + +static uword +rtnl_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtnl_main_t *rm = &rtnl_main; + uword event_type; + uword *event_data = 0; + rm->now = vlib_time_now(vm); + f64 timeout = DBL_MAX; + rtnl_ns_t *ns; + + //Setting up + while (1) { + vlib_process_wait_for_event_or_clock(vm, timeout - rm->now); + event_type = vlib_process_get_events(vm, &event_data); + rm->now = vlib_time_now(vm); + + if (event_type == ~0) { //Clock event or no event + pool_foreach(ns, rm->streams, { + if (ns->timeout < rm->now) { + ns->timeout = DBL_MAX; + rtnl_process_timeout(ns); + } + }); + } else { + rtnl_ns_t *ns; + uword *d; + vec_foreach(d, event_data) { + ns = (rtnl_ns_t *)d[0]; + switch (event_type) + { + case RTNL_E_CLOSE: + rtnl_process_close(ns); + break; + case RTNL_E_OPEN: + rtnl_process_open(ns); + break; + case RTNL_E_READ: + rtnl_process_read(ns); + break; + } + } + } + + vec_reset_length (event_data); + + timeout = DBL_MAX; + pool_foreach(ns, rm->streams, { + if (ns->timeout < timeout) + timeout = ns->timeout; + }); + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE(rtnl_process_node, static) = { + .function = rtnl_process, + .name = "rtnl-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; + +u32 +rtnl_stream_open(rtnl_stream_t *template) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + rtnl_ns_t *ns; + int fd; + u8 *s; + + if (strlen(template->name)) { + s = format(0, "/var/run/netns/%s", template->name); + } else { + s = format(0, "/proc/self/ns/net"); + } + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + vec_free(s); + return ~0; + } + + vec_free(s); + pool_get(rm->streams, ns); + ns->state = RTNL_S_INIT; + ns->ns_fd = fd; + ns->stream = *template; + vlib_process_signal_event_pointer(vm, rtnl_process_node.index, RTNL_E_OPEN, ns); + return ns - rm->streams; +} + +void +rtnl_stream_close(u32 stream_index) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + ASSERT(!pool_is_free_index(rm->streams, stream_index)); + rtnl_ns_t *ns = pool_elt_at_index(rm->streams, stream_index); + vlib_process_signal_event_pointer(vm, rtnl_process_node.index, RTNL_E_CLOSE, ns); +} + +clib_error_t * +rtnl_init (vlib_main_t * vm) +{ + rtnl_main_t *rm = &rtnl_main; + rm->streams = 0; + return 0; +} + +VLIB_INIT_FUNCTION (rtnl_init); |