diff options
Diffstat (limited to 'extras/router-plugin/devices/rtnetlink')
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/mapper.c | 270 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/mapper.h | 35 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/netns.c | 787 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/netns.h | 145 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/rtnl.c | 604 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/rtnl.h | 60 | ||||
-rw-r--r-- | extras/router-plugin/devices/rtnetlink/test.c | 203 |
7 files changed, 2104 insertions, 0 deletions
diff --git a/extras/router-plugin/devices/rtnetlink/mapper.c b/extras/router-plugin/devices/rtnetlink/mapper.c new file mode 100644 index 000000000..ed4fa5634 --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/mapper.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#include <vnet/fib/fib.h> + +#include "netns.h" +#include "mapper.h" + +typedef struct { + int linux_ifindex; + u32 sw_if_index; +} mapper_map_t; + +typedef struct { + char nsname[RTNL_NETNS_NAMELEN + 1]; + mapper_map_t *mappings; + u32 netns_handle; //Used to receive notifications + u32 v4fib_index; //One fib index for the namespace + u32 v6fib_index; +} mapper_ns_t; + +typedef struct { + mapper_ns_t *namespaces; +} mapper_main_t; + +static mapper_main_t mapper_main; + +mapper_map_t *mapper_get_by_ifindex(mapper_ns_t *ns, int ifindex) +{ + mapper_map_t *map; + pool_foreach(map, ns->mappings, { + if (ifindex == map->linux_ifindex) + return map; + }); + return NULL; +} + +int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) +{ + mapper_main_t *mm = &mapper_main; + clib_warning("NS %d %s %U", ns - mm->namespaces, del?"del":"add", format_ns_route, route); + + mapper_map_t *map = mapper_get_by_ifindex(ns, route->oif); + if (!map) + return 0; + + if (route->rtm.rtm_family == AF_INET6) { + + //Filter-out multicast + if 
(route->rtm.rtm_dst_len >= 8 && route->dst[0] == 0xff) + return 0; + + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = route->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&prefix.fp_addr.ip6, route->dst, sizeof (prefix.fp_addr.ip6)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip6, route->gateway, sizeof (nh.ip6)); + + fib_table_entry_path_add (ns->v6fib_index, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, map->sw_if_index, ns->v6fib_index, + 0 /* weight */, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } else { + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = route->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&prefix.fp_addr.ip4, route->dst, sizeof (prefix.fp_addr.ip4)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip4, route->gateway, sizeof (nh.ip4)); + + fib_table_entry_path_add (ns->v4fib_index, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, map->sw_if_index, ns->v4fib_index, + 0 /* weight */, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } + + return 0; +} + +static void +mapper_netns_notify_cb(void *obj, netns_type_t type, + u32 flags, uword opaque) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns = &mm->namespaces[(u32) opaque]; + ASSERT(!pool_is_free_index(mm->namespaces, (u32) opaque)); + if (type != NETNS_TYPE_ROUTE) + return; //For now... 
+ + ns_route_t *route = obj; + if (flags & NETNS_F_DEL) { + mapper_add_del_route(ns, route, 1); + } else if (flags & NETNS_F_ADD) { + mapper_add_del_route(ns, route, 0); + } +} + +void +mapper_delmap(mapper_ns_t*ns, mapper_map_t *map) +{ + ns_route_t *route; + netns_t *netns = netns_getns(ns->netns_handle); + pool_foreach(route, netns->routes, { + if (route->oif == map->linux_ifindex) + mapper_add_del_route(ns, route, 1); + }); + pool_put(ns->mappings, map); +} + +mapper_map_t * +mapper_getmap(mapper_ns_t*ns, u32 sw_if_index, + int linux_ifindex, int create) +{ + mapper_map_t *map; + pool_foreach(map, ns->mappings, { + if (linux_ifindex == map->linux_ifindex) { + if (sw_if_index != map->sw_if_index) + return NULL; //Cannot have multiple mapping with the same ifindex + else + return map; + } + }); + + if (!create) + return NULL; + + pool_get(ns->mappings, map); + map->linux_ifindex = linux_ifindex; + map->sw_if_index = sw_if_index; + ip6_main.fib_index_by_sw_if_index[sw_if_index] = ns->v6fib_index; + ip4_main.fib_index_by_sw_if_index[sw_if_index] = ns->v4fib_index; + + //Load available routes + ns_route_t *route; + netns_t *netns = netns_getns(ns->netns_handle); + pool_foreach(route, netns->routes, { + if (route->oif == map->linux_ifindex) + mapper_add_del_route(ns, route, 0); + }); + return map; +} + +u32 +mapper_get_ns(char *nsname) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns; + pool_foreach(ns, mm->namespaces, { + if (!strcmp(nsname, ns->nsname)) + return ns - mm->namespaces; + }); + return ~0; +} + +int +mapper_add_del(u32 nsindex, int linux_ifindex, + u32 sw_if_index, int del) +{ + mapper_main_t *mm = &mapper_main; + //ip6_main_t *im6 = &ip6_main; + mapper_ns_t *ns = &mm->namespaces[nsindex]; + mapper_map_t *map; + //vnet_sw_interface_t *iface = vnet_get_sw_interface(vnet_get_main(), sw_if_index); + + if (pool_is_free(mm->namespaces, ns)) + return -1; + + /*if (!del) { + if ((iface->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) && + 
im6->fib_index_by_sw_if_index[sw_if_index] != ~0) { + //A custom fib index will be used... + clib_warning("Cannot add interface with a custom fib index (current is %d)", + im6->fib_index_by_sw_if_index[sw_if_index]); + return -1; + } + }*/ + + if (!(map = mapper_getmap(ns, sw_if_index, linux_ifindex, !del))) + return -1; + + if (del) + mapper_delmap(ns, map); + + return 0; +} + +int +mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns; + if (mapper_get_ns(nsname) != ~0) + return -1; //Already exists + + pool_get(mm->namespaces, ns); + strcpy(ns->nsname, nsname); + ns->v4fib_index = v4fib_index; + ns->v6fib_index = v6fib_index; + ns->mappings = 0; + + netns_sub_t sub; + sub.notify = mapper_netns_notify_cb; + sub.opaque = (uword)(ns - mm->namespaces); + if ((ns->netns_handle = netns_open(ns->nsname, &sub)) == ~0) { + pool_put(mm->namespaces, ns); + return -1; + } + *nsindex = ns - mm->namespaces; + return 0; +} + +int +mapper_del_ns(u32 nsindex) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns = &mm->namespaces[nsindex]; + if (pool_is_free(mm->namespaces, ns)) + return -1; + + //Remove all existing mappings + int i, *indexes = 0; + pool_foreach_index(i, ns->mappings, { + vec_add1(indexes, i); + }); + vec_foreach_index(i, indexes) { + mapper_delmap(ns, &ns->mappings[indexes[i]]); + } + vec_free(indexes); + + netns_close(ns->netns_handle); + pool_put(mm->namespaces, ns); + return 0; +} + +clib_error_t * +mapper_init (vlib_main_t * vm) +{ + mapper_main_t *mm = &mapper_main; + mm->namespaces = 0; + return 0; +} + +VLIB_INIT_FUNCTION (mapper_init); diff --git a/extras/router-plugin/devices/rtnetlink/mapper.h b/extras/router-plugin/devices/rtnetlink/mapper.h new file mode 100644 index 000000000..32e95d48f --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/mapper.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MAPPER_H_ +#define MAPPER_H_ + +#include "netns.h" + +/* + * Automatically map linux network routes to VPP. + * Each namespace is associated with an individual fib. + * + * One linux interface can only be mapped to a single VPP + * interface, but one VPP interface can be mapped to + * multiple linux interfaces. + * A mapped VPP interface must not have any configured fib. + */ + +int mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex); +int mapper_del_ns(u32 nsindex); +int mapper_add_del(u32 nsindex, int linux_ifindex, u32 sw_if_index, int del); + +#endif /* MAPPER_H_ */ diff --git a/extras/router-plugin/devices/rtnetlink/netns.c b/extras/router-plugin/devices/rtnetlink/netns.c new file mode 100644 index 000000000..19adb469a --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/netns.c @@ -0,0 +1,787 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/format.h> +#include <stddef.h> + +#include "netns.h" + +/* Enable some RTA values debug */ +//#define RTNL_CHECK + +#define is_nonzero(x) \ + ({ \ + u8 __is_zero_zero[sizeof(x)] = {}; \ + memcmp(__is_zero_zero, &x, sizeof(x)); \ + }) + +typedef struct { + u8 type; //Attribute identifier + u8 unique; //Part of the values uniquely identifying an entry + u16 offset; //Offset where stored in struct + u16 size; //Length of the attribute +} rtnl_mapping_t; + +#define ns_foreach_ifla \ + _(IFLA_ADDRESS, hwaddr) \ + _(IFLA_BROADCAST, broadcast) \ + _(IFLA_IFNAME, ifname) \ + _(IFLA_MASTER, master) \ + _(IFLA_MTU, mtu) \ + _(IFLA_QDISC, qdisc) + +static rtnl_mapping_t ns_ifmap[] = { +#define _(t, e) \ + { \ + .type = t, \ + .offset = offsetof(ns_link_t, e), \ + .size = sizeof(((ns_link_t*)0)->e) \ + }, + ns_foreach_ifla +#undef _ + { .type = 0 } +}; + +u8 *format_ns_link (u8 *s, va_list *args) +{ + ns_link_t *l = va_arg(*args, ns_link_t *); + s = format(s, "%s index %u", l->ifname, l->ifi.ifi_index); + return s; +} + +#define ns_foreach_rta \ + _(RTA_DST, dst, 1) \ + _(RTA_SRC, src, 1) \ + _(RTA_GATEWAY, gateway, 1) \ + _(RTA_IIF, iif, 1) \ + _(RTA_OIF, oif, 1) \ + _(RTA_PREFSRC, prefsrc, 0) \ + _(RTA_TABLE, table, 0) \ + _(RTA_PRIORITY, priority, 0) \ + _(RTA_CACHEINFO, cacheinfo, 0) \ + _(RTA_ENCAP, encap, 1) + +static rtnl_mapping_t ns_routemap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_route_t, e), \ + .size = sizeof(((ns_route_t*)0)->e) \ + }, + ns_foreach_rta +#undef _ + { .type = 0 } +}; + +u8 *format_ns_route (u8 *s, va_list *args) +{ + ns_route_t *r = va_arg(*args, ns_route_t *); + void *format_ip = r->rtm.rtm_family == AF_INET ? 
format_ip4_address : format_ip6_address; + s = format(s, "%U/%d", format_ip, r->dst, r->rtm.rtm_dst_len); + if (r->rtm.rtm_src_len) + s = format(s, " from %U/%d", format_ip, r->src, r->rtm.rtm_src_len); + if (is_nonzero(r->gateway)) + s = format(s, " via %U", format_ip, r->gateway); + if (r->iif) + s = format(s, " iif %d", r->iif); + if (r->oif) + s = format(s, " oif %d", r->oif); + if (is_nonzero(r->prefsrc)) + s = format(s, " src %U", format_ip, r->prefsrc); + if (r->table) + s = format(s, " table %d", r->table); + if (r->priority) + s = format(s, " priority %u", r->priority); + return s; +} + +#define ns_foreach_ifaddr \ + _(IFA_ADDRESS, addr, 1) \ + _(IFA_LOCAL, local, 1) \ + _(IFA_LABEL, label, 0) \ + _(IFA_BROADCAST, broadcast, 0) \ + _(IFA_ANYCAST, anycast, 0) \ + _(IFA_CACHEINFO, cacheinfo, 0) + +static rtnl_mapping_t ns_addrmap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_addr_t, e), \ + .size = sizeof(((ns_addr_t*)0)->e) \ + }, + ns_foreach_ifaddr +#undef _ + { .type = 0 } +}; + +u8 *format_ns_addr (u8 *s, va_list *args) +{ + ns_addr_t *a = va_arg(*args, ns_addr_t *); + void *format_ip = a->ifaddr.ifa_family == AF_INET ? 
format_ip4_address : format_ip6_address; + s = format(s, "%U/%d", format_ip, a->addr, a->ifaddr.ifa_prefixlen); + if (is_nonzero(a->label)) + s = format(s, " dev %s", a->label); + if (is_nonzero(a->broadcast)) + s = format(s, " broadcast %U", format_ip, a->broadcast); + if (is_nonzero(a->anycast)) + s = format(s, " anycast %U", format_ip, a->anycast); + if (is_nonzero(a->local)) + s = format(s, " local %U", format_ip, a->local); + return s; +} + +#ifndef NDA_RTA +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) +#endif + +#define ns_foreach_neigh \ + _(NDA_DST, dst, 1) \ + _(NDA_LLADDR, lladdr, 0) \ + _(NDA_PROBES, probes, 0) \ + _(NDA_CACHEINFO, cacheinfo, 0) + +static rtnl_mapping_t ns_neighmap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_neigh_t, e), \ + .size = sizeof(((ns_neigh_t*)0)->e) \ + }, + ns_foreach_neigh +#undef _ + { .type = 0 } +}; + +u8 *format_ns_neigh (u8 *s, va_list *args) +{ + ns_neigh_t *n = va_arg(*args, ns_neigh_t *); + void *format_ip = n->nd.ndm_family == AF_INET ? 
format_ip4_address : format_ip6_address; + s = format(s, "%U", format_ip, n->dst); + if (is_nonzero(n->lladdr)) + s = format(s, " lladdr %U", format_ethernet_address, n->lladdr); + if (n->probes) + s = format(s, " probes %d", n->probes); + return s; +} + +typedef struct { + void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque); + uword opaque; + u32 netns_index; +} netns_handle_t; + +typedef struct { + netns_t netns; + u32 rtnl_handle; + u32 subscriber_count; +} netns_p; + +typedef struct { + netns_p *netnss; + netns_handle_t *handles; +} netns_main_t; + +netns_main_t netns_main; + +static int +rtnl_parse_rtattr(struct rtattr *db[], size_t max, + struct rtattr *rta, size_t len) { + for(; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { + if (rta->rta_type <= max) + db[rta->rta_type] = rta; +#ifdef RTNL_CHECK + else + clib_warning("RTA type too high: %d", rta->rta_type); +#endif + } + + if(len) { + clib_warning("rattr lenght mistmatch %d %d len", + (int) len, (int) rta->rta_len); + return -1; + } + return 0; +} + +/* + * Debug function to display when + * we receive an RTA that I forgot in + * the mapping table (there are so many of them). 
+ */ +#ifdef RTNL_CHECK +static void +rtnl_entry_check(struct rtattr *rtas[], + size_t rta_len, + rtnl_mapping_t map[], + char *logstr) +{ + int i; + for (i=0; i<rta_len; i++) { + if (!rtas[i]) + continue; + + rtnl_mapping_t *m = map; + for (m = map; m->type; m++) { + if (m->type == rtas[i]->rta_type) + break; + } + if (!m->type) + clib_warning("Unknown RTA type %d (%s)", rtas[i]->rta_type, logstr); + } +} +#endif + +/* + * Check if the provided entry matches the parsed and unique rtas + */ +static int +rtnl_entry_match(void *entry, + struct rtattr *rtas[], + rtnl_mapping_t map[]) +{ + u8 zero[1024] = {}; + for ( ;map->type != 0; map++) { + struct rtattr *rta = rtas[map->type]; + size_t rta_len = rta?RTA_PAYLOAD(rta):0; + if (!map->unique) + continue; + + if (rta && RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", + rta->rta_type, rta->rta_len, map->size); + return -1; + } + + if ((rta && memcmp(RTA_DATA(rta), entry + map->offset, rta_len)) || + memcmp(entry + map->offset + rta_len, zero, map->size - rta_len)) { + return 0; + } + } + return 1; +} + +static int +rtnl_entry_set(void *entry, + struct rtattr *rtas[], + rtnl_mapping_t map[], + int init) +{ + for (; map->type != 0; map++) { + + struct rtattr *rta = rtas[map->type]; + + if(map->type == RTA_ENCAP && rta) { + /*Data of RTA_ENCAP is a pointer to rta attributes for MPLS*/ + rta = (struct rtattr*)RTA_DATA(rta); + if (RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); + return -1; + } + memcpy(entry + map->offset, RTA_DATA(rta), map->size); + memset(entry + map->offset + map->size, 0, 0); + } else if (rta) { + if (RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); + return -1; + } + memcpy(entry + map->offset, RTA_DATA(rta), RTA_PAYLOAD(rta)); + memset(entry + map->offset + RTA_PAYLOAD(rta), 0, map->size - 
RTA_PAYLOAD(rta)); + } else if (init) { + memset(entry + map->offset, 0, map->size); + } + } + return 0; +} + +void +netns_notify(netns_p *ns, void *obj, netns_type_t type, u32 flags) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h; + pool_foreach(h, nm->handles, { + if (h->netns_index == (ns - nm->netnss) && h->notify) + h->notify(obj, type, flags, h->opaque); + }); +} + +static_always_inline int +mask_match(void *a, void *b, void *mask, size_t len) +{ + u8 *va = (u8 *) a; + u8 *vb = (u8 *) b; + u8 *vm = (u8 *) mask; + while (len--) { + if ((va[len] ^ vb[len]) & vm[len]) + return 0; + } + return 1; +} + +static ns_link_t * +ns_get_link(netns_p *ns, struct ifinfomsg *ifi, struct rtattr *rtas[]) +{ + ns_link_t *link; + pool_foreach(link, ns->netns.links, { + if(ifi->ifi_index == link->ifi.ifi_index) + return link; + }); + return NULL; +} + +static int +ns_rcv_link(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_link_t *link; + struct ifinfomsg *ifi; + struct rtattr *rtas[IFLA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*ifi)) + return -1; + + ifi = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*ifi))) && + rtnl_parse_rtattr(rtas, IFLA_MAX, IFLA_RTA(ifi), + IFLA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, IFLA_MAX + 1, ns_ifmap, "link"); +#endif + + link = ns_get_link(ns, ifi, rtas); + + if (hdr->nlmsg_type == RTM_DELLINK) { + if (!link) + return -1; + pool_put(ns->netns.links, link); + netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_DEL); + return 0; + } + + if (!link) { + pool_get(ns->netns.links, link); + rtnl_entry_set(link, rtas, ns_ifmap, 1); + } else { + rtnl_entry_set(link, rtas, ns_ifmap, 0); + } + + link->ifi = *ifi; + link->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_ADD); + return 0; +} + +static ns_route_t * +ns_get_route(netns_p *ns, struct rtmsg *rtm, struct rtattr *rtas[]) +{ + ns_route_t *route; + 
+ //This describes the values which uniquely identify a route + struct rtmsg msg = { + .rtm_family = 0xff, + .rtm_dst_len = 0xff, + .rtm_src_len = 0xff, + .rtm_table = 0xff, + .rtm_protocol = 0xff, + .rtm_type = 0xff + }; + + pool_foreach(route, ns->netns.routes, { + if(mask_match(&route->rtm, rtm, &msg, sizeof(struct rtmsg)) && + rtnl_entry_match(route, rtas, ns_routemap)) + return route; + }); + return NULL; +} + +static int +ns_rcv_route(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_route_t *route; + struct rtmsg *rtm; + struct rtattr *rtas[RTA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*rtm)) + return -1; + + rtm = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*rtm))) && + rtnl_parse_rtattr(rtas, RTA_MAX, RTM_RTA(rtm), + RTM_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, RTA_MAX + 1, ns_routemap, "route"); +#endif + route = ns_get_route(ns, rtm, rtas); + + if (hdr->nlmsg_type == RTM_DELROUTE) { + if (!route) + return -1; + pool_put(ns->netns.routes, route); + netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_DEL); + return 0; + } + + if (!route) { + pool_get(ns->netns.routes, route); + memset(route, 0, sizeof(*route)); + rtnl_entry_set(route, rtas, ns_routemap, 1); + } else { + rtnl_entry_set(route, rtas, ns_routemap, 0); + } + + route->rtm = *rtm; + route->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_ADD); + return 0; +} + +static ns_addr_t * +ns_get_addr(netns_p *ns, struct ifaddrmsg *ifaddr, struct rtattr *rtas[]) +{ + ns_addr_t *addr; + + //This describes the values which uniquely identify a route + struct ifaddrmsg msg = { + .ifa_family = 0xff, + .ifa_prefixlen = 0xff, + }; + + pool_foreach(addr, ns->netns.addresses, { + if(mask_match(&addr->ifaddr, ifaddr, &msg, sizeof(struct ifaddrmsg)) && + rtnl_entry_match(addr, rtas, ns_addrmap)) + return addr; + }); + return NULL; +} + +static int +ns_rcv_addr(netns_p *ns, 
struct nlmsghdr *hdr) +{ + ns_addr_t *addr; + struct ifaddrmsg *ifaddr; + struct rtattr *rtas[IFA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*ifaddr)) + return -1; + + ifaddr = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*ifaddr))) && + rtnl_parse_rtattr(rtas, IFA_MAX, IFA_RTA(ifaddr), + IFA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, IFA_MAX + 1, ns_addrmap, "addr"); +#endif + addr = ns_get_addr(ns, ifaddr, rtas); + + if (hdr->nlmsg_type == RTM_DELADDR) { + if (!addr) + return -1; + pool_put(ns->netns.addresses, addr); + netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_DEL); + return 0; + } + + if (!addr) { + pool_get(ns->netns.addresses, addr); + memset(addr, 0, sizeof(*addr)); + rtnl_entry_set(addr, rtas, ns_addrmap, 1); + } else { + rtnl_entry_set(addr, rtas, ns_addrmap, 0); + } + + addr->ifaddr = *ifaddr; + addr->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_ADD); + return 0; +} + +static ns_neigh_t * +ns_get_neigh(netns_p *ns, struct ndmsg *nd, struct rtattr *rtas[]) +{ + ns_neigh_t *neigh; + + //This describes the values which uniquely identify a route + struct ndmsg msg = { + .ndm_family = 0xff, + .ndm_ifindex = 0xff, + }; + + pool_foreach(neigh, ns->netns.neighbors, { + if(mask_match(&neigh->nd, nd, &msg, sizeof(&msg)) && + rtnl_entry_match(neigh, rtas, ns_neighmap)) + return neigh; + }); + return NULL; +} + +static int +ns_rcv_neigh(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_neigh_t *neigh; + struct ndmsg *nd; + struct rtattr *rtas[NDA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*nd)) + return -1; + + nd = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*nd))) && + rtnl_parse_rtattr(rtas, NDA_MAX, NDA_RTA(nd), + NDA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, NDA_MAX + 1, ns_neighmap, "nd"); +#endif + neigh = 
ns_get_neigh(ns, nd, rtas); + + if (hdr->nlmsg_type == RTM_DELNEIGH) { + if (!neigh) + return -1; + pool_put(ns->netns.neighbors, neigh); + netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_DEL); + return 0; + } + + if (!neigh) { + pool_get(ns->netns.neighbors, neigh); + memset(neigh, 0, sizeof(*neigh)); + rtnl_entry_set(neigh, rtas, ns_neighmap, 1); + } else { + rtnl_entry_set(neigh, rtas, ns_neighmap, 0); + } + + neigh->nd = *nd; + neigh->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_ADD); + return 0; +} + +#define ns_object_foreach \ + _(neighbors, NETNS_TYPE_NEIGH) \ + _(routes, NETNS_TYPE_ROUTE) \ + _(addresses, NETNS_TYPE_ADDR) \ + _(links, NETNS_TYPE_LINK) + +static void +ns_recv_error(rtnl_error_t err, uword o) +{ + //An error was received. Reset everything. + netns_p *ns = &netns_main.netnss[o]; + u32 *indexes = 0; + u32 *i = 0; + +#define _(pool, type) \ + pool_foreach_index(*i, ns->netns.pool, { \ + vec_add1(indexes, *i); \ + }) \ + vec_foreach(i, indexes) { \ + pool_put_index(ns->netns.pool, *i); \ + netns_notify(ns, &ns->netns.pool[*i], type, NETNS_F_DEL); \ + } \ + vec_reset_length(indexes); + + ns_object_foreach + +#undef _ + vec_free(indexes); +} + +static void +ns_recv_rtnl(struct nlmsghdr *hdr, uword o) +{ + netns_p *ns = &netns_main.netnss[o]; + switch (hdr->nlmsg_type) { + case RTM_NEWROUTE: + case RTM_DELROUTE: + ns_rcv_route(ns, hdr); + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ns_rcv_link(ns, hdr); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ns_rcv_addr(ns, hdr); + break; + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + ns_rcv_neigh(ns, hdr); + break; + default: + clib_warning("unknown rtnl type %d", hdr->nlmsg_type); + break; + } +} + +static void +netns_destroy(netns_p *ns) +{ + netns_main_t *nm = &netns_main; + rtnl_stream_close(ns->rtnl_handle); + pool_put(nm->netnss, ns); + pool_free(ns->netns.links); + pool_free(ns->netns.addresses); + pool_free(ns->netns.routes); + 
pool_free(ns->netns.neighbors); +} + +static netns_p * +netns_get(char *name) +{ + netns_main_t *nm = &netns_main; + netns_p *ns; + pool_foreach(ns, nm->netnss, { + if (!strcmp(name, ns->netns.name)) + return ns; + }); + + if (strlen(name) > RTNL_NETNS_NAMELEN) + return NULL; + + pool_get(nm->netnss, ns); + rtnl_stream_t s = { + .recv_message = ns_recv_rtnl, + .error = ns_recv_error, + .opaque = (uword)(ns - nm->netnss), + }; + strcpy(s.name, name); + + u32 handle; + if ((handle = rtnl_stream_open(&s)) == ~0) { + pool_put(nm->netnss, ns); + return NULL; + } + + strcpy(ns->netns.name, name); + ns->netns.addresses = 0; + ns->netns.links = 0; + ns->netns.neighbors = 0; + ns->netns.routes = 0; + ns->subscriber_count = 0; + ns->rtnl_handle = handle; + return ns; +} + +u32 netns_open(char *name, netns_sub_t *sub) +{ + netns_main_t *nm = &netns_main; + netns_p *ns; + netns_handle_t *p; + if (!(ns = netns_get(name))) + return ~0; + + pool_get(nm->handles, p); + p->netns_index = ns - nm->netnss; + p->notify = sub->notify; + p->opaque = sub->opaque; + ns->subscriber_count++; + return p - nm->handles; +} + +netns_t *netns_getns(u32 handle) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + return &ns->netns; +} + +void netns_close(u32 handle) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + pool_put(h, nm->handles); + ns->subscriber_count--; + if (!ns->subscriber_count) + netns_destroy(ns); +} + +void netns_callme(u32 handle, char del) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + u32 i = 0; + if (!h->notify) + return; + +#define _(pool, type) \ + pool_foreach_index(i, ns->netns.pool, { \ + h->notify(&ns->netns.pool[i], type, \ + 
del?NETNS_F_DEL:NETNS_F_ADD, h->opaque); \ + }); + + ns_object_foreach +#undef _ + + } + +u8 *format_ns_object(u8 *s, va_list *args) +{ + netns_type_t t = va_arg(*args, netns_type_t); + void *o = va_arg(*args, void *); + switch (t) { + case NETNS_TYPE_ADDR: + return format(s, "addr %U", format_ns_addr, o); + case NETNS_TYPE_ROUTE: + return format(s, "route %U", format_ns_route, o); + case NETNS_TYPE_LINK: + return format(s, "link %U", format_ns_link, o); + case NETNS_TYPE_NEIGH: + return format(s, "neigh %U", format_ns_neigh, o); + } + return s; +} + +u8 *format_ns_flags(u8 *s, va_list *args) +{ + u32 flags = va_arg(*args, u32); + if (flags & NETNS_F_ADD) + s = format(s, "add"); + else if (flags & NETNS_F_DEL) + s = format(s, "del"); + else + s = format(s, "mod"); + return s; +} + +clib_error_t * +netns_init (vlib_main_t * vm) +{ + netns_main_t *nm = &netns_main; + nm->netnss = 0; + nm->handles = 0; + return 0; +} + +VLIB_INIT_FUNCTION (netns_init); diff --git a/extras/router-plugin/devices/rtnetlink/netns.h b/extras/router-plugin/devices/rtnetlink/netns.h new file mode 100644 index 000000000..53effe5ce --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/netns.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef NETNS_H_ +#define NETNS_H_ + +#include <vlib/vlib.h> + +#include <sys/socket.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <net/if.h> + +#include "rtnl.h" + +/*include it for 'struct mpls_label'*/ +#include <linux/mpls.h> +/*so far depth is fixed, looking into ways to be dynamic*/ +#define MPLS_STACK_DEPTH 7 + +typedef struct { + struct ifinfomsg ifi; + u8 hwaddr[IFHWADDRLEN]; + u8 broadcast[IFHWADDRLEN]; + u8 ifname[IFNAMSIZ]; + u32 mtu; + u32 master; + u8 qdisc[IFNAMSIZ]; + struct rtnl_link_stats stats; //This struct is big and only comes as a response to a request + f64 last_updated; +} ns_link_t; + +typedef struct { + struct rtmsg rtm; + u8 dst[16]; + u8 src[16]; + u8 via[16]; + u8 prefsrc[16]; + u32 iif; + u32 oif; + u32 table; + u8 gateway[16]; + u32 priority; + struct rta_cacheinfo cacheinfo; + struct mpls_label encap[MPLS_STACK_DEPTH]; + f64 last_updated; +} ns_route_t; + +typedef struct { + struct ifaddrmsg ifaddr; + u8 addr[16]; + u8 local[16]; + u8 label[IFNAMSIZ]; + u8 broadcast[16]; + u8 anycast[16]; + struct ifa_cacheinfo cacheinfo; + f64 last_updated; +} ns_addr_t; + +typedef struct { + struct ndmsg nd; + u8 dst[16]; + u8 lladdr[IFHWADDRLEN]; + u32 probes; + struct nda_cacheinfo cacheinfo; + f64 last_updated; +} ns_neigh_t; + +typedef struct { + char name[RTNL_NETNS_NAMELEN + 1]; + ns_link_t *links; + ns_route_t *routes; + ns_addr_t *addresses; + ns_neigh_t *neighbors; +} netns_t; + + +typedef enum { + NETNS_TYPE_LINK, + NETNS_TYPE_ROUTE, + NETNS_TYPE_ADDR, + NETNS_TYPE_NEIGH, +} netns_type_t; + +//Flags used in notification functions call +#define NETNS_F_ADD 0x01 +#define NETNS_F_DEL 0x02 + +typedef struct { + void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque); + uword opaque; +} netns_sub_t; + +/* + * Subscribe for events related to the given namespace. 
+ * When another subscriber already uses the namespace, + * this call will not trigger updates for already + * existing routes (This is to protect against + * synch. Vs asynch. issues). + */ +u32 netns_open(char *name, netns_sub_t *sub); + +/* + * Retrieves the namespace structure associated with a + * given namespace handler. + */ +netns_t *netns_getns(u32 handle); + +/* + * Terminates a subscriber session. + */ +void netns_close(u32 handle); + +/* + * Calls the callback associated with the handle + * for all existing objects with the flags + * set to (del?NETNS_F_DEL:NETNS_F_ADD). + */ +void netns_callme(u32 handle, char del); + +/* + * netns struct format functions. + * Taking the struct as single argument. + */ +u8 *format_ns_neigh(u8 *s, va_list *args); +u8 *format_ns_addr(u8 *s, va_list *args); +u8 *format_ns_route(u8 *s, va_list *args); +u8 *format_ns_link(u8 *s, va_list *args); + +u8 *format_ns_object(u8 *s, va_list *args); +u8 *format_ns_flags(u8 *s, va_list *args); + +#endif diff --git a/extras/router-plugin/devices/rtnetlink/rtnl.c b/extras/router-plugin/devices/rtnetlink/rtnl.c new file mode 100644 index 000000000..ed3db9e72 --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/rtnl.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/error.h> + +#include <sys/socket.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <float.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/wait.h> +#include <errno.h> +#include <string.h> + +#include "netns.h" +#include "rtnl.h" + +#undef DBL_MAX +#define DBL_MAX 1000000000.0 + +typedef enum { + RTNL_E_OPEN, + RTNL_E_CLOSE, + RTNL_E_READ, +} rtnl_event_t; + +typedef enum { + RTNL_S_INIT, + RTNL_S_SYNC, + RTNL_S_READY, +} rtnl_state_t; + +typedef enum { + RTNL_SS_OPENING, + RTNL_SS_LINK, + RTNL_SS_ADDR, + RTNL_SS_ROUTE4, + RTNL_SS_ROUTE6, + RTNL_SS_NEIGH, +} rtnl_sync_state_t; + +typedef struct { + rtnl_stream_t stream; + rtnl_state_t state; + rtnl_sync_state_t sync_state; + int ns_fd; + int rtnl_socket; + u32 unix_index; + u32 rtnl_seq; + f64 timeout; +} rtnl_ns_t; + +typedef struct { + f64 now; + rtnl_ns_t *streams; +} rtnl_main_t; + +static rtnl_main_t rtnl_main; +static vlib_node_registration_t rtnl_process_node; + +#define RTNL_BUFFSIZ 16384 +#define RTNL_DUMP_TIMEOUT 1 + +static inline u32 grpmask(u32 g) +{ + ASSERT (g <= 31); + if (g) { + return 1 << (g - 1); + } else + return 0; +} + + +u8 *format_rtnl_nsname2path(u8 *s, va_list *args) +{ + char *nsname = va_arg(*args, char *); + if (!nsname || !strlen(nsname)) { + return format(s, "/proc/self/ns/net"); + } else if (strpbrk(nsname, "/") != NULL) { + return format(s, "%s", nsname); + } else { + return format(s, "/var/run/netns/%s", nsname); + } +} + +static_always_inline void +rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when) +{ + ns->timeout = when; +} + +static_always_inline void +rtnl_cancel_timeout(rtnl_ns_t *ns) +{ + ns->timeout = DBL_MAX; +} + +static clib_error_t *rtnl_read_cb(struct clib_file * f) +{ + rtnl_main_t *rm = &rtnl_main; + vlib_main_t *vm = vlib_get_main(); + rtnl_ns_t *ns = &rm->streams[f->private_data]; + vlib_process_signal_event(vm, 
rtnl_process_node.index, RTNL_E_READ, (uword)(ns - rm->streams)); + return 0; +} + +int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr nlh = { + .nlmsg_len = NLMSG_LENGTH(len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, + .nlmsg_pid = 0, + .nlmsg_seq = ++ns->rtnl_seq, + }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, + }; + if(sendmsg(ns->rtnl_socket, &msg, 0) < 0) { + clib_warning("sendmsg error: %s", strerror(errno)); + return -1; + } + return 0; +} + +static void rtnl_socket_close(rtnl_ns_t *ns) +{ + clib_file_del(&file_main, &file_main.file_pool[ns->unix_index]); + close(ns->rtnl_socket); +} + +struct rtnl_thread_exec { + int fd; + void *(*fn)(void *); + void *arg; + void **ret; +}; + +static void *rtnl_exec_in_thread_fn(void *p) +{ + struct rtnl_thread_exec *ex = (struct rtnl_thread_exec *) p; + if (setns(ex->fd, 0)) + return (void *) ((uword) (-errno)); + + *ex->ret = ex->fn(ex->arg); + return NULL; +} + +static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret) +{ + pthread_t thread; + void *thread_ret; + struct rtnl_thread_exec ex = { + .fd = fd, + .fn = fn, + .arg = arg, + .ret = ret + }; + if(pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex)) + return -errno; + + if(pthread_join(thread, &thread_ret)) + return -errno; + + if (thread_ret) + return (int) ((uword)thread_ret); + + return 0; +} + +int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret) +{ + rtnl_main_t *rm = &rtnl_main; + if (pool_is_free_index(rm->streams, stream_index)) + return -EBADR; + + rtnl_ns_t *ns = pool_elt_at_index(rm->streams, stream_index); + return rtnl_exec_in_namespace_byfd(ns->ns_fd, fn, arg, ret); 
+} + +int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret) +{ + int fd; + u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, nsname); + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + vec_free(s); + return -errno; + } + + int r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret); + vec_free(s); + close(fd); + return r; +} + +/* this function is run by the second thread */ +static void *rtnl_thread_fn(void *p) +{ + rtnl_ns_t *ns = (rtnl_ns_t *) p; + if (setns(ns->ns_fd, 0)) { + clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno); + return (void *) -1; + } + + if ((ns->rtnl_socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) { + clib_warning("Cannot open socket"); + return (void *) -2; + } + + return NULL; +} + +static int rtnl_socket_open(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + pthread_t thread; + void *thread_ret; + if(pthread_create(&thread, NULL, rtnl_thread_fn, ns)) { + clib_warning("Can't create opening thread"); + return -1; + } + + if(pthread_join(thread, &thread_ret)) { + clib_warning("Can't join opening thread"); + return -2; + } + + if (thread_ret) { + clib_warning("Could not open netlink socket"); + return -3; + } + + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + .nl_pad = 0, + .nl_pid = 0, + /*add mpls message group*/ + .nl_groups = grpmask(RTNLGRP_LINK)| grpmask(RTNLGRP_IPV6_IFADDR) | + grpmask(RTNLGRP_IPV4_IFADDR) | grpmask(RTNLGRP_IPV4_ROUTE) | + grpmask(RTNLGRP_IPV6_ROUTE) | grpmask(RTNLGRP_NEIGH) | + grpmask(RTNLGRP_NOTIFY) /* | grpmask(RTNLGRP_MPLS_ROUTE)*/, + }; + + if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr))) { + close(ns->rtnl_socket); + return -3; + } + + clib_file_t template = {0}; + template.read_function = rtnl_read_cb; + template.file_descriptor = ns->rtnl_socket; + template.private_data = (uword) (ns - rm->streams); + ns->unix_index = clib_file_add (&file_main, &template); + return 0; +} + +static int +rtnl_rcv_error(rtnl_ns_t 
*ns, struct nlmsghdr *hdr, int *error) +{ + struct nlmsgerr *err = NLMSG_DATA(hdr); + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + if(datalen < sizeof(*err)) + return -1; + + *error = err->error; + return 0; +} + +static void +rtnl_sync_reset(rtnl_ns_t *ns) +{ + if (ns->sync_state == RTNL_SS_OPENING) + return; + + rtnl_socket_close(ns); + ns->sync_state = RTNL_SS_OPENING; +} + +static void +rtnl_sync_done(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + struct ifaddrmsg addrmsg; + struct rtmsg rtmsg; + struct ndmsg ndmsg; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + //Cannot happen here + break; + case RTNL_SS_LINK: + memset(&addrmsg, 0, sizeof(addrmsg)); + addrmsg.ifa_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_ADDR; + break; + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + memset(&rtmsg, 0, sizeof(rtmsg)); + rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6; + rtmsg.rtm_table = RT_TABLE_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6; + break; + case RTNL_SS_ROUTE6: + memset(&ndmsg, 0, sizeof(ndmsg)); + ndmsg.ndm_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_NEIGH; + break; + case RTNL_SS_NEIGH: + ns->state = RTNL_S_READY; + ns->sync_state = 0; + rtnl_cancel_timeout(ns); + break; + } +} + +static void +rtnl_sync_timeout(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; 
+ struct ifinfomsg imsg = {}; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + if (rtnl_socket_open(ns)) { + rtnl_schedule_timeout(ns, rm->now + 10); + return; + } + imsg.ifi_family = AF_UNSPEC; + if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 10); + } + ns->sync_state = RTNL_SS_LINK; + rtnl_schedule_timeout(ns, rm->now + 2); + break; + case RTNL_SS_LINK: + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + case RTNL_SS_ROUTE6: + case RTNL_SS_NEIGH: + //Timeout happened while synchronizing + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + break; + } +} + +static int +rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr) +{ + rtnl_main_t *rm = &rtnl_main; + int ret, error = 0; + + if (ns->state == RTNL_S_SYNC && + ((hdr->nlmsg_flags & RTM_F_NOTIFY) || + (hdr->nlmsg_seq != (ns->rtnl_seq)))) { + clib_warning("Received notification while in sync. Restart synchronization."); + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now); + } + + switch (hdr->nlmsg_type) { + case NLMSG_DONE: + rtnl_sync_done(ns); + break; + case NLMSG_ERROR: + if((ret = rtnl_rcv_error(ns, hdr, &error))) + return ret; + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + if (ns->stream.recv_message) + ns->stream.recv_message(hdr, ns->stream.opaque); + break; + default: + clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type); + break; + } + return 0; +} + +static void +rtnl_process_open(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state != RTNL_S_INIT) + return; + + ns->state = RTNL_S_SYNC; + ns->sync_state = RTNL_SS_OPENING; + rtnl_schedule_timeout(ns, rm->now); +} + +static void +rtnl_process_close(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state == RTNL_S_INIT) + return; + + rtnl_socket_close(ns); + close(ns->ns_fd); + pool_put(rm->streams, ns); +} 
+ +static int +rtnl_process_read(rtnl_ns_t *ns) +{ + uint8_t buff[RTNL_BUFFSIZ]; + ssize_t len; + struct nlmsghdr *hdr; + while(1) { + if((len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT)) < 0) { + if(errno != EAGAIN) { + clib_warning("rtnetlink recv error (%d) [%s]: %s", ns->rtnl_socket, ns->stream.name, strerror(errno)); + return -1; + } + return 0; + } + + for(hdr = (struct nlmsghdr *) buff; + len > 0; + len -= NLMSG_ALIGN(hdr->nlmsg_len), + hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) { + if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) { + clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len); + return -1; + } + if (rtnl_ns_recv(ns, hdr)) + return -1; + } + } + return 0; +} + +static void +rtnl_process_timeout(rtnl_ns_t *ns) +{ + switch (ns->state) { + case RTNL_S_SYNC: + rtnl_sync_timeout(ns); + break; + case RTNL_S_INIT: + case RTNL_S_READY: + clib_warning("Should not happen"); + break; + } +} + +static uword +rtnl_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtnl_main_t *rm = &rtnl_main; + uword event_type; + uword *event_data = 0; + rm->now = vlib_time_now(vm); + f64 timeout = DBL_MAX; + rtnl_ns_t *ns; + + //Setting up + while (1) { + vlib_process_wait_for_event_or_clock(vm, timeout - rm->now); + event_type = vlib_process_get_events(vm, &event_data); + rm->now = vlib_time_now(vm); + + if (event_type == ~0) { //Clock event or no event + pool_foreach(ns, rm->streams, { + if (ns->timeout < rm->now) { + ns->timeout = DBL_MAX; + rtnl_process_timeout(ns); + } + }); + } else { + rtnl_ns_t *ns; + uword *d; + vec_foreach(d, event_data) { + ns = &rm->streams[d[0]]; + switch (event_type) + { + case RTNL_E_CLOSE: + rtnl_process_close(ns); + break; + case RTNL_E_OPEN: + rtnl_process_open(ns); + break; + case RTNL_E_READ: + rtnl_process_read(ns); + break; + } + } + } + + vec_reset_length (event_data); + + timeout = DBL_MAX; + 
pool_foreach(ns, rm->streams, { + if (ns->timeout < timeout) + timeout = ns->timeout; + }); + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE(rtnl_process_node, static) = { + .function = rtnl_process, + .name = "rtnl-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; + +u32 +rtnl_stream_open(rtnl_stream_t *template) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + rtnl_ns_t *ns; + int fd; + u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, template->name); + vec_add1(s, 0); + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + clib_unix_warning("open stream %s: ", s); + vec_free(s); + return ~0; + } + + vec_free(s); + pool_get(rm->streams, ns); + ns->state = RTNL_S_INIT; + ns->ns_fd = fd; + ns->stream = *template; + vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_OPEN, (uword)(ns - rm->streams)); + return ns - rm->streams; +} + +void +rtnl_stream_close(u32 stream_index) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + ASSERT(!pool_is_free_index(rm->streams, stream_index)); + vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_CLOSE, stream_index); +} + +clib_error_t * +rtnl_init (vlib_main_t * vm) +{ + rtnl_main_t *rm = &rtnl_main; + rm->streams = 0; + return 0; +} + +VLIB_INIT_FUNCTION (rtnl_init); diff --git a/extras/router-plugin/devices/rtnetlink/rtnl.h b/extras/router-plugin/devices/rtnetlink/rtnl.h new file mode 100644 index 000000000..3f96252c1 --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/rtnl.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RTNL_H_ +#define RTNL_H_ + +#include <vlib/vlib.h> + +#include <linux/netlink.h> +#include <vppinfra/clib.h> + +typedef enum { + RTNL_ERR_UNKNOWN, +} rtnl_error_t; + +#define RTNL_NETNS_NAMELEN 128 + +/* + * RTNL stream implements an RTNL overlay + * for receiving continuous updates for a given namespace. + * When the stream is initially opened, dump requests are sent + * in order to retrieve the original state. + * The error callback (the 'error' member below) is meant to be + * called any time synchronization cannot be achieved. + * When that happens, the stream is reset to its original state and + * new dump requests are sent. + */ + +typedef struct rtnl_stream_s { + char name[RTNL_NETNS_NAMELEN + 1]; + void (*recv_message)(struct nlmsghdr *hdr, uword opaque); + void (*error)(rtnl_error_t err, uword opaque); + uword opaque; +} rtnl_stream_t; + +u32 rtnl_stream_open(rtnl_stream_t *template); +void rtnl_stream_close(u32 handle); + +/* + * Executes a function in a synchronously executed thread in the + * given namespace. + * Returns 0 on success, and -errno on error. 
+ */ +int rtnl_exec_in_namespace(u32 handle, void *(*fn)(void *), void *arg, void **ret); +int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret); + +u8 *format_rtnl_nsname2path(u8 *s, va_list *args); + +#endif diff --git a/extras/router-plugin/devices/rtnetlink/test.c b/extras/router-plugin/devices/rtnetlink/test.c new file mode 100644 index 000000000..031748dd3 --- /dev/null +++ b/extras/router-plugin/devices/rtnetlink/test.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <librtnl/netns.h> + +#include <vnet/plugin/plugin.h> +#include <librtnl/mapper.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> + +u32 handles[10]; + +static void +test_notify(void *obj, netns_type_t type, u32 flags, uword opaque) { + u32 index = (u32) opaque; + const char *action = (flags & NETNS_F_ADD)?"add":(flags & NETNS_F_DEL)?"del":"mod"; + + switch (type) { + case NETNS_TYPE_ADDR: + clib_warning("%d: addr %s %U", index, action, format_ns_addr, (ns_addr_t *)obj); + break; + case NETNS_TYPE_ROUTE: + clib_warning("%d: route %s %U", index, action, format_ns_route, (ns_route_t *)obj); + break; + case NETNS_TYPE_LINK: + clib_warning("%d:link %s %U", index, action, format_ns_link, (ns_link_t *)obj); + break; + case NETNS_TYPE_NEIGH: + clib_warning("%d: neigh %s %U", index, action, format_ns_neigh, (ns_neigh_t *)obj); + break; + } +} + +static clib_error_t * +test_enable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + char *nsname = 0; + u32 index; + if (!unformat(input, "%s", &nsname)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + if (!unformat(input, "%d", &index)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + if (!strcmp(nsname, "default")) + nsname[0] = 0; + + netns_sub_t sub; + sub.notify = test_notify; + sub.opaque = index; + handles[index] = netns_open(nsname, &sub); + if (handles[index] == ~0) { + return clib_error_create("Could not open netns with name %s", nsname); + } + return 0; +} + +static clib_error_t * +test_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + if (!unformat(input, "%d", &index)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + netns_close(handles[index]); + return 0; +} + +VLIB_CLI_COMMAND (rtnl_enable_command, 
static) = { + .path = "test netns enable", + .short_help = "test netns enable [<ns-name>|default] <index>", + .function = test_enable_command_fn, +}; + +VLIB_CLI_COMMAND (rtnl_disable_command, static) = { + .path = "test netns disable", + .short_help = "test rtnl disable <index>", + .function = test_disable_command_fn, +}; + +u32 mapper_indexes[10]; + +static clib_error_t * +mapper_ns_add_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + char *nsname; + u32 table_id; + if (!unformat(input, "%d", &index)) + return clib_error_return(0, "invalid index `%U'", + format_unformat_error, input); + if (!unformat(input, "%s", &nsname)) + return clib_error_return(0, "invalid nsname `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &table_id)) + return clib_error_return(0, "invalid fib index `%U'", + format_unformat_error, input); + + if (!strcmp(nsname, "default")) + nsname[0] = 0; + + u32 fib4 = ip4_fib_index_from_table_id(table_id); + u32 fib6 = ip6_fib_index_from_table_id(table_id); + + if (mapper_add_ns(nsname, fib4, fib6, &mapper_indexes[index])) + return clib_error_return(0, "Could not add ns %s", nsname); + return 0; +} + +VLIB_CLI_COMMAND (mapper_ns_add_command, static) = { + .path = "test mapper ns add", + .short_help = "test mapper ns add <index> <nsname> <table-id>", + .function = mapper_ns_add_command_fn, +}; + +static clib_error_t * +mapper_ns_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + if (!unformat(input, "%d", &index)) + return clib_error_return(0, "invalid index `%U'", + format_unformat_error, input); + + if (mapper_del_ns(mapper_indexes[index])) + return clib_error_return(0, "Could not del ns %d", index); + return 0; +} + +VLIB_CLI_COMMAND (mapper_ns_del_command, static) = { + .path = "test mapper ns delete", + .short_help = "test mapper ns delete <index>", + .function = mapper_ns_del_command_fn, +}; + +static clib_error_t 
* +mapper_iface_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 nsindex; + u32 ifindex; + u32 sw_if_index; + int del = 0; + if (!unformat(input, "%d", &nsindex)) + return clib_error_return(0, "invalid nsindex `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &ifindex)) + return clib_error_return(0, "invalid ifindex `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &sw_if_index)) + return clib_error_return(0, "invalid sw_if_index `%U'", + format_unformat_error, input); + if (unformat(input, "del")) + del = 1; + + clib_warning("mapper_add_del %d %d %d %d", mapper_indexes[nsindex], ifindex, sw_if_index, del); + + if (mapper_add_del(mapper_indexes[nsindex], ifindex, sw_if_index, del)) + return clib_error_return(0, "Could not add iface"); + return 0; +} + + +VLIB_CLI_COMMAND (mapper_iface_command, static) = { + .path = "test mapper iface", + .short_help = "test mapper iface <nsindex> <linux-ifindex> <sw_if_index> [del]", + .function = mapper_iface_command_fn, +}; + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + //.version = VPP_BUILD_VER, FIXME + .description = "netlink", +}; +/* *INDENT-ON* */ + |