diff options
Diffstat (limited to 'utils/extras')
-rw-r--r-- | utils/extras/CMakeLists.txt | 101 | ||||
-rw-r--r-- | utils/extras/cmake/Modules/Packaging.cmake | 31 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/mapper.c | 270 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/mapper.h | 35 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/netns.c | 787 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/netns.h | 145 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/rtnl.c | 604 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/rtnl.h | 60 | ||||
-rw-r--r-- | utils/extras/devices/rtnetlink/test.c | 203 | ||||
-rw-r--r-- | utils/extras/rtinject/tap_inject.c | 380 | ||||
-rw-r--r-- | utils/extras/rtinject/tap_inject.h | 108 | ||||
-rw-r--r-- | utils/extras/rtinject/tap_inject_netlink.c | 285 | ||||
-rw-r--r-- | utils/extras/rtinject/tap_inject_node.c | 374 | ||||
-rw-r--r-- | utils/extras/rtinject/tap_inject_tap.c | 170 |
14 files changed, 3553 insertions, 0 deletions
diff --git a/utils/extras/CMakeLists.txt b/utils/extras/CMakeLists.txt new file mode 100644 index 000000000..85f924cb1 --- /dev/null +++ b/utils/extras/CMakeLists.txt @@ -0,0 +1,101 @@ +# Copyright (c) 2017-2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.5 FATAL_ERROR) +project(extra-plugin) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} + "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/Modules/" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" +) + +set (CMAKE_CXX_STANDARD 11) +set (CMAKE_C_STANDARD 11) + +# Check for memfd_create syscall +include(CheckSymbolExists) +CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE ) +if ( HAVE_MEMFD_CREATE ) + add_definitions ( -DHAVE_MEMFD_CREATE ) +endif() + +if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) + set(HICN_EXTRA_PLUGIN extra-plugin) +endif() + +include (Packaging) + +# Dependencies + +find_package(Vpp REQUIRED) + +include_directories(${VPP_INCLUDE_DIR}) + +set(HICN_EXTRA_PLUGIN_SOURCE_FILES + rtinject/tap_inject.c + rtinject/tap_inject_netlink.c + rtinject/tap_inject_node.c + rtinject/tap_inject_tap.c + devices/rtnetlink/mapper.c + devices/rtnetlink/netns.c + devices/rtnetlink/rtnl.c +) + +set(HICN_EXTRA_PLUGIN_HEADER_FILES + devices/rtnetlink/mapper.h + devices/rtnetlink/netns.h + devices/rtnetlink/rtnl.h +) + +if (NOT VPP_HOME) + set(VPP_HOME /usr) +endif() + +if (NOT CMAKE_BUILD_TYPE) + set (CMAKE_BUILD_TYPE "Release") +endif (NOT CMAKE_BUILD_TYPE) + +SET(EXTRA_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/lib CACHE STRING "extra_install_prefix") + +if (CMAKE_BUILD_TYPE STREQUAL "Release") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -march=native -O3 -g") +elseif (CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -march=native -O0 -g") + add_definitions(-DCLIB_DEBUG -fPIC -fstack-protector-all) +endif() + +include_directories(SYSTEM) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEXTRA_VPP_PLUGIN=1") +add_library(hicn_extra_plugin SHARED + ${HICN_EXTRA_PLUGIN_SOURCE_FILES}) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}/vpp_plugins) + +set(VPP_INSTALL_PLUGIN ${EXTRA_INSTALL_PREFIX}/vpp_plugins) + +set_target_properties(hicn_extra_plugin + PROPERTIES + LINKER_LANGUAGE C + INSTALL_RPATH ${VPP_INSTALL_PLUGIN} + PREFIX "") + +install(DIRECTORY + DESTINATION ${VPP_INSTALL_PLUGIN} + COMPONENT ${HICN_EXTRA_PLUGIN}) + +install(TARGETS hicn_extra_plugin + DESTINATION + ${VPP_INSTALL_PLUGIN} + COMPONENT ${HICN_EXTRA_PLUGIN}) diff --git a/utils/extras/cmake/Modules/Packaging.cmake b/utils/extras/cmake/Modules/Packaging.cmake new file mode 100644 index 000000000..69f63971f --- /dev/null +++ b/utils/extras/cmake/Modules/Packaging.cmake @@ -0,0 +1,31 @@ +# Copyright (c) 2017-2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +###################### +# Packages section +###################### + +set(${HICN_EXTRA_PLUGIN}_DESCRIPTION + "A extra plugin to VPP." + CACHE STRING "Description for deb/rpm package." +) + +set(${HICN_EXTRA_PLUGIN}_DEB_DEPENDENCIES + "vpp (>= stable_version-release), vpp (<< next_version-release), vpp-plugin-core (>= stable_version-release), vpp-plugin-core (<< next_version-release)" + CACHE STRING "Dependencies for deb/rpm package." +) + +set(${HICN_EXTRA_PLUGIN}_RPM_DEPENDENCIES + "vpp >= stable_version-release, vpp < next_version-release, vpp-plugins >= stable_version-release, vpp-plugins < next_version-release" + CACHE STRING "Dependencies for deb/rpm package." +) diff --git a/utils/extras/devices/rtnetlink/mapper.c b/utils/extras/devices/rtnetlink/mapper.c new file mode 100644 index 000000000..ed4fa5634 --- /dev/null +++ b/utils/extras/devices/rtnetlink/mapper.c @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#include <vnet/fib/fib.h> + +#include "netns.h" +#include "mapper.h" + +typedef struct { + int linux_ifindex; + u32 sw_if_index; +} mapper_map_t; + +typedef struct { + char nsname[RTNL_NETNS_NAMELEN + 1]; + mapper_map_t *mappings; + u32 netns_handle; //Used to receive notifications + u32 v4fib_index; //One fib index for the namespace + u32 v6fib_index; +} mapper_ns_t; + +typedef struct { + mapper_ns_t *namespaces; +} mapper_main_t; + +static mapper_main_t mapper_main; + +mapper_map_t *mapper_get_by_ifindex(mapper_ns_t *ns, int ifindex) +{ + mapper_map_t *map; + pool_foreach(map, ns->mappings, { + if (ifindex == map->linux_ifindex) + return map; + }); + return NULL; +} + +int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del) +{ + mapper_main_t *mm = &mapper_main; + clib_warning("NS %d %s %U", ns - mm->namespaces, del?"del":"add", format_ns_route, route); + + mapper_map_t *map = mapper_get_by_ifindex(ns, route->oif); + if (!map) + return 0; + + if (route->rtm.rtm_family == AF_INET6) { + + //Filter-out multicast + if (route->rtm.rtm_dst_len >= 8 && route->dst[0] == 0xff) + return 0; + + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = route->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&prefix.fp_addr.ip6, route->dst, sizeof (prefix.fp_addr.ip6)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip6, route->gateway, sizeof (nh.ip6)); + + fib_table_entry_path_add (ns->v6fib_index, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, map->sw_if_index, ns->v6fib_index, + 0 /* weight */, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } else { + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = route->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&prefix.fp_addr.ip4, route->dst, sizeof (prefix.fp_addr.ip4)); + + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip4, route->gateway, sizeof (nh.ip4)); + + fib_table_entry_path_add (ns->v4fib_index, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, map->sw_if_index, ns->v4fib_index, + 0 /* weight */, + (fib_mpls_label_t *) MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } + + return 0; +} + +static void +mapper_netns_notify_cb(void *obj, netns_type_t type, + u32 flags, uword opaque) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns = &mm->namespaces[(u32) opaque]; + ASSERT(!pool_is_free_index(mm->namespaces, (u32) opaque)); + if (type != NETNS_TYPE_ROUTE) + return; //For now... + + ns_route_t *route = obj; + if (flags & NETNS_F_DEL) { + mapper_add_del_route(ns, route, 1); + } else if (flags & NETNS_F_ADD) { + mapper_add_del_route(ns, route, 0); + } +} + +void +mapper_delmap(mapper_ns_t*ns, mapper_map_t *map) +{ + ns_route_t *route; + netns_t *netns = netns_getns(ns->netns_handle); + pool_foreach(route, netns->routes, { + if (route->oif == map->linux_ifindex) + mapper_add_del_route(ns, route, 1); + }); + pool_put(ns->mappings, map); +} + +mapper_map_t * +mapper_getmap(mapper_ns_t*ns, u32 sw_if_index, + int linux_ifindex, int create) +{ + mapper_map_t *map; + pool_foreach(map, ns->mappings, { + if (linux_ifindex == map->linux_ifindex) { + if (sw_if_index != map->sw_if_index) + return NULL; //Cannot have multiple mapping with the same ifindex + else + return map; + } + }); + + if (!create) + return NULL; + + pool_get(ns->mappings, map); + map->linux_ifindex = linux_ifindex; + map->sw_if_index = sw_if_index; + ip6_main.fib_index_by_sw_if_index[sw_if_index] = ns->v6fib_index; + ip4_main.fib_index_by_sw_if_index[sw_if_index] = ns->v4fib_index; + + //Load available routes + ns_route_t *route; + netns_t *netns = netns_getns(ns->netns_handle); + pool_foreach(route, netns->routes, { + if (route->oif == map->linux_ifindex) + mapper_add_del_route(ns, route, 0); + }); + return map; +} + +u32 +mapper_get_ns(char *nsname) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns; + pool_foreach(ns, mm->namespaces, { + if (!strcmp(nsname, ns->nsname)) + return ns - mm->namespaces; + }); + return ~0; +} + +int +mapper_add_del(u32 nsindex, int linux_ifindex, + u32 sw_if_index, int del) +{ + mapper_main_t *mm = &mapper_main; + //ip6_main_t *im6 = &ip6_main; + mapper_ns_t *ns = &mm->namespaces[nsindex]; + mapper_map_t *map; + //vnet_sw_interface_t *iface = vnet_get_sw_interface(vnet_get_main(), sw_if_index); + + if (pool_is_free(mm->namespaces, ns)) + return -1; + + /*if (!del) { + if ((iface->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) && + im6->fib_index_by_sw_if_index[sw_if_index] != ~0) { + //A custom fib index will be used... + clib_warning("Cannot add interface with a custom fib index (current is %d)", + im6->fib_index_by_sw_if_index[sw_if_index]); + return -1; + } + }*/ + + if (!(map = mapper_getmap(ns, sw_if_index, linux_ifindex, !del))) + return -1; + + if (del) + mapper_delmap(ns, map); + + return 0; +} + +int +mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns; + if (mapper_get_ns(nsname) != ~0) + return -1; //Already exists + + pool_get(mm->namespaces, ns); + strcpy(ns->nsname, nsname); + ns->v4fib_index = v4fib_index; + ns->v6fib_index = v6fib_index; + ns->mappings = 0; + + netns_sub_t sub; + sub.notify = mapper_netns_notify_cb; + sub.opaque = (uword)(ns - mm->namespaces); + if ((ns->netns_handle = netns_open(ns->nsname, &sub)) == ~0) { + pool_put(mm->namespaces, ns); + return -1; + } + *nsindex = ns - mm->namespaces; + return 0; +} + +int +mapper_del_ns(u32 nsindex) +{ + mapper_main_t *mm = &mapper_main; + mapper_ns_t *ns = &mm->namespaces[nsindex]; + if (pool_is_free(mm->namespaces, ns)) + return -1; + + //Remove all existing mappings + int i, *indexes = 0; + pool_foreach_index(i, ns->mappings, { + vec_add1(indexes, i); + }); + vec_foreach_index(i, indexes) { + mapper_delmap(ns, &ns->mappings[indexes[i]]); + } + vec_free(indexes); + + netns_close(ns->netns_handle); + pool_put(mm->namespaces, ns); + return 0; +} + +clib_error_t * +mapper_init (vlib_main_t * vm) +{ + mapper_main_t *mm = &mapper_main; + mm->namespaces = 0; + return 0; +} + +VLIB_INIT_FUNCTION (mapper_init); diff --git a/utils/extras/devices/rtnetlink/mapper.h b/utils/extras/devices/rtnetlink/mapper.h new file mode 100644 index 000000000..32e95d48f --- /dev/null +++ b/utils/extras/devices/rtnetlink/mapper.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MAPPER_H_ +#define MAPPER_H_ + +#include "netns.h" + +/* + * Automatically map linux network routes to VPP. + * Each namespace is associated with an individual fib. + * + * One linux interface can only be mapped to a single VPP + * interface, but one VPP interface can be mapped to + * multiple linux interfaces. + * A mapped VPP interface must not have any configured fib. + */ + +int mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex); +int mapper_del_ns(u32 nsindex); +int mapper_add_del(u32 nsindex, int linux_ifindex, u32 sw_if_index, int del); + +#endif /* MAPPER_H_ */ diff --git a/utils/extras/devices/rtnetlink/netns.c b/utils/extras/devices/rtnetlink/netns.c new file mode 100644 index 000000000..19adb469a --- /dev/null +++ b/utils/extras/devices/rtnetlink/netns.c @@ -0,0 +1,787 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/format.h> +#include <stddef.h> + +#include "netns.h" + +/* Enable some RTA values debug */ +//#define RTNL_CHECK + +#define is_nonzero(x) \ + ({ \ + u8 __is_zero_zero[sizeof(x)] = {}; \ + memcmp(__is_zero_zero, &x, sizeof(x)); \ + }) + +typedef struct { + u8 type; //Attribute identifier + u8 unique; //Part of the values uniquely identifying an entry + u16 offset; //Offset where stored in struct + u16 size; //Length of the attribute +} rtnl_mapping_t; + +#define ns_foreach_ifla \ + _(IFLA_ADDRESS, hwaddr) \ + _(IFLA_BROADCAST, broadcast) \ + _(IFLA_IFNAME, ifname) \ + _(IFLA_MASTER, master) \ + _(IFLA_MTU, mtu) \ + _(IFLA_QDISC, qdisc) + +static rtnl_mapping_t ns_ifmap[] = { +#define _(t, e) \ + { \ + .type = t, \ + .offset = offsetof(ns_link_t, e), \ + .size = sizeof(((ns_link_t*)0)->e) \ + }, + ns_foreach_ifla +#undef _ + { .type = 0 } +}; + +u8 *format_ns_link (u8 *s, va_list *args) +{ + ns_link_t *l = va_arg(*args, ns_link_t *); + s = format(s, "%s index %u", l->ifname, l->ifi.ifi_index); + return s; +} + +#define ns_foreach_rta \ + _(RTA_DST, dst, 1) \ + _(RTA_SRC, src, 1) \ + _(RTA_GATEWAY, gateway, 1) \ + _(RTA_IIF, iif, 1) \ + _(RTA_OIF, oif, 1) \ + _(RTA_PREFSRC, prefsrc, 0) \ + _(RTA_TABLE, table, 0) \ + _(RTA_PRIORITY, priority, 0) \ + _(RTA_CACHEINFO, cacheinfo, 0) \ + _(RTA_ENCAP, encap, 1) + +static rtnl_mapping_t ns_routemap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_route_t, e), \ + .size = sizeof(((ns_route_t*)0)->e) \ + }, + ns_foreach_rta +#undef _ + { .type = 0 } +}; + +u8 *format_ns_route (u8 *s, va_list *args) +{ + ns_route_t *r = va_arg(*args, ns_route_t *); + void *format_ip = r->rtm.rtm_family == AF_INET ? format_ip4_address : format_ip6_address; + s = format(s, "%U/%d", format_ip, r->dst, r->rtm.rtm_dst_len); + if (r->rtm.rtm_src_len) + s = format(s, " from %U/%d", format_ip, r->src, r->rtm.rtm_src_len); + if (is_nonzero(r->gateway)) + s = format(s, " via %U", format_ip, r->gateway); + if (r->iif) + s = format(s, " iif %d", r->iif); + if (r->oif) + s = format(s, " oif %d", r->oif); + if (is_nonzero(r->prefsrc)) + s = format(s, " src %U", format_ip, r->prefsrc); + if (r->table) + s = format(s, " table %d", r->table); + if (r->priority) + s = format(s, " priority %u", r->priority); + return s; +} + +#define ns_foreach_ifaddr \ + _(IFA_ADDRESS, addr, 1) \ + _(IFA_LOCAL, local, 1) \ + _(IFA_LABEL, label, 0) \ + _(IFA_BROADCAST, broadcast, 0) \ + _(IFA_ANYCAST, anycast, 0) \ + _(IFA_CACHEINFO, cacheinfo, 0) + +static rtnl_mapping_t ns_addrmap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_addr_t, e), \ + .size = sizeof(((ns_addr_t*)0)->e) \ + }, + ns_foreach_ifaddr +#undef _ + { .type = 0 } +}; + +u8 *format_ns_addr (u8 *s, va_list *args) +{ + ns_addr_t *a = va_arg(*args, ns_addr_t *); + void *format_ip = a->ifaddr.ifa_family == AF_INET ? format_ip4_address : format_ip6_address; + s = format(s, "%U/%d", format_ip, a->addr, a->ifaddr.ifa_prefixlen); + if (is_nonzero(a->label)) + s = format(s, " dev %s", a->label); + if (is_nonzero(a->broadcast)) + s = format(s, " broadcast %U", format_ip, a->broadcast); + if (is_nonzero(a->anycast)) + s = format(s, " anycast %U", format_ip, a->anycast); + if (is_nonzero(a->local)) + s = format(s, " local %U", format_ip, a->local); + return s; +} + +#ifndef NDA_RTA +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) +#endif + +#define ns_foreach_neigh \ + _(NDA_DST, dst, 1) \ + _(NDA_LLADDR, lladdr, 0) \ + _(NDA_PROBES, probes, 0) \ + _(NDA_CACHEINFO, cacheinfo, 0) + +static rtnl_mapping_t ns_neighmap[] = { +#define _(t, e, u) \ + { \ + .type = t, .unique = u, \ + .offset = offsetof(ns_neigh_t, e), \ + .size = sizeof(((ns_neigh_t*)0)->e) \ + }, + ns_foreach_neigh +#undef _ + { .type = 0 } +}; + +u8 *format_ns_neigh (u8 *s, va_list *args) +{ + ns_neigh_t *n = va_arg(*args, ns_neigh_t *); + void *format_ip = n->nd.ndm_family == AF_INET ? format_ip4_address : format_ip6_address; + s = format(s, "%U", format_ip, n->dst); + if (is_nonzero(n->lladdr)) + s = format(s, " lladdr %U", format_ethernet_address, n->lladdr); + if (n->probes) + s = format(s, " probes %d", n->probes); + return s; +} + +typedef struct { + void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque); + uword opaque; + u32 netns_index; +} netns_handle_t; + +typedef struct { + netns_t netns; + u32 rtnl_handle; + u32 subscriber_count; +} netns_p; + +typedef struct { + netns_p *netnss; + netns_handle_t *handles; +} netns_main_t; + +netns_main_t netns_main; + +static int +rtnl_parse_rtattr(struct rtattr *db[], size_t max, + struct rtattr *rta, size_t len) { + for(; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) { + if (rta->rta_type <= max) + db[rta->rta_type] = rta; +#ifdef RTNL_CHECK + else + clib_warning("RTA type too high: %d", rta->rta_type); +#endif + } + + if(len) { + clib_warning("rattr lenght mistmatch %d %d len", + (int) len, (int) rta->rta_len); + return -1; + } + return 0; +} + +/* + * Debug function to display when + * we receive an RTA that I forgot in + * the mapping table (there are so many of them). + */ +#ifdef RTNL_CHECK +static void +rtnl_entry_check(struct rtattr *rtas[], + size_t rta_len, + rtnl_mapping_t map[], + char *logstr) +{ + int i; + for (i=0; i<rta_len; i++) { + if (!rtas[i]) + continue; + + rtnl_mapping_t *m = map; + for (m = map; m->type; m++) { + if (m->type == rtas[i]->rta_type) + break; + } + if (!m->type) + clib_warning("Unknown RTA type %d (%s)", rtas[i]->rta_type, logstr); + } +} +#endif + +/* + * Check if the provided entry matches the parsed and unique rtas + */ +static int +rtnl_entry_match(void *entry, + struct rtattr *rtas[], + rtnl_mapping_t map[]) +{ + u8 zero[1024] = {}; + for ( ;map->type != 0; map++) { + struct rtattr *rta = rtas[map->type]; + size_t rta_len = rta?RTA_PAYLOAD(rta):0; + if (!map->unique) + continue; + + if (rta && RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", + rta->rta_type, rta->rta_len, map->size); + return -1; + } + + if ((rta && memcmp(RTA_DATA(rta), entry + map->offset, rta_len)) || + memcmp(entry + map->offset + rta_len, zero, map->size - rta_len)) { + return 0; + } + } + return 1; +} + +static int +rtnl_entry_set(void *entry, + struct rtattr *rtas[], + rtnl_mapping_t map[], + int init) +{ + for (; map->type != 0; map++) { + + struct rtattr *rta = rtas[map->type]; + + if(map->type == RTA_ENCAP && rta) { + /*Data of RTA_ENCAP is a pointer to rta attributes for MPLS*/ + rta = (struct rtattr*)RTA_DATA(rta); + if (RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); + return -1; + } + memcpy(entry + map->offset, RTA_DATA(rta), map->size); + memset(entry + map->offset + map->size, 0, 0); + } else if (rta) { + if (RTA_PAYLOAD(rta) > map->size) { + clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size); + return -1; + } + memcpy(entry + map->offset, RTA_DATA(rta), RTA_PAYLOAD(rta)); + memset(entry + map->offset + RTA_PAYLOAD(rta), 0, map->size - RTA_PAYLOAD(rta)); + } else if (init) { + memset(entry + map->offset, 0, map->size); + } + } + return 0; +} + +void +netns_notify(netns_p *ns, void *obj, netns_type_t type, u32 flags) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h; + pool_foreach(h, nm->handles, { + if (h->netns_index == (ns - nm->netnss) && h->notify) + h->notify(obj, type, flags, h->opaque); + }); +} + +static_always_inline int +mask_match(void *a, void *b, void *mask, size_t len) +{ + u8 *va = (u8 *) a; + u8 *vb = (u8 *) b; + u8 *vm = (u8 *) mask; + while (len--) { + if ((va[len] ^ vb[len]) & vm[len]) + return 0; + } + return 1; +} + +static ns_link_t * +ns_get_link(netns_p *ns, struct ifinfomsg *ifi, struct rtattr *rtas[]) +{ + ns_link_t *link; + pool_foreach(link, ns->netns.links, { + if(ifi->ifi_index == link->ifi.ifi_index) + return link; + }); + return NULL; +} + +static int +ns_rcv_link(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_link_t *link; + struct ifinfomsg *ifi; + struct rtattr *rtas[IFLA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*ifi)) + return -1; + + ifi = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*ifi))) && + rtnl_parse_rtattr(rtas, IFLA_MAX, IFLA_RTA(ifi), + IFLA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, IFLA_MAX + 1, ns_ifmap, "link"); +#endif + + link = ns_get_link(ns, ifi, rtas); + + if (hdr->nlmsg_type == RTM_DELLINK) { + if (!link) + return -1; + pool_put(ns->netns.links, link); + netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_DEL); + return 0; + } + + if (!link) { + pool_get(ns->netns.links, link); + rtnl_entry_set(link, rtas, ns_ifmap, 1); + } else { + rtnl_entry_set(link, rtas, ns_ifmap, 0); + } + + link->ifi = *ifi; + link->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_ADD); + return 0; +} + +static ns_route_t * +ns_get_route(netns_p *ns, struct rtmsg *rtm, struct rtattr *rtas[]) +{ + ns_route_t *route; + + //This describes the values which uniquely identify a route + struct rtmsg msg = { + .rtm_family = 0xff, + .rtm_dst_len = 0xff, + .rtm_src_len = 0xff, + .rtm_table = 0xff, + .rtm_protocol = 0xff, + .rtm_type = 0xff + }; + + pool_foreach(route, ns->netns.routes, { + if(mask_match(&route->rtm, rtm, &msg, sizeof(struct rtmsg)) && + rtnl_entry_match(route, rtas, ns_routemap)) + return route; + }); + return NULL; +} + +static int +ns_rcv_route(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_route_t *route; + struct rtmsg *rtm; + struct rtattr *rtas[RTA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*rtm)) + return -1; + + rtm = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*rtm))) && + rtnl_parse_rtattr(rtas, RTA_MAX, RTM_RTA(rtm), + RTM_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, RTA_MAX + 1, ns_routemap, "route"); +#endif + route = ns_get_route(ns, rtm, rtas); + + if (hdr->nlmsg_type == RTM_DELROUTE) { + if (!route) + return -1; + pool_put(ns->netns.routes, route); + netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_DEL); + return 0; + } + + if (!route) { + pool_get(ns->netns.routes, route); + memset(route, 0, sizeof(*route)); + rtnl_entry_set(route, rtas, ns_routemap, 1); + } else { + rtnl_entry_set(route, rtas, ns_routemap, 0); + } + + route->rtm = *rtm; + route->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_ADD); + return 0; +} + +static ns_addr_t * +ns_get_addr(netns_p *ns, struct ifaddrmsg *ifaddr, struct rtattr *rtas[]) +{ + ns_addr_t *addr; + + //This describes the values which uniquely identify a route + struct ifaddrmsg msg = { + .ifa_family = 0xff, + .ifa_prefixlen = 0xff, + }; + + pool_foreach(addr, ns->netns.addresses, { + if(mask_match(&addr->ifaddr, ifaddr, &msg, sizeof(struct ifaddrmsg)) && + rtnl_entry_match(addr, rtas, ns_addrmap)) + return addr; + }); + return NULL; +} + +static int +ns_rcv_addr(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_addr_t *addr; + struct ifaddrmsg *ifaddr; + struct rtattr *rtas[IFA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*ifaddr)) + return -1; + + ifaddr = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*ifaddr))) && + rtnl_parse_rtattr(rtas, IFA_MAX, IFA_RTA(ifaddr), + IFA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, IFA_MAX + 1, ns_addrmap, "addr"); +#endif + addr = ns_get_addr(ns, ifaddr, rtas); + + if (hdr->nlmsg_type == RTM_DELADDR) { + if (!addr) + return -1; + pool_put(ns->netns.addresses, addr); + netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_DEL); + return 0; + } + + if (!addr) { + pool_get(ns->netns.addresses, addr); + memset(addr, 0, sizeof(*addr)); + rtnl_entry_set(addr, rtas, ns_addrmap, 1); + } else { + rtnl_entry_set(addr, rtas, ns_addrmap, 0); + } + + addr->ifaddr = *ifaddr; + addr->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_ADD); + return 0; +} + +static ns_neigh_t * +ns_get_neigh(netns_p *ns, struct ndmsg *nd, struct rtattr *rtas[]) +{ + ns_neigh_t *neigh; + + //This describes the values which uniquely identify a route + struct ndmsg msg = { + .ndm_family = 0xff, + .ndm_ifindex = 0xff, + }; + + pool_foreach(neigh, ns->netns.neighbors, { + if(mask_match(&neigh->nd, nd, &msg, sizeof(&msg)) && + rtnl_entry_match(neigh, rtas, ns_neighmap)) + return neigh; + }); + return NULL; +} + +static int +ns_rcv_neigh(netns_p *ns, struct nlmsghdr *hdr) +{ + ns_neigh_t *neigh; + struct ndmsg *nd; + struct rtattr *rtas[NDA_MAX + 1] = {}; + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + + if(datalen < sizeof(*nd)) + return -1; + + nd = NLMSG_DATA(hdr); + if((datalen > NLMSG_ALIGN(sizeof(*nd))) && + rtnl_parse_rtattr(rtas, NDA_MAX, NDA_RTA(nd), + NDA_PAYLOAD(hdr))) { + return -1; + } +#ifdef RTNL_CHECK + rtnl_entry_check(rtas, NDA_MAX + 1, ns_neighmap, "nd"); +#endif + neigh = ns_get_neigh(ns, nd, rtas); + + if (hdr->nlmsg_type == RTM_DELNEIGH) { + if (!neigh) + return -1; + pool_put(ns->netns.neighbors, neigh); + netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_DEL); + return 0; + } + + if (!neigh) { + pool_get(ns->netns.neighbors, neigh); + memset(neigh, 0, sizeof(*neigh)); + rtnl_entry_set(neigh, rtas, ns_neighmap, 1); + } else { + rtnl_entry_set(neigh, rtas, ns_neighmap, 0); + } + + neigh->nd = *nd; + neigh->last_updated = vlib_time_now(vlib_get_main()); + netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_ADD); + return 0; +} + +#define ns_object_foreach \ + _(neighbors, NETNS_TYPE_NEIGH) \ + _(routes, NETNS_TYPE_ROUTE) \ + _(addresses, NETNS_TYPE_ADDR) \ + _(links, NETNS_TYPE_LINK) + +static void +ns_recv_error(rtnl_error_t err, uword o) +{ + //An error was received. Reset everything. + netns_p *ns = &netns_main.netnss[o]; + u32 *indexes = 0; + u32 *i = 0; + +#define _(pool, type) \ + pool_foreach_index(*i, ns->netns.pool, { \ + vec_add1(indexes, *i); \ + }) \ + vec_foreach(i, indexes) { \ + pool_put_index(ns->netns.pool, *i); \ + netns_notify(ns, &ns->netns.pool[*i], type, NETNS_F_DEL); \ + } \ + vec_reset_length(indexes); + + ns_object_foreach + +#undef _ + vec_free(indexes); +} + +static void +ns_recv_rtnl(struct nlmsghdr *hdr, uword o) +{ + netns_p *ns = &netns_main.netnss[o]; + switch (hdr->nlmsg_type) { + case RTM_NEWROUTE: + case RTM_DELROUTE: + ns_rcv_route(ns, hdr); + break; + case RTM_NEWLINK: + case RTM_DELLINK: + ns_rcv_link(ns, hdr); + break; + case RTM_NEWADDR: + case RTM_DELADDR: + ns_rcv_addr(ns, hdr); + break; + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + ns_rcv_neigh(ns, hdr); + break; + default: + clib_warning("unknown rtnl type %d", hdr->nlmsg_type); + break; + } +} + +static void +netns_destroy(netns_p *ns) +{ + netns_main_t *nm = &netns_main; + rtnl_stream_close(ns->rtnl_handle); + pool_put(nm->netnss, ns); + pool_free(ns->netns.links); + pool_free(ns->netns.addresses); + pool_free(ns->netns.routes); + pool_free(ns->netns.neighbors); +} + +static netns_p * +netns_get(char *name) +{ + netns_main_t *nm = &netns_main; + netns_p *ns; + pool_foreach(ns, nm->netnss, { + if (!strcmp(name, ns->netns.name)) + return ns; + }); + + if (strlen(name) > RTNL_NETNS_NAMELEN) + return NULL; + + pool_get(nm->netnss, ns); + rtnl_stream_t s = { + .recv_message = ns_recv_rtnl, + .error = ns_recv_error, + .opaque = (uword)(ns - nm->netnss), + }; + strcpy(s.name, name); + + u32 handle; + if ((handle = rtnl_stream_open(&s)) == ~0) { + pool_put(nm->netnss, ns); + return NULL; + } + + strcpy(ns->netns.name, name); + ns->netns.addresses = 0; + ns->netns.links = 0; + ns->netns.neighbors = 0; + ns->netns.routes = 0; + ns->subscriber_count = 0; + ns->rtnl_handle = handle; + return ns; +} + +u32 netns_open(char *name, netns_sub_t *sub) +{ + netns_main_t *nm = &netns_main; + netns_p *ns; + netns_handle_t *p; + if (!(ns = netns_get(name))) + return ~0; + + pool_get(nm->handles, p); + p->netns_index = ns - nm->netnss; + p->notify = sub->notify; + p->opaque = sub->opaque; + ns->subscriber_count++; + return p - nm->handles; +} + +netns_t *netns_getns(u32 handle) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + return &ns->netns; +} + +void netns_close(u32 handle) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + pool_put(h, nm->handles); + ns->subscriber_count--; + if (!ns->subscriber_count) + netns_destroy(ns); +} + +void netns_callme(u32 handle, char del) +{ + netns_main_t *nm = &netns_main; + netns_handle_t *h = pool_elt_at_index(nm->handles, handle); + netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index); + u32 i = 0; + if (!h->notify) + return; + +#define _(pool, type) \ + pool_foreach_index(i, ns->netns.pool, { \ + h->notify(&ns->netns.pool[i], type, \ + del?NETNS_F_DEL:NETNS_F_ADD, h->opaque); \ + }); + + ns_object_foreach +#undef _ + + } + +u8 *format_ns_object(u8 *s, va_list *args) +{ + netns_type_t t = va_arg(*args, netns_type_t); + void *o = va_arg(*args, void *); + switch (t) { + case NETNS_TYPE_ADDR: + return format(s, "addr %U", format_ns_addr, o); + case NETNS_TYPE_ROUTE: + return format(s, "route %U", format_ns_route, o); + case NETNS_TYPE_LINK: + return format(s, "link %U", format_ns_link, o); + case NETNS_TYPE_NEIGH: + return format(s, "neigh %U", format_ns_neigh, o); + } + return s; +} + +u8 *format_ns_flags(u8 *s, va_list *args) +{ + u32 flags = va_arg(*args, u32); + if (flags & NETNS_F_ADD) + s = format(s, "add"); + else if (flags & NETNS_F_DEL) + s = format(s, "del"); + else + s = format(s, "mod"); + return s; +} + +clib_error_t * +netns_init (vlib_main_t * vm) +{ + netns_main_t *nm = &netns_main; + nm->netnss = 0; + nm->handles = 0; + return 0; +} + +VLIB_INIT_FUNCTION (netns_init); diff --git a/utils/extras/devices/rtnetlink/netns.h b/utils/extras/devices/rtnetlink/netns.h new file mode 100644 index 000000000..53effe5ce --- /dev/null +++ b/utils/extras/devices/rtnetlink/netns.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NETNS_H_ +#define NETNS_H_ + +#include <vlib/vlib.h> + +#include <sys/socket.h> +#include <linux/rtnetlink.h> +#include <linux/netlink.h> +#include <net/if.h> + +#include "rtnl.h" + +/*include it for 'struct mpls_label'*/ +#include <linux/mpls.h> +/*so far depth is fixed, looking into ways to be dynamic*/ +#define MPLS_STACK_DEPTH 7 + +typedef struct { + struct ifinfomsg ifi; + u8 hwaddr[IFHWADDRLEN]; + u8 broadcast[IFHWADDRLEN]; + u8 ifname[IFNAMSIZ]; + u32 mtu; + u32 master; + u8 qdisc[IFNAMSIZ]; + struct rtnl_link_stats stats; //This struct is big and only comes as a response to a request + f64 last_updated; +} ns_link_t; + +typedef struct { + struct rtmsg rtm; + u8 dst[16]; + u8 src[16]; + u8 via[16]; + u8 prefsrc[16]; + u32 iif; + u32 oif; + u32 table; + u8 gateway[16]; + u32 priority; + struct rta_cacheinfo cacheinfo; + struct mpls_label encap[MPLS_STACK_DEPTH]; + f64 last_updated; +} ns_route_t; + +typedef struct { + struct ifaddrmsg ifaddr; + u8 addr[16]; + u8 local[16]; + u8 label[IFNAMSIZ]; + u8 broadcast[16]; + u8 anycast[16]; + struct ifa_cacheinfo cacheinfo; + f64 last_updated; +} ns_addr_t; + +typedef struct { + struct ndmsg nd; + u8 dst[16]; + u8 lladdr[IFHWADDRLEN]; + u32 probes; + struct nda_cacheinfo cacheinfo; + f64 last_updated; +} ns_neigh_t; + +typedef struct { + char name[RTNL_NETNS_NAMELEN + 1]; + ns_link_t *links; + ns_route_t *routes; + ns_addr_t *addresses; + ns_neigh_t *neighbors; +} netns_t; + + +typedef enum { + NETNS_TYPE_LINK, + NETNS_TYPE_ROUTE, + NETNS_TYPE_ADDR, + NETNS_TYPE_NEIGH, +} netns_type_t; + +//Flags used in notification functions call +#define NETNS_F_ADD 0x01 +#define NETNS_F_DEL 0x02 + +typedef struct { + void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque); + uword opaque; +} netns_sub_t; + +/* + * Subscribe for events related to the given namespace. + * When another subscriber already uses the namespace, + * this call will not trigger updates for already + * existing routes (This is to protect against + * synch. Vs asynch. issues). + */ +u32 netns_open(char *name, netns_sub_t *sub); + +/* + * Retrieves the namespace structure associated with a + * given namespace handler. + */ +netns_t *netns_getns(u32 handle); + +/* + * Terminates a subscriber session. + */ +void netns_close(u32 handle); + +/* + * Calls the callback associated with the handle + * for all existing objects with the flags + * set to (del?NETNS_F_DEL:NETNS_F_ADD). + */ +void netns_callme(u32 handle, char del); + +/* + * netns struct format functions. + * Taking the struct as single argument. + */ +u8 *format_ns_neigh(u8 *s, va_list *args); +u8 *format_ns_addr(u8 *s, va_list *args); +u8 *format_ns_route(u8 *s, va_list *args); +u8 *format_ns_link(u8 *s, va_list *args); + +u8 *format_ns_object(u8 *s, va_list *args); +u8 *format_ns_flags(u8 *s, va_list *args); + +#endif diff --git a/utils/extras/devices/rtnetlink/rtnl.c b/utils/extras/devices/rtnetlink/rtnl.c new file mode 100644 index 000000000..ed3db9e72 --- /dev/null +++ b/utils/extras/devices/rtnetlink/rtnl.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vppinfra/error.h> + +#include <sys/socket.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <float.h> +#include <fcntl.h> + +#include <sys/types.h> +#include <sys/wait.h> +#include <errno.h> +#include <string.h> + +#include "netns.h" +#include "rtnl.h" + +#undef DBL_MAX +#define DBL_MAX 1000000000.0 + +typedef enum { + RTNL_E_OPEN, + RTNL_E_CLOSE, + RTNL_E_READ, +} rtnl_event_t; + +typedef enum { + RTNL_S_INIT, + RTNL_S_SYNC, + RTNL_S_READY, +} rtnl_state_t; + +typedef enum { + RTNL_SS_OPENING, + RTNL_SS_LINK, + RTNL_SS_ADDR, + RTNL_SS_ROUTE4, + RTNL_SS_ROUTE6, + RTNL_SS_NEIGH, +} rtnl_sync_state_t; + +typedef struct { + rtnl_stream_t stream; + rtnl_state_t state; + rtnl_sync_state_t sync_state; + int ns_fd; + int rtnl_socket; + u32 unix_index; + u32 rtnl_seq; + f64 timeout; +} rtnl_ns_t; + +typedef struct { + f64 now; + rtnl_ns_t *streams; +} rtnl_main_t; + +static rtnl_main_t rtnl_main; +static vlib_node_registration_t rtnl_process_node; + +#define RTNL_BUFFSIZ 16384 +#define RTNL_DUMP_TIMEOUT 1 + +static inline u32 grpmask(u32 g) +{ + ASSERT (g <= 31); + if (g) { + return 1 << (g - 1); + } else + return 0; +} + + +u8 *format_rtnl_nsname2path(u8 *s, va_list *args) +{ + char *nsname = va_arg(*args, char *); + if (!nsname || !strlen(nsname)) { + return format(s, "/proc/self/ns/net"); + } else if (strpbrk(nsname, "/") != NULL) { + return format(s, "%s", nsname); + } else { + return format(s, "/var/run/netns/%s", nsname); + } +} + +static_always_inline void +rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when) +{ + ns->timeout = when; +} + +static_always_inline void +rtnl_cancel_timeout(rtnl_ns_t *ns) +{ + ns->timeout = DBL_MAX; +} + +static clib_error_t *rtnl_read_cb(struct clib_file * f) +{ + rtnl_main_t *rm = &rtnl_main; + vlib_main_t *vm = vlib_get_main(); + rtnl_ns_t *ns = &rm->streams[f->private_data]; + vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_READ, (uword)(ns - rm->streams)); + return 0; +} + +int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct nlmsghdr nlh = { + .nlmsg_len = NLMSG_LENGTH(len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST, + .nlmsg_pid = 0, + .nlmsg_seq = ++ns->rtnl_seq, + }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, + }; + if(sendmsg(ns->rtnl_socket, &msg, 0) < 0) { + clib_warning("sendmsg error: %s", strerror(errno)); + return -1; + } + return 0; +} + +static void rtnl_socket_close(rtnl_ns_t *ns) +{ + clib_file_del(&file_main, &file_main.file_pool[ns->unix_index]); + close(ns->rtnl_socket); +} + +struct rtnl_thread_exec { + int fd; + void *(*fn)(void *); + void *arg; + void **ret; +}; + +static void *rtnl_exec_in_thread_fn(void *p) +{ + struct rtnl_thread_exec *ex = (struct rtnl_thread_exec *) p; + if (setns(ex->fd, 0)) + return (void *) ((uword) (-errno)); + + *ex->ret = ex->fn(ex->arg); + return NULL; +} + +static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret) +{ + pthread_t thread; + void *thread_ret; + struct rtnl_thread_exec ex = { + .fd = fd, + .fn = fn, + .arg = arg, + .ret = ret + }; + if(pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex)) + return -errno; + + if(pthread_join(thread, &thread_ret)) + return -errno; + + if (thread_ret) + return (int) ((uword)thread_ret); + + return 0; +} + +int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret) +{ + rtnl_main_t *rm = &rtnl_main; + if (pool_is_free_index(rm->streams, stream_index)) + return -EBADR; + + rtnl_ns_t *ns = pool_elt_at_index(rm->streams, stream_index); + return rtnl_exec_in_namespace_byfd(ns->ns_fd, fn, arg, ret); +} + +int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret) +{ + int fd; + u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, nsname); + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + vec_free(s); + return -errno; + } + + int r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret); + vec_free(s); + close(fd); + return r; +} + +/* this function is run by the second thread */ +static void *rtnl_thread_fn(void *p) +{ + rtnl_ns_t *ns = (rtnl_ns_t *) p; + if (setns(ns->ns_fd, 0)) { + clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno); + return (void *) -1; + } + + if ((ns->rtnl_socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) { + clib_warning("Cannot open socket"); + return (void *) -2; + } + + return NULL; +} + +static int rtnl_socket_open(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + pthread_t thread; + void *thread_ret; + if(pthread_create(&thread, NULL, rtnl_thread_fn, ns)) { + clib_warning("Can't create opening thread"); + return -1; + } + + if(pthread_join(thread, &thread_ret)) { + clib_warning("Can't join opening thread"); + return -2; + } + + if (thread_ret) { + clib_warning("Could not open netlink socket"); + return -3; + } + + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + .nl_pad = 0, + .nl_pid = 0, + /*add mpls message group*/ + .nl_groups = grpmask(RTNLGRP_LINK)| grpmask(RTNLGRP_IPV6_IFADDR) | + grpmask(RTNLGRP_IPV4_IFADDR) | grpmask(RTNLGRP_IPV4_ROUTE) | + grpmask(RTNLGRP_IPV6_ROUTE) | grpmask(RTNLGRP_NEIGH) | + grpmask(RTNLGRP_NOTIFY) /* | grpmask(RTNLGRP_MPLS_ROUTE)*/, + }; + + if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr))) { + close(ns->rtnl_socket); + return -3; + } + + clib_file_t template = {0}; + template.read_function = rtnl_read_cb; + template.file_descriptor = ns->rtnl_socket; + template.private_data = (uword) (ns - rm->streams); + ns->unix_index = clib_file_add (&file_main, &template); + return 0; +} + +static int +rtnl_rcv_error(rtnl_ns_t *ns, struct nlmsghdr *hdr, int *error) +{ + struct nlmsgerr *err = NLMSG_DATA(hdr); + size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr)); + if(datalen < sizeof(*err)) + return -1; + + *error = err->error; + return 0; +} + +static void +rtnl_sync_reset(rtnl_ns_t *ns) +{ + if (ns->sync_state == RTNL_SS_OPENING) + return; + + rtnl_socket_close(ns); + ns->sync_state = RTNL_SS_OPENING; +} + +static void +rtnl_sync_done(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + struct ifaddrmsg addrmsg; + struct rtmsg rtmsg; + struct ndmsg ndmsg; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + //Cannot happen here + break; + case RTNL_SS_LINK: + memset(&addrmsg, 0, sizeof(addrmsg)); + addrmsg.ifa_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_ADDR; + break; + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + memset(&rtmsg, 0, sizeof(rtmsg)); + rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6; + rtmsg.rtm_table = RT_TABLE_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6; + break; + case RTNL_SS_ROUTE6: + memset(&ndmsg, 0, sizeof(ndmsg)); + ndmsg.ndm_family = AF_UNSPEC; + if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + return; + } + rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT); + ns->sync_state = RTNL_SS_NEIGH; + break; + case RTNL_SS_NEIGH: + ns->state = RTNL_S_READY; + ns->sync_state = 0; + rtnl_cancel_timeout(ns); + break; + } +} + +static void +rtnl_sync_timeout(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + struct ifinfomsg imsg = {}; + switch (ns->sync_state) { + case RTNL_SS_OPENING: + if (rtnl_socket_open(ns)) { + rtnl_schedule_timeout(ns, rm->now + 10); + return; + } + imsg.ifi_family = AF_UNSPEC; + if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) { + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 10); + } + ns->sync_state = RTNL_SS_LINK; + rtnl_schedule_timeout(ns, rm->now + 2); + break; + case RTNL_SS_LINK: + case RTNL_SS_ADDR: + case RTNL_SS_ROUTE4: + case RTNL_SS_ROUTE6: + case RTNL_SS_NEIGH: + //Timeout happened while synchronizing + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now + 1); + break; + } +} + +static int +rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr) +{ + rtnl_main_t *rm = &rtnl_main; + int ret, error = 0; + + if (ns->state == RTNL_S_SYNC && + ((hdr->nlmsg_flags & RTM_F_NOTIFY) || + (hdr->nlmsg_seq != (ns->rtnl_seq)))) { + clib_warning("Received notification while in sync. Restart synchronization."); + rtnl_sync_reset(ns); + rtnl_schedule_timeout(ns, rm->now); + } + + switch (hdr->nlmsg_type) { + case NLMSG_DONE: + rtnl_sync_done(ns); + break; + case NLMSG_ERROR: + if((ret = rtnl_rcv_error(ns, hdr, &error))) + return ret; + break; + case RTM_NEWROUTE: + case RTM_DELROUTE: + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_NEWADDR: + case RTM_DELADDR: + case RTM_NEWNEIGH: + case RTM_DELNEIGH: + if (ns->stream.recv_message) + ns->stream.recv_message(hdr, ns->stream.opaque); + break; + default: + clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type); + break; + } + return 0; +} + +static void +rtnl_process_open(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state != RTNL_S_INIT) + return; + + ns->state = RTNL_S_SYNC; + ns->sync_state = RTNL_SS_OPENING; + rtnl_schedule_timeout(ns, rm->now); +} + +static void +rtnl_process_close(rtnl_ns_t *ns) +{ + rtnl_main_t *rm = &rtnl_main; + if (ns->state == RTNL_S_INIT) + return; + + rtnl_socket_close(ns); + close(ns->ns_fd); + pool_put(rm->streams, ns); +} + +static int +rtnl_process_read(rtnl_ns_t *ns) +{ + uint8_t buff[RTNL_BUFFSIZ]; + ssize_t len; + struct nlmsghdr *hdr; + while(1) { + if((len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT)) < 0) { + if(errno != EAGAIN) { + clib_warning("rtnetlink recv error (%d) [%s]: %s", ns->rtnl_socket, ns->stream.name, strerror(errno)); + return -1; + } + return 0; + } + + for(hdr = (struct nlmsghdr *) buff; + len > 0; + len -= NLMSG_ALIGN(hdr->nlmsg_len), + hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) { + if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) { + clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len); + return -1; + } + if (rtnl_ns_recv(ns, hdr)) + return -1; + } + } + return 0; +} + +static void +rtnl_process_timeout(rtnl_ns_t *ns) +{ + switch (ns->state) { + case RTNL_S_SYNC: + rtnl_sync_timeout(ns); + break; + case RTNL_S_INIT: + case RTNL_S_READY: + clib_warning("Should not happen"); + break; + } +} + +static uword +rtnl_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + rtnl_main_t *rm = &rtnl_main; + uword event_type; + uword *event_data = 0; + rm->now = vlib_time_now(vm); + f64 timeout = DBL_MAX; + rtnl_ns_t *ns; + + //Setting up + while (1) { + vlib_process_wait_for_event_or_clock(vm, timeout - rm->now); + event_type = vlib_process_get_events(vm, &event_data); + rm->now = vlib_time_now(vm); + + if (event_type == ~0) { //Clock event or no event + pool_foreach(ns, rm->streams, { + if (ns->timeout < rm->now) { + ns->timeout = DBL_MAX; + rtnl_process_timeout(ns); + } + }); + } else { + rtnl_ns_t *ns; + uword *d; + vec_foreach(d, event_data) { + ns = &rm->streams[d[0]]; + switch (event_type) + { + case RTNL_E_CLOSE: + rtnl_process_close(ns); + break; + case RTNL_E_OPEN: + rtnl_process_open(ns); + break; + case RTNL_E_READ: + rtnl_process_read(ns); + break; + } + } + } + + vec_reset_length (event_data); + + timeout = DBL_MAX; + pool_foreach(ns, rm->streams, { + if (ns->timeout < timeout) + timeout = ns->timeout; + }); + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE(rtnl_process_node, static) = { + .function = rtnl_process, + .name = "rtnl-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; + +u32 +rtnl_stream_open(rtnl_stream_t *template) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + rtnl_ns_t *ns; + int fd; + u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, template->name); + vec_add1(s, 0); + + if ((fd = open((char *)s, O_RDONLY)) < 0) { + clib_unix_warning("open stream %s: ", s); + vec_free(s); + return ~0; + } + + vec_free(s); + pool_get(rm->streams, ns); + ns->state = RTNL_S_INIT; + ns->ns_fd = fd; + ns->stream = *template; + vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_OPEN, (uword)(ns - rm->streams)); + return ns - rm->streams; +} + +void +rtnl_stream_close(u32 stream_index) +{ + vlib_main_t *vm = vlib_get_main(); + rtnl_main_t *rm = &rtnl_main; + ASSERT(!pool_is_free_index(rm->streams, stream_index)); + vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_CLOSE, stream_index); +} + +clib_error_t * +rtnl_init (vlib_main_t * vm) +{ + rtnl_main_t *rm = &rtnl_main; + rm->streams = 0; + return 0; +} + +VLIB_INIT_FUNCTION (rtnl_init); diff --git a/utils/extras/devices/rtnetlink/rtnl.h b/utils/extras/devices/rtnetlink/rtnl.h new file mode 100644 index 000000000..3f96252c1 --- /dev/null +++ b/utils/extras/devices/rtnetlink/rtnl.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RTNL_H_ +#define RTNL_H_ + +#include <vlib/vlib.h> + +#include <linux/netlink.h> +#include <vppinfra/clib.h> + +typedef enum { + RTNL_ERR_UNKNOWN, +} rtnl_error_t; + +#define RTNL_NETNS_NAMELEN 128 + +/* + * RTNL stream implements an RTNL overlay + * for receiving continuous updates for a given namespace. + * When the stream is initially opened, dump requests are sent + * in order to retrieve the original state. + * handle_error is called any time synchronization cannot be + * achieved. When called, state is reset to its original state and + * new dump requests are sent. + */ + +typedef struct rtnl_stream_s { + char name[RTNL_NETNS_NAMELEN + 1]; + void (*recv_message)(struct nlmsghdr *hdr, uword opaque); + void (*error)(rtnl_error_t err, uword opaque); + uword opaque; +} rtnl_stream_t; + +u32 rtnl_stream_open(rtnl_stream_t *template); +void rtnl_stream_close(u32 handle); + +/* + * Executes a function in a synchronously executed thread in the + * given namespace. + * Returns 0 on success, and -errno on error. + */ +int rtnl_exec_in_namespace(u32 handle, void *(*fn)(void *), void *arg, void **ret); +int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret); + +u8 *format_rtnl_nsname2path(u8 *s, va_list *args); + +#endif diff --git a/utils/extras/devices/rtnetlink/test.c b/utils/extras/devices/rtnetlink/test.c new file mode 100644 index 000000000..031748dd3 --- /dev/null +++ b/utils/extras/devices/rtnetlink/test.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016-2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <librtnl/netns.h> + +#include <vnet/plugin/plugin.h> +#include <librtnl/mapper.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> + +u32 handles[10]; + +static void +test_notify(void *obj, netns_type_t type, u32 flags, uword opaque) { + u32 index = (u32) opaque; + const char *action = (flags & NETNS_F_ADD)?"add":(flags & NETNS_F_DEL)?"del":"mod"; + + switch (type) { + case NETNS_TYPE_ADDR: + clib_warning("%d: addr %s %U", index, action, format_ns_addr, (ns_addr_t *)obj); + break; + case NETNS_TYPE_ROUTE: + clib_warning("%d: route %s %U", index, action, format_ns_route, (ns_route_t *)obj); + break; + case NETNS_TYPE_LINK: + clib_warning("%d:link %s %U", index, action, format_ns_link, (ns_link_t *)obj); + break; + case NETNS_TYPE_NEIGH: + clib_warning("%d: neigh %s %U", index, action, format_ns_neigh, (ns_neigh_t *)obj); + break; + } +} + +static clib_error_t * +test_enable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + char *nsname = 0; + u32 index; + if (!unformat(input, "%s", &nsname)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + if (!unformat(input, "%d", &index)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + if (!strcmp(nsname, "default")) + nsname[0] = 0; + + netns_sub_t sub; + sub.notify = test_notify; + sub.opaque = index; + handles[index] = netns_open(nsname, &sub); + if (handles[index] == ~0) { + return clib_error_create("Could not open netns with name %s", nsname); + } + return 0; +} + +static clib_error_t * +test_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + if (!unformat(input, "%d", &index)) { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + netns_close(handles[index]); + return 0; +} + +VLIB_CLI_COMMAND (rtnl_enable_command, static) = { + .path = "test netns enable", + .short_help = "test netns enable [<ns-name>|default] <index>", + .function = test_enable_command_fn, +}; + +VLIB_CLI_COMMAND (rtnl_disable_command, static) = { + .path = "test netns disable", + .short_help = "test rtnl disable <index>", + .function = test_disable_command_fn, +}; + +u32 mapper_indexes[10]; + +static clib_error_t * +mapper_ns_add_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + char *nsname; + u32 table_id; + if (!unformat(input, "%d", &index)) + return clib_error_return(0, "invalid index `%U'", + format_unformat_error, input); + if (!unformat(input, "%s", &nsname)) + return clib_error_return(0, "invalid nsname `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &table_id)) + return clib_error_return(0, "invalid fib index `%U'", + format_unformat_error, input); + + if (!strcmp(nsname, "default")) + nsname[0] = 0; + + u32 fib4 = ip4_fib_index_from_table_id(table_id); + u32 fib6 = ip6_fib_index_from_table_id(table_id); + + if (mapper_add_ns(nsname, fib4, fib6, &mapper_indexes[index])) + return clib_error_return(0, "Could not add ns %s", nsname); + return 0; +} + +VLIB_CLI_COMMAND (mapper_ns_add_command, static) = { + .path = "test mapper ns add", + .short_help = "test mapper ns add <index> <nsname> <table-id>", + .function = mapper_ns_add_command_fn, +}; + +static clib_error_t * +mapper_ns_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 index; + if (!unformat(input, "%d", &index)) + return clib_error_return(0, "invalid index `%U'", + format_unformat_error, input); + + if (mapper_del_ns(mapper_indexes[index])) + return clib_error_return(0, "Could not del ns %d", index); + return 0; +} + +VLIB_CLI_COMMAND (mapper_ns_del_command, static) = { + .path = "test mapper ns delete", + .short_help = "test mapper ns delete <index>", + .function = mapper_ns_del_command_fn, +}; + +static clib_error_t * +mapper_iface_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 nsindex; + u32 ifindex; + u32 sw_if_index; + int del = 0; + if (!unformat(input, "%d", &nsindex)) + return clib_error_return(0, "invalid nsindex `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &ifindex)) + return clib_error_return(0, "invalid ifindex `%U'", + format_unformat_error, input); + if (!unformat(input, "%d", &sw_if_index)) + return clib_error_return(0, "invalid sw_if_index `%U'", + format_unformat_error, input); + if (unformat(input, "del")) + del = 1; + + clib_warning("mapper_add_del %d %d %d %d", mapper_indexes[nsindex], ifindex, sw_if_index, del); + + if (mapper_add_del(mapper_indexes[nsindex], ifindex, sw_if_index, del)) + return clib_error_return(0, "Could not add iface"); + return 0; +} + + +VLIB_CLI_COMMAND (mapper_iface_command, static) = { + .path = "test mapper iface", + .short_help = "test mapper iface <nsindex> <linux-ifindex> <sw_if_index> [del]", + .function = mapper_iface_command_fn, +}; + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + //.version = VPP_BUILD_VER, FIXME + .description = "netlink", +}; +/* *INDENT-ON* */ + diff --git a/utils/extras/rtinject/tap_inject.c b/utils/extras/rtinject/tap_inject.c new file mode 100644 index 000000000..f41ae86c8 --- /dev/null +++ b/utils/extras/rtinject/tap_inject.c @@ -0,0 +1,380 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tap_inject.h" + +#include <vnet/mfib/mfib_table.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/lookup.h> +#include <vnet/fib/fib.h> + +static tap_inject_main_t tap_inject_main; +extern dpo_type_t tap_inject_dpo_type; + +tap_inject_main_t * +tap_inject_get_main (void) +{ + return &tap_inject_main; +} + +void +tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0); + vec_validate_init_empty (im->sw_if_index_to_tap_if_index, sw_if_index, ~0); + + vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0); + + im->sw_if_index_to_tap_fd[sw_if_index] = tap_fd; + im->sw_if_index_to_tap_if_index[sw_if_index] = tap_if_index; + + im->tap_fd_to_sw_if_index[tap_fd] = sw_if_index; + + hash_set (im->tap_if_index_to_sw_if_index, tap_if_index, sw_if_index); +} + +void +tap_inject_delete_tap (u32 sw_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 tap_fd = im->sw_if_index_to_tap_fd[sw_if_index]; + u32 tap_if_index = im->sw_if_index_to_tap_if_index[sw_if_index]; + + im->sw_if_index_to_tap_if_index[sw_if_index] = ~0; + im->sw_if_index_to_tap_fd[sw_if_index] = ~0; + im->tap_fd_to_sw_if_index[tap_fd] = ~0; + + hash_unset (im->tap_if_index_to_sw_if_index, tap_if_index); +} + +u32 +tap_inject_lookup_tap_fd (u32 sw_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0); + return im->sw_if_index_to_tap_fd[sw_if_index]; +} + +u32 +tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0); + return im->tap_fd_to_sw_if_index[tap_fd]; +} + +u32 +tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + uword * sw_if_index; + + sw_if_index = hash_get (im->tap_if_index_to_sw_if_index, tap_if_index); + return sw_if_index ? *(u32 *)sw_if_index : ~0; +} + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + // .version = VPP_BUILD_VER, FIXME + .description = "router", +}; +/* *INDENT-ON* */ + + +static void +tap_inject_disable (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + im->flags &= ~TAP_INJECT_F_ENABLED; + + clib_warning ("tap-inject is not actually disabled."); +} + +static clib_error_t * +tap_inject_enable (void) +{ + vlib_main_t * vm = vlib_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + + if (tap_inject_is_enabled ()) + return 0; + + tap_inject_enable_netlink (); + + /* Only enable netlink? */ + if (im->flags & TAP_INJECT_F_CONFIG_NETLINK) + { + im->flags |= TAP_INJECT_F_ENABLED; + return 0; + } + + /* Register ARP and ICMP6 as neighbor nodes. */ + ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, im->neighbor_node_index); + ip6_register_protocol (IP_PROTOCOL_ICMP6, im->neighbor_node_index); + + /* Register remaining protocols. */ + ip4_register_protocol (IP_PROTOCOL_ICMP, im->tx_node_index); + + ip4_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index); + ip4_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index); + ip4_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index); + + ip6_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index); + ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index); + ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index); + + { + dpo_id_t dpo = DPO_INVALID; + + const mfib_prefix_t pfx_224_0_0_0 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xe0000000), + }, + .fp_src_addr = { + .ip4.as_u32 = 0, + }, + }; + + dpo_set(&dpo, tap_inject_dpo_type, DPO_PROTO_IP4, ~0); + + index_t repi = replicate_create(1, DPO_PROTO_IP4); + replicate_set_bucket(repi, 0, &dpo); + + mfib_table_entry_special_add(0, + &pfx_224_0_0_0, + MFIB_SOURCE_API, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, + repi); + + dpo_reset(&dpo); + } + + im->flags |= TAP_INJECT_F_ENABLED; + + return 0; +} + +static uword +tap_inject_iface_isr (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + tap_inject_main_t * im = tap_inject_get_main (); + vnet_hw_interface_t * hw; + u32 * hw_if_index; + clib_error_t * err = 0; + + vec_foreach (hw_if_index, im->interfaces_to_enable) + { + hw = vnet_get_hw_interface (vnet_get_main (), *hw_if_index); + + if (hw->hw_class_index == ethernet_hw_interface_class.index) + { + err = tap_inject_tap_connect (hw); + if (err) + break; + } + } + + vec_foreach (hw_if_index, im->interfaces_to_disable) + tap_inject_tap_disconnect (*hw_if_index); + + vec_free (im->interfaces_to_enable); + vec_free (im->interfaces_to_disable); + + return err ? -1 : 0; +} + +VLIB_REGISTER_NODE (tap_inject_iface_isr_node, static) = { + .function = tap_inject_iface_isr, + .name = "tap-inject-iface-isr", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = sizeof (u32), +}; + + +static clib_error_t * +tap_inject_interface_add_del (struct vnet_main_t * vnet_main, u32 hw_if_index, + u32 add) +{ + vlib_main_t * vm = vlib_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + + if (!tap_inject_is_config_enabled ()) + return 0; + + tap_inject_enable (); + + if (add) + vec_add1 (im->interfaces_to_enable, hw_if_index); + else + vec_add1 (im->interfaces_to_disable, hw_if_index); + + vlib_node_set_interrupt_pending (vm, tap_inject_iface_isr_node.index); + + return 0; +} + +VNET_HW_INTERFACE_ADD_DEL_FUNCTION (tap_inject_interface_add_del); + + +static clib_error_t * +tap_inject_enable_disable_all_interfaces (int enable) +{ + vnet_main_t * vnet_main = vnet_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + vnet_hw_interface_t * interfaces; + vnet_hw_interface_t * hw; + u32 ** indices; + + if (enable) + tap_inject_enable (); + else + tap_inject_disable (); + + /* Collect all the interface indices. */ + interfaces = vnet_main->interface_main.hw_interfaces; + indices = enable ? &im->interfaces_to_enable : &im->interfaces_to_disable; + pool_foreach (hw, interfaces, vec_add1 (*indices, hw - interfaces)); + + if (tap_inject_iface_isr (vlib_get_main (), 0, 0)) + return clib_error_return (0, "tap-inject interface add del isr failed"); + + return 0; +} + +static clib_error_t * +tap_inject_cli (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + if (cmd->function_arg) + { + clib_error_t * err; + + if (tap_inject_is_config_disabled ()) + return clib_error_return (0, + "tap-inject is disabled in config, thus cannot be enabled."); + + /* Enable */ + err = tap_inject_enable_disable_all_interfaces (1); + if (err) + { + tap_inject_enable_disable_all_interfaces (0); + return err; + } + + im->flags |= TAP_INJECT_F_CONFIG_ENABLE; + } + else + { + /* Disable */ + tap_inject_enable_disable_all_interfaces (0); + im->flags &= ~TAP_INJECT_F_CONFIG_ENABLE; + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_inject_enable_cmd, static) = { + .path = "enable tap-inject", + .short_help = "enable tap-inject", + .function = tap_inject_cli, + .function_arg = 1, +}; + +VLIB_CLI_COMMAND (tap_inject_disable_cmd, static) = { + .path = "disable tap-inject", + .short_help = "disable tap-inject", + .function = tap_inject_cli, + .function_arg = 0, +}; + + +static clib_error_t * +show_tap_inject (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnet_main = vnet_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + u32 k, v; + + if (tap_inject_is_config_disabled ()) + { + vlib_cli_output (vm, "tap-inject is disabled in config.\n"); + return 0; + } + + if (!tap_inject_is_enabled ()) + { + vlib_cli_output (vm, "tap-inject is not enabled.\n"); + return 0; + } + + hash_foreach (k, v, im->tap_if_index_to_sw_if_index, { + vlib_cli_output (vm, "%U -> %U", + format_vnet_sw_interface_name, vnet_main, + vnet_get_sw_interface (vnet_main, v), + format_tap_inject_tap_name, k); + }); + + return 0; +} + +VLIB_CLI_COMMAND (show_tap_inject_cmd, static) = { + .path = "show tap-inject", + .short_help = "show tap-inject", + .function = show_tap_inject, +}; + + +static clib_error_t * +tap_inject_config (vlib_main_t * vm, unformat_input_t * input) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "enable")) + im->flags |= TAP_INJECT_F_CONFIG_ENABLE; + + else if (unformat (input, "disable")) + im->flags |= TAP_INJECT_F_CONFIG_DISABLE; + + else if (unformat (input, "netlink-only")) + im->flags |= TAP_INJECT_F_CONFIG_NETLINK; + + else + return clib_error_return (0, "syntax error `%U'", + format_unformat_error, input); + } + + if (tap_inject_is_config_enabled () && tap_inject_is_config_disabled ()) + return clib_error_return (0, + "tap-inject cannot be both enabled and disabled."); + + return 0; +} + +VLIB_CONFIG_FUNCTION (tap_inject_config, "tap-inject"); diff --git a/utils/extras/rtinject/tap_inject.h b/utils/extras/rtinject/tap_inject.h new file mode 100644 index 000000000..ec5121a09 --- /dev/null +++ b/utils/extras/rtinject/tap_inject.h @@ -0,0 +1,108 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TAP_INJECT_H +#define _TAP_INJECT_H + +#include <vnet/plugin/plugin.h> +#include <vnet/ip/ip.h> + +#ifndef ETHER_ADDR_LEN +#define ETHER_ADDR_LEN 6 +#endif + +typedef struct { + /* + * tap-inject can be enabled or disabled in config file or during runtime. + * When disabled in config, it is not possible to enable during runtime. + * + * When the netlink-only option is used, netlink configuration is monitored + * and mirrored to the data plane but no traffic is passed between the host + * and the data plane. + */ +#define TAP_INJECT_F_CONFIG_ENABLE (1U << 0) +#define TAP_INJECT_F_CONFIG_DISABLE (1U << 1) +#define TAP_INJECT_F_CONFIG_NETLINK (1U << 2) +#define TAP_INJECT_F_ENABLED (1U << 3) + + u32 flags; + + u32 * sw_if_index_to_tap_fd; + u32 * sw_if_index_to_tap_if_index; + u32 * tap_fd_to_sw_if_index; + u32 * tap_if_index_to_sw_if_index; + + u32 * interfaces_to_enable; + u32 * interfaces_to_disable; + + u32 * rx_file_descriptors; + + u32 rx_node_index; + u32 tx_node_index; + u32 neighbor_node_index; + + u32 * rx_buffers; + +} tap_inject_main_t; + + +tap_inject_main_t * tap_inject_get_main (void); + +void tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index); +void tap_inject_delete_tap (u32 sw_if_index); + +u32 tap_inject_lookup_tap_fd (u32 sw_if_index); +u32 tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd); +u32 tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index); + +static inline int +tap_inject_is_enabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_ENABLED); +} + +static inline int +tap_inject_is_config_enabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_CONFIG_ENABLE); +} + +static inline int +tap_inject_is_config_disabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_CONFIG_DISABLE); +} + + +/* Netlink */ + +void tap_inject_enable_netlink (void); + + +/* Tap */ + +clib_error_t * tap_inject_tap_connect (vnet_hw_interface_t * hw); +clib_error_t * tap_inject_tap_disconnect (u32 sw_if_index); + +u8 * format_tap_inject_tap_name (u8 * s, va_list * args); + +#endif /* _TAP_INJECT_H */ diff --git a/utils/extras/rtinject/tap_inject_netlink.c b/utils/extras/rtinject/tap_inject_netlink.c new file mode 100644 index 000000000..c0e0ce995 --- /dev/null +++ b/utils/extras/rtinject/tap_inject_netlink.c @@ -0,0 +1,285 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../devices/rtnetlink/netns.h" +#include <vlibmemory/api.h> +#include <vnet/ip/ip6_neighbor.h> +#include <vnet/ip/lookup.h> +#include <vnet/fib/fib.h> +#include <vnet/ethernet/arp.h> +#include <arpa/inet.h> +#include <linux/mpls.h> +#include <vnet/mpls/packet.h> + +#include "tap_inject.h" + +static void +add_del_addr (ns_addr_t * a, int is_del) +{ + vlib_main_t * vm = vlib_get_main (); + u32 sw_if_index; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( + a->ifaddr.ifa_index); + + if (sw_if_index == ~0) + return; + + if (a->ifaddr.ifa_family == AF_INET) + { + ip4_add_del_interface_address (vm, sw_if_index, + (ip4_address_t *) a->local, a->ifaddr.ifa_prefixlen, is_del); + } + else if (a->ifaddr.ifa_family == AF_INET6) + { + ip6_add_del_interface_address (vm, sw_if_index, + (ip6_address_t *) a->addr, a->ifaddr.ifa_prefixlen, is_del); + } +} + + +struct set_flags_args { + u32 index; + u8 flags; +}; + +static void +set_flags_cb (struct set_flags_args * a) +{ + vnet_sw_interface_set_flags (vnet_get_main (), a->index, a->flags); +} + +static void +add_del_link (ns_link_t * l, int is_del) +{ + struct set_flags_args args = { ~0, 0 }; + vnet_sw_interface_t * sw; + u8 flags = 0; + u32 sw_if_index; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( + l->ifi.ifi_index); + + if (sw_if_index == ~0) + return; + + sw = vnet_get_sw_interface (vnet_get_main (), sw_if_index); + + flags = sw->flags; + + if (l->ifi.ifi_flags & IFF_UP) + flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP; + else + flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP; + + args.index = sw_if_index; + args.flags = flags; + + vl_api_rpc_call_main_thread (set_flags_cb, (u8 *)&args, sizeof (args)); +} + + +static void +add_del_neigh (ns_neigh_t * n, int is_del) +{ + vnet_main_t * vnet_main = vnet_get_main (); + vlib_main_t * vm = vlib_get_main (); + u32 sw_if_index; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index ( + n->nd.ndm_ifindex); + + if (sw_if_index == ~0) + return; + + if (n->nd.ndm_family == AF_INET) + { + ethernet_arp_ip4_over_ethernet_address_t a; + + memset (&a, 0, sizeof (a)); + + clib_memcpy (&a.mac, n->lladdr, ETHER_ADDR_LEN); + clib_memcpy (&a.ip4, n->dst, sizeof (a.ip4)); + + + if (n->nd.ndm_state & NUD_REACHABLE) + { + vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, + &a, + IP_NEIGHBOR_FLAG_NO_FIB_ENTRY); + + } + else if (n->nd.ndm_state & NUD_FAILED) + { + vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, &a); + } + } + else if (n->nd.ndm_family == AF_INET6) + { + if (n->nd.ndm_state & NUD_REACHABLE) + { + + mac_address_t * mac1; + mac1=malloc(sizeof(mac_address_t)); + memcpy (mac1, n->lladdr, ETHER_ADDR_LEN); + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, + (ip6_address_t *) n->dst, (mac_address_t *) mac1, + IP_NEIGHBOR_FLAG_NO_FIB_ENTRY); + } + else + vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, + (ip6_address_t *) n->dst); + } +} + + +#define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254 + +static void +get_mpls_label_stack(struct mpls_label *addr, u32* l) +{ + u32 entry = ntohl(addr[0].entry); + u32 label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + + for(int i = 1; label != 0; i++) { + *l++ = label; + if(entry & MPLS_LS_S_MASK) + return; + entry = ntohl(addr[i].entry); + label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + } +} + +static void +add_del_route (ns_route_t * r, int is_del) +{ + u32 sw_if_index; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif); + + if (sw_if_index == ~0) + return; + + if (r->rtm.rtm_family == AF_INET) + { + u32 stack[MPLS_STACK_DEPTH] = {0}; + + fib_prefix_t prefix; + ip46_address_t nh; + + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&prefix.fp_addr.ip4, r->dst, sizeof (prefix.fp_addr.ip4)); + get_mpls_label_stack(r->encap, stack); + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip4, r->gateway, sizeof (nh.ip4)); + if(*stack == 0) + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + else { + fib_route_path_t *rpaths = NULL, rpath; + memset(&rpath, 0, sizeof(rpath)); + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + clib_memcpy(&rpath.frp_addr.ip4, r->gateway, sizeof(rpath.frp_addr.ip4)); + rpath.frp_sw_if_index = sw_if_index; + for(int i = 0; i < MPLS_STACK_DEPTH && stack[i] != 0; i++) { + fib_mpls_label_t fib_label = {stack[i],0,0,0}; + vec_add1(rpath.frp_label_stack, fib_label); + } + vec_add1(rpaths, rpath); + fib_table_entry_path_add2(0, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + } + } + else if (r->rtm.rtm_family == AF_INET6) + { + fib_prefix_t prefix; + ip46_address_t nh; + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = r->rtm.rtm_dst_len; + prefix.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&prefix.fp_addr.ip6, r->dst, sizeof (prefix.fp_addr.ip6)); + memset (&nh, 0, sizeof (nh)); + clib_memcpy (&nh.ip6, r->gateway, sizeof (nh.ip6)); + fib_table_entry_path_add (0, &prefix, FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, prefix.fp_proto, + &nh, sw_if_index, 0, + 0 /* weight */, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } +/* else if (r->rtm.rtm_family == AF_MPLS) + { + u32 dst_label; + get_mpls_label_stack((struct mpls_label*) r->dst, &dst_label); + struct rtvia *via = (struct rtvia*) r->via; + fib_prefix_t prefix; + fib_route_path_t *rpaths = NULL, rpath; + memset (&prefix, 0, sizeof (prefix)); + prefix.fp_len = 21; + prefix.fp_label = dst_label; + prefix.fp_proto = FIB_PROTOCOL_MPLS; + prefix.fp_payload_proto = DPO_PROTO_IP4; + memset(&rpath, 0, sizeof(rpath)); + clib_memcpy (&rpath.frp_addr.ip4, via->rtvia_addr, sizeof (rpath.frp_addr.ip4)); + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + rpath.frp_fib_index = 0; + rpath.frp_sw_if_index = sw_if_index; + vec_add1(rpaths, rpath); + fib_table_entry_path_add2(0, + &prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + }*/ +} + + +static void +netns_notify_cb (void * obj, netns_type_t type, u32 flags, uword opaque) +{ + if (type == NETNS_TYPE_ADDR) + add_del_addr ((ns_addr_t *)obj, flags & NETNS_F_DEL); + + else if (type == NETNS_TYPE_LINK) + add_del_link ((ns_link_t *)obj, flags & NETNS_F_DEL); + + else if (type == NETNS_TYPE_NEIGH) + add_del_neigh ((ns_neigh_t *)obj, flags & NETNS_F_DEL); + + else if (type == NETNS_TYPE_ROUTE) + add_del_route ((ns_route_t *)obj, flags & NETNS_F_DEL); +} + +void +tap_inject_enable_netlink (void) +{ + char nsname = 0; + netns_sub_t sub = { + .notify = netns_notify_cb, + .opaque = 0, + }; + + netns_open (&nsname, &sub); +} diff --git a/utils/extras/rtinject/tap_inject_node.c b/utils/extras/rtinject/tap_inject_node.c new file mode 100644 index 000000000..73c296451 --- /dev/null +++ b/utils/extras/rtinject/tap_inject_node.c @@ -0,0 +1,374 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tap_inject.h" +#include <sys/uio.h> +#include <netinet/in.h> +#include <vnet/ethernet/arp_packet.h> + +vlib_node_registration_t tap_inject_rx_node; +vlib_node_registration_t tap_inject_tx_node; +vlib_node_registration_t tap_inject_neighbor_node; + +enum { + NEXT_NEIGHBOR_ARP, + NEXT_NEIGHBOR_ICMP6, +}; + +/** + * @brief Dynamically added tap_inject DPO type + */ +dpo_type_t tap_inject_dpo_type; + +static inline void +tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b) +{ + struct iovec iov; + ssize_t n_bytes; + + iov.iov_base = vlib_buffer_get_current (b); + iov.iov_len = b->current_length; + + n_bytes = writev (fd, &iov, 1); + + if (n_bytes < 0) + clib_warning ("writev failed"); + else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT) + clib_warning ("buffer truncated"); +} + +static uword +tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + vlib_buffer_t * b; + u32 * pkts; + u32 fd; + u32 i; + + pkts = vlib_frame_vector_args (f); + + for (i = 0; i < f->n_vectors; ++i) + { + b = vlib_get_buffer (vm, pkts[i]); + + fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]); + if (fd == ~0) + continue; + + /* Re-wind the buffer to the start of the Ethernet header. */ + vlib_buffer_advance (b, -b->current_data); + + tap_inject_tap_send_buffer (fd, b); + } + + vlib_buffer_free (vm, pkts, f->n_vectors); + return f->n_vectors; +} + +VLIB_REGISTER_NODE (tap_inject_tx_node) = { + .function = tap_inject_tx, + .name = "tap-inject-tx", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, +}; + + +static uword +tap_inject_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + vlib_buffer_t * b; + u32 * pkts; + u32 fd; + u32 i; + u32 bi; + u32 next_index = node->cached_next_index; + u32 next = ~0; + u32 n_left; + u32 * to_next; + + pkts = vlib_frame_vector_args (f); + + for (i = 0; i < f->n_vectors; ++i) + { + bi = pkts[i]; + b = vlib_get_buffer (vm, bi); + + fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]); + if (fd == ~0) + { + vlib_buffer_free (vm, &bi, 1); + continue; + } + + /* Re-wind the buffer to the start of the Ethernet header. */ + vlib_buffer_advance (b, -b->current_data); + + tap_inject_tap_send_buffer (fd, b); + + /* Send the buffer to a neighbor node too? */ + { + ethernet_header_t * eth = vlib_buffer_get_current (b); + u16 ether_type = htons (eth->type); + + if (ether_type == ETHERNET_TYPE_ARP) + { + ethernet_arp_header_t * arp = (void *)(eth + 1); + + if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply)) + next = NEXT_NEIGHBOR_ARP; + } + else if (ether_type == ETHERNET_TYPE_IP6) + { + ip6_header_t * ip = (void *)(eth + 1); + icmp46_header_t * icmp = (void *)(ip + 1); + + if (ip->protocol == IP_PROTOCOL_ICMP6 && + icmp->type == ICMP6_neighbor_advertisement) + next = NEXT_NEIGHBOR_ICMP6; + } + } + + if (next == ~0) + { + vlib_buffer_free (vm, &bi, 1); + continue; + } + + /* ARP and ICMP6 expect to start processing after the Ethernet header. */ + vlib_buffer_advance (b, sizeof (ethernet_header_t)); + + vlib_get_next_frame (vm, node, next_index, to_next, n_left); + + *(to_next++) = bi; + --n_left; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left, bi, next); + vlib_put_next_frame (vm, node, next_index, n_left); + } + + return f->n_vectors; +} + +VLIB_REGISTER_NODE (tap_inject_neighbor_node) = { + .function = tap_inject_neighbor, + .name = "tap-inject-neighbor", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + .n_next_nodes = 2, + .next_nodes = { + [NEXT_NEIGHBOR_ARP] = "arp-input", + [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation", + }, +}; + + +#define MTU 1500 +#define MTU_BUFFERS ((MTU + vlib_buffer_get_default_data_size(vm) - 1) / vlib_buffer_get_default_data_size(vm)) +#define NUM_BUFFERS_TO_ALLOC 32 + +static inline uword +tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 sw_if_index; + struct iovec iov[MTU_BUFFERS]; + u32 bi[MTU_BUFFERS]; + vlib_buffer_t * b; + ssize_t n_bytes; + ssize_t n_bytes_left; + u32 i, j; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd); + if (sw_if_index == ~0) + return 0; + + /* Allocate buffers in bulk when there are less than enough to rx an MTU. */ + if (vec_len (im->rx_buffers) < MTU_BUFFERS) + { + u32 len = vec_len (im->rx_buffers); + + + u8 index = vlib_buffer_pool_get_default_for_numa (vm,0); + len = vlib_buffer_alloc_from_pool(vm, + &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC, + index); + + _vec_len (im->rx_buffers) += len; + + if (vec_len (im->rx_buffers) < MTU_BUFFERS) + { + clib_warning ("failed to allocate buffers"); + return 0; + } + } + + /* Fill buffers from the end of the list to make it easier to resize. */ + for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j) + { + vlib_buffer_t * b; + + bi[i] = im->rx_buffers[j]; + + b = vlib_get_buffer (vm, bi[i]); + + iov[i].iov_base = b->data; + iov[i].iov_len = VLIB_BUFFER_DEFAULT_DATA_SIZE; + } + + n_bytes = readv (fd, iov, MTU_BUFFERS); + if (n_bytes < 0) + { + clib_warning ("readv failed"); + return 0; + } + + b = vlib_get_buffer (vm, bi[0]); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + n_bytes_left = n_bytes - VLIB_BUFFER_DEFAULT_DATA_SIZE; + + if (n_bytes_left > 0) + { + b->total_length_not_including_first_buffer = n_bytes_left; + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + } + + b->current_length = n_bytes; + + /* If necessary, configure any remaining buffers in the chain. */ + for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DEFAULT_DATA_SIZE) + { + b = vlib_get_buffer (vm, bi[i - 1]); + b->current_length = VLIB_BUFFER_DEFAULT_DATA_SIZE; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = bi[i]; + + b = vlib_get_buffer (vm, bi[i]); + b->current_length = n_bytes_left; + } + + _vec_len (im->rx_buffers) -= i; + + /* Get the packet to the output node. */ + { + vnet_hw_interface_t * hw; + vlib_frame_t * new_frame; + u32 * to_next; + + hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index); + + new_frame = vlib_get_frame_to_node (vm, hw->output_node_index); + to_next = vlib_frame_vector_args (new_frame); + to_next[0] = bi[0]; + new_frame->n_vectors = 1; + + vlib_put_frame_to_node (vm, hw->output_node_index, new_frame); + } + + return 1; +} + +static uword +tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 * fd; + uword count = 0; + + vec_foreach (fd, im->rx_file_descriptors) + { + if (tap_rx (vm, node, f, *fd) != 1) + { + clib_warning ("rx failed"); + count = 0; + break; + } + ++count; + } + + vec_free (im->rx_file_descriptors); + + return count; +} + +VLIB_REGISTER_NODE (tap_inject_rx_node) = { + .function = tap_inject_rx, + .name = "tap-inject-rx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = sizeof (u32), +}; + +/** + * @brief no-op lock function. + */ +static void +tap_inject_dpo_lock (dpo_id_t * dpo) +{ +} + +/** + * @brief no-op unlock function. + */ +static void +tap_inject_dpo_unlock (dpo_id_t * dpo) +{ +} + +u8 * +format_tap_inject_dpo (u8 * s, va_list * args) +{ + return (format (s, "tap-inject:[%d]", 0)); +} + +const static dpo_vft_t tap_inject_vft = { + .dv_lock = tap_inject_dpo_lock, + .dv_unlock = tap_inject_dpo_unlock, + .dv_format = format_tap_inject_dpo, +}; + +const static char *const tap_inject_tx_nodes[] = { + "tap-inject-tx", + NULL, +}; + +const static char *const *const tap_inject_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = tap_inject_tx_nodes, + [DPO_PROTO_IP6] = tap_inject_tx_nodes, +}; + +static clib_error_t * +tap_inject_init (vlib_main_t * vm) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + im->rx_node_index = tap_inject_rx_node.index; + im->tx_node_index = tap_inject_tx_node.index; + im->neighbor_node_index = tap_inject_neighbor_node.index; + + tap_inject_dpo_type = dpo_register_new_type (&tap_inject_vft, tap_inject_nodes); + + vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC); + vec_reset_length (im->rx_buffers); + + return 0; +} + +VLIB_INIT_FUNCTION (tap_inject_init); diff --git a/utils/extras/rtinject/tap_inject_tap.c b/utils/extras/rtinject/tap_inject_tap.c new file mode 100644 index 000000000..a3ec9ffef --- /dev/null +++ b/utils/extras/rtinject/tap_inject_tap.c @@ -0,0 +1,170 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tap_inject.h" + +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <linux/if.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_tun.h> +#include <netinet/in.h> +#include <vnet/unix/tuntap.h> + +#include <vlib/unix/unix.h> + + +static clib_error_t * +tap_inject_tap_read (clib_file_t * f) +{ + vlib_main_t * vm = vlib_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + + vec_add1 (im->rx_file_descriptors, f->file_descriptor); + + vlib_node_set_interrupt_pending (vm, im->rx_node_index); + + return 0; +} + +#define TAP_INJECT_TAP_BASE_NAME "vpp" + +clib_error_t * +tap_inject_tap_connect (vnet_hw_interface_t * hw) +{ + vnet_main_t * vnet_main = vnet_get_main (); + vnet_sw_interface_t * sw = vnet_get_sw_interface (vnet_main, hw->hw_if_index); + static const int one = 1; + int fd; + struct ifreq ifr; + clib_file_t template; + u32 tap_fd; + u8 * name; + + memset (&ifr, 0, sizeof (ifr)); + memset (&template, 0, sizeof (template)); + + ASSERT (hw->hw_if_index == sw->sw_if_index); + + /* Create the tap. */ + tap_fd = open ("/dev/net/tun", O_RDWR); + + if ((int)tap_fd < 0) + return clib_error_return (0, "failed to open tun device"); + + name = format (0, TAP_INJECT_TAP_BASE_NAME "%u%c", hw->hw_instance, 0); + + strncpy (ifr.ifr_name, (char *) name, sizeof (ifr.ifr_name) - 1); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (ioctl (tap_fd, TUNSETIFF, (void *)&ifr) < 0) + { + close (tap_fd); + return clib_error_return (0, "failed to create tap"); + } + + if (ioctl (tap_fd, FIONBIO, &one) < 0) + { + close (tap_fd); + return clib_error_return (0, "failed to set tap to non-blocking io"); + } + + /* Open a socket to configure the device. */ + fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL)); + + if (fd < 0) + { + close (tap_fd); + return clib_error_return (0, "failed to configure tap"); + } + + if (hw->hw_address) + clib_memcpy (ifr.ifr_hwaddr.sa_data, hw->hw_address, ETHER_ADDR_LEN); + + ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; + + /* Set the hardware address. */ + if (ioctl (fd, SIOCSIFHWADDR, &ifr) < 0) + { + close (tap_fd); + close (fd); + return clib_error_return (0, "failed to set tap hardware address"); + } + + /* Get the tap if index. */ + if (ioctl (fd, SIOCGIFINDEX, &ifr) < 0) + { + close (tap_fd); + close (fd); + return clib_error_return (0, "failed to procure tap if index"); + } + + close (fd); + + /* Get notified when the tap needs to be read. */ + template.read_function = tap_inject_tap_read; + template.file_descriptor = tap_fd; + + clib_file_add (&file_main, &template); + + tap_inject_insert_tap (sw->sw_if_index, tap_fd, ifr.ifr_ifindex); + + return 0; +} + +clib_error_t * +tap_inject_tap_disconnect (u32 sw_if_index) +{ + u32 tap_fd; + + tap_fd = tap_inject_lookup_tap_fd (sw_if_index); + if (tap_fd == ~0) + return clib_error_return (0, "failed to disconnect tap"); + + tap_inject_delete_tap (sw_if_index); + + close (tap_fd); + return 0; +} + + +u8 * +format_tap_inject_tap_name (u8 * s, va_list * args) +{ + int fd; + struct ifreq ifr; + + fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL)); + + if (fd < 0) + return 0; + + memset (&ifr, 0, sizeof (ifr)); + + ifr.ifr_ifindex = va_arg (*args, u32); + + if (ioctl (fd, SIOCGIFNAME, &ifr) < 0) + { + close (fd); + return 0; + } + + close (fd); + + return format (s, "%s", ifr.ifr_name); +} |