summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt12
-rw-r--r--scripts/build-packages.sh1
-rw-r--r--utils/extras/CMakeLists.txt101
-rw-r--r--utils/extras/cmake/Modules/Packaging.cmake31
-rw-r--r--utils/extras/devices/rtnetlink/mapper.c270
-rw-r--r--utils/extras/devices/rtnetlink/mapper.h35
-rw-r--r--utils/extras/devices/rtnetlink/netns.c787
-rw-r--r--utils/extras/devices/rtnetlink/netns.h145
-rw-r--r--utils/extras/devices/rtnetlink/rtnl.c604
-rw-r--r--utils/extras/devices/rtnetlink/rtnl.h60
-rw-r--r--utils/extras/devices/rtnetlink/test.c203
-rw-r--r--utils/extras/rtinject/tap_inject.c380
-rw-r--r--utils/extras/rtinject/tap_inject.h108
-rw-r--r--utils/extras/rtinject/tap_inject_netlink.c285
-rw-r--r--utils/extras/rtinject/tap_inject_node.c374
-rw-r--r--utils/extras/rtinject/tap_inject_tap.c170
16 files changed, 3565 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 63c438fd8..34105c7f7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,7 @@ option(BUILD_LIBTRANSPORT "Build the hicn transport library" ON)
option(BUILD_UTILS "Build the hicn utils" ON)
option(BUILD_APPS "Build the hicn apps" OFF)
option(BUILD_HICNPLUGIN "Build the hicn vpp plugin" OFF)
+option(BUILD_HICNEXTRAPLUGIN "Build the hicn extra plugin" OFF)
list(APPEND dir_options
BUILD_LIBHICN
@@ -43,6 +44,8 @@ set(BUILD_LIBTRANSPORT_DIR libtransport)
set(BUILD_UTILS_DIR utils)
set(BUILD_APPS_DIR apps)
set(BUILD_HICNPLUGIN_DIR hicn-plugin)
+set(BUILD_HICNEXTRAPLUGIN_DIR utils/extras/)
+
## HEADER FILES
set(LIBHICN_HEADER_FILES)
@@ -64,6 +67,7 @@ set(LIBHICN_LIGHT hicn-light)
set(HICN_LIGHT_CONTROL hicn-light-control)
set(HICN_LIGHT_DAEMON hicn-light-daemon)
set(HICN_PLUGIN hicn-plugin)
+set(HICN_EXTRA_PLUGIN hicn-extra-plugin)
set(LIBTRANSPORT hicntransport)
set(HICN_UTILS hicn-utils)
set(HICN_APPS hicn-apps)
@@ -81,6 +85,12 @@ if (BUILD_HICNPLUGIN AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
set(LIBTRANSPORT ${LIBTRANSPORT}-memif)
endif()
+if (BUILD_HICNEXTRAPLUGIN AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ list(APPEND subdirs
+ ${BUILD_HICNEXTRAPLUGIN_DIR}
+ )
+endif()
+
## Shared targets
set(LIBHICN_SHARED ${LIBHICN}.shared)
set(LIBTRANSPORT_SHARED ${LIBTRANSPORT}.shared)
@@ -96,4 +106,4 @@ foreach(dir ${subdirs})
endforeach()
include(Packager)
-make_packages() \ No newline at end of file
+make_packages()
diff --git a/scripts/build-packages.sh b/scripts/build-packages.sh
index d758472de..cc48900c2 100644
--- a/scripts/build-packages.sh
+++ b/scripts/build-packages.sh
@@ -153,6 +153,7 @@ build_package() {
cmake -DCMAKE_INSTALL_PREFIX=/usr \
-DBUILD_HICNPLUGIN=ON \
+ -DBUILD_HICNEXTRAPLUGIN=ON \
-DBUILD_LIBTRANSPORT=ON \
-DBUILD_APPS=ON \
-DLIBMEMIF_HOME=${MEMIF_HOME} \
diff --git a/utils/extras/CMakeLists.txt b/utils/extras/CMakeLists.txt
new file mode 100644
index 000000000..85f924cb1
--- /dev/null
+++ b/utils/extras/CMakeLists.txt
@@ -0,0 +1,101 @@
+# Copyright (c) 2017-2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Build script for the "extra" VPP plugin (rtinject + rtnetlink sources).
+# The CMAKE_SOURCE_DIR check further down handles this file being the
+# top-level project rather than a subdirectory.
+cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
+project(extra-plugin)
+
+# Look for modules both in the repository-level cmake/Modules directory and in
+# the one that ships next to this file (which provides Packaging.cmake).
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH}
+  "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/Modules/"
+  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
+)
+
+set (CMAKE_CXX_STANDARD 11)
+set (CMAKE_C_STANDARD 11)
+
+# Check for memfd_create syscall
+include(CheckSymbolExists)
+CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE )
+if ( HAVE_MEMFD_CREATE )
+    add_definitions ( -DHAVE_MEMFD_CREATE )
+endif()
+
+# When this directory is the top-level project, no parent has defined the
+# component name, so define it here before Packaging.cmake uses it.
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
+    set(HICN_EXTRA_PLUGIN extra-plugin)
+endif()
+
+# Defines the ${HICN_EXTRA_PLUGIN}_* package description/dependency variables.
+include (Packaging)
+
+# Dependencies
+
+find_package(Vpp REQUIRED)
+
+include_directories(${VPP_INCLUDE_DIR})
+
+# Sources compiled into the plugin shared object.
+set(HICN_EXTRA_PLUGIN_SOURCE_FILES
+    rtinject/tap_inject.c
+    rtinject/tap_inject_netlink.c
+    rtinject/tap_inject_node.c
+    rtinject/tap_inject_tap.c
+    devices/rtnetlink/mapper.c
+    devices/rtnetlink/netns.c
+    devices/rtnetlink/rtnl.c
+)
+
+# NOTE(review): this header list is not referenced anywhere else in this file.
+set(HICN_EXTRA_PLUGIN_HEADER_FILES
+    devices/rtnetlink/mapper.h
+    devices/rtnetlink/netns.h
+    devices/rtnetlink/rtnl.h
+)
+
+# Default VPP_HOME to /usr when the caller did not provide it.
+if (NOT VPP_HOME)
+    set(VPP_HOME /usr)
+endif()
+
+# Default to a Release build when no build type was requested.
+if (NOT CMAKE_BUILD_TYPE)
+    set (CMAKE_BUILD_TYPE "Release")
+endif (NOT CMAKE_BUILD_TYPE)
+
+# Cached so it can be overridden from the command line; the plugin is installed
+# below ${EXTRA_INSTALL_PREFIX}/vpp_plugins.
+SET(EXTRA_INSTALL_PREFIX  ${CMAKE_INSTALL_PREFIX}/lib  CACHE STRING "extra_install_prefix")
+
+# Per-build-type C flags. Both variants pass -march=native, i.e. the flags are
+# tuned for the machine running the compiler.
+if (CMAKE_BUILD_TYPE STREQUAL "Release")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -march=native -O3 -g")
+elseif (CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -march=native -O0 -g")
+    # Only -DCLIB_DEBUG is a preprocessor definition; the other two entries are
+    # plain compiler options passed through add_definitions.
+    add_definitions(-DCLIB_DEBUG -fPIC -fstack-protector-all)
+endif()
+
+# NOTE(review): no directory is listed after SYSTEM, so this call appears to
+# add nothing - confirm whether a path was intended.
+include_directories(SYSTEM)
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+# EXTRA_VPP_PLUGIN=1 is defined for every C source in this directory.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DEXTRA_VPP_PLUGIN=1")
+add_library(hicn_extra_plugin SHARED
+    ${HICN_EXTRA_PLUGIN_SOURCE_FILES})
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/vpp_plugins)
+
+set(VPP_INSTALL_PLUGIN ${EXTRA_INSTALL_PREFIX}/vpp_plugins)
+
+# PREFIX "" drops the default "lib" prefix from the produced file name.
+set_target_properties(hicn_extra_plugin
+    PROPERTIES
+    LINKER_LANGUAGE C
+    INSTALL_RPATH ${VPP_INSTALL_PLUGIN}
+    PREFIX "")
+
+# No source directory is listed: this rule only names the destination
+# directory for the component.
+install(DIRECTORY
+    DESTINATION ${VPP_INSTALL_PLUGIN}
+    COMPONENT ${HICN_EXTRA_PLUGIN})
+
+install(TARGETS hicn_extra_plugin
+    DESTINATION
+    ${VPP_INSTALL_PLUGIN}
+    COMPONENT ${HICN_EXTRA_PLUGIN})
diff --git a/utils/extras/cmake/Modules/Packaging.cmake b/utils/extras/cmake/Modules/Packaging.cmake
new file mode 100644
index 000000000..69f63971f
--- /dev/null
+++ b/utils/extras/cmake/Modules/Packaging.cmake
@@ -0,0 +1,31 @@
+# Copyright (c) 2017-2019 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+######################
+# Packages section
+######################
+
+# The variable names below are derived from ${HICN_EXTRA_PLUGIN}, so that
+# variable must be set before this module is included.
+
+# Human-readable package description.
+set(${HICN_EXTRA_PLUGIN}_DESCRIPTION
+  "A extra plugin to VPP."
+  CACHE STRING "Description for deb/rpm package."
+)
+
+# Debian dependency list. "stable_version" / "next_version" look like
+# placeholders - presumably substituted by the packaging step; verify there.
+set(${HICN_EXTRA_PLUGIN}_DEB_DEPENDENCIES
+  "vpp (>= stable_version-release), vpp (<< next_version-release), vpp-plugin-core (>= stable_version-release), vpp-plugin-core (<< next_version-release)"
+  CACHE STRING "Dependencies for deb/rpm package."
+)
+
+# RPM dependency list, using the same version placeholders as the deb list.
+set(${HICN_EXTRA_PLUGIN}_RPM_DEPENDENCIES
+  "vpp >= stable_version-release, vpp < next_version-release, vpp-plugins >= stable_version-release, vpp-plugins < next_version-release"
+  CACHE STRING "Dependencies for deb/rpm package."
+)
diff --git a/utils/extras/devices/rtnetlink/mapper.c b/utils/extras/devices/rtnetlink/mapper.c
new file mode 100644
index 000000000..ed4fa5634
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/mapper.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/fib/fib.h>
+
+#include "netns.h"
+#include "mapper.h"
+
+/* Association between one Linux interface index and one VPP sw_if_index. */
+typedef struct {
+  int linux_ifindex;
+  u32 sw_if_index;
+} mapper_map_t;
+
+/* Per-namespace mapper state; 'mappings' is used as a pool of mapper_map_t. */
+typedef struct {
+  char nsname[RTNL_NETNS_NAMELEN + 1];
+  mapper_map_t *mappings;
+  u32 netns_handle; //Used to receive notifications
+  u32 v4fib_index; //One fib index for the namespace
+  u32 v6fib_index;
+} mapper_ns_t;
+
+/* Module-wide state; 'namespaces' is used as a pool of mapper_ns_t. */
+typedef struct {
+  mapper_ns_t *namespaces;
+} mapper_main_t;
+
+/* Single instance of the mapper state for this file. */
+static mapper_main_t mapper_main;
+
+/*
+ * Return the mapping of namespace 'ns' whose Linux interface index equals
+ * 'ifindex', or NULL when no mapping uses that index.
+ */
+mapper_map_t *mapper_get_by_ifindex(mapper_ns_t *ns, int ifindex)
+{
+  mapper_map_t *candidate;
+
+  pool_foreach(candidate, ns->mappings, {
+    if (candidate->linux_ifindex == ifindex)
+      return candidate;
+  });
+
+  return NULL;
+}
+
+/*
+ * Mirror one Linux route of namespace 'ns' into the namespace's VPP fib.
+ *
+ * The route is ignored (0 is returned) when its output interface has no
+ * mapping in 'ns', or when it is an IPv6 multicast route.
+ * When 'del' is non-zero the corresponding path is removed from the fib,
+ * otherwise it is added. Always returns 0.
+ */
+int mapper_add_del_route(mapper_ns_t *ns, ns_route_t *route, int del)
+{
+  mapper_main_t *mm = &mapper_main;
+  fib_prefix_t prefix;
+  ip46_address_t nh;
+  u32 fib_index;
+
+  /* The pointer difference is cast so that it matches the %d conversion. */
+  clib_warning("NS %d %s %U", (int) (ns - mm->namespaces), del?"del":"add", format_ns_route, route);
+
+  mapper_map_t *map = mapper_get_by_ifindex(ns, route->oif);
+  if (!map)
+    return 0;
+
+  memset (&prefix, 0, sizeof (prefix));
+  memset (&nh, 0, sizeof (nh));
+  prefix.fp_len = route->rtm.rtm_dst_len;
+
+  if (route->rtm.rtm_family == AF_INET6) {
+    //Filter-out multicast
+    if (route->rtm.rtm_dst_len >= 8 && route->dst[0] == 0xff)
+      return 0;
+
+    prefix.fp_proto = FIB_PROTOCOL_IP6;
+    clib_memcpy (&prefix.fp_addr.ip6, route->dst, sizeof (prefix.fp_addr.ip6));
+    clib_memcpy (&nh.ip6, route->gateway, sizeof (nh.ip6));
+    fib_index = ns->v6fib_index;
+  } else {
+    prefix.fp_proto = FIB_PROTOCOL_IP4;
+    clib_memcpy (&prefix.fp_addr.ip4, route->dst, sizeof (prefix.fp_addr.ip4));
+    clib_memcpy (&nh.ip4, route->gateway, sizeof (nh.ip4));
+    fib_index = ns->v4fib_index;
+  }
+
+  if (del) {
+    /* Remove the path using the same parameters it was installed with. */
+    fib_table_entry_path_remove (fib_index, &prefix, FIB_SOURCE_API,
+                                 prefix.fp_proto,
+                                 &nh, map->sw_if_index, fib_index,
+                                 0 /* weight */,
+                                 FIB_ROUTE_PATH_FLAG_NONE);
+  } else {
+    /* NOTE(review): the label-stack argument is an integer constant cast to a
+     * pointer; presumably NULL (no labels) is what is meant - confirm against
+     * the targeted VPP release before changing it. */
+    fib_table_entry_path_add (fib_index, &prefix, FIB_SOURCE_API,
+                              FIB_ENTRY_FLAG_NONE, prefix.fp_proto,
+                              &nh, map->sw_if_index, fib_index,
+                              0 /* weight */,
+                              (fib_mpls_label_t *) MPLS_LABEL_INVALID,
+                              FIB_ROUTE_PATH_FLAG_NONE);
+  }
+
+  return 0;
+}
+
+/*
+ * netns subscription callback. 'opaque' carries the index of the mapper
+ * namespace that subscribed. Only route objects are handled: a DEL flag
+ * removes the route, otherwise an ADD flag adds it.
+ */
+static void
+mapper_netns_notify_cb(void *obj, netns_type_t type,
+                       u32 flags, uword opaque)
+{
+  u32 ns_index = (u32) opaque;
+  mapper_main_t *mm = &mapper_main;
+
+  ASSERT(!pool_is_free_index(mm->namespaces, ns_index));
+  mapper_ns_t *ns = &mm->namespaces[ns_index];
+
+  //For now, only routes are mirrored
+  if (type == NETNS_TYPE_ROUTE) {
+    ns_route_t *route = obj;
+    if (flags & NETNS_F_DEL)
+      mapper_add_del_route(ns, route, 1);
+    else if (flags & NETNS_F_ADD)
+      mapper_add_del_route(ns, route, 0);
+  }
+}
+
+/*
+ * Drop mapping 'map' from namespace 'ns': every known route of the namespace
+ * that leaves through the mapped Linux interface is passed to
+ * mapper_add_del_route() with del=1, then the mapping slot is released.
+ */
+void
+mapper_delmap(mapper_ns_t*ns, mapper_map_t *map)
+{
+  netns_t *netns = netns_getns(ns->netns_handle);
+  ns_route_t *r;
+
+  pool_foreach(r, netns->routes, {
+    if (map->linux_ifindex == r->oif)
+      mapper_add_del_route(ns, r, 1);
+  });
+
+  pool_put(ns->mappings, map);
+}
+
+/*
+ * Find the mapping for 'linux_ifindex' in namespace 'ns'.
+ *
+ * An existing mapping is returned only if it targets 'sw_if_index'; if it
+ * targets another VPP interface NULL is returned. When nothing exists and
+ * 'create' is non-zero, a mapping is allocated, the interface is bound to the
+ * namespace fibs and the already-known routes using that Linux interface are
+ * replayed. Returns NULL when nothing exists and 'create' is zero.
+ */
+mapper_map_t *
+mapper_getmap(mapper_ns_t*ns, u32 sw_if_index,
+              int linux_ifindex, int create)
+{
+  mapper_map_t *entry;
+
+  pool_foreach(entry, ns->mappings, {
+    if (entry->linux_ifindex == linux_ifindex) {
+      //Cannot have multiple mapping with the same ifindex
+      return (entry->sw_if_index == sw_if_index) ? entry : NULL;
+    }
+  });
+
+  if (!create)
+    return NULL;
+
+  pool_get(ns->mappings, entry);
+  entry->sw_if_index = sw_if_index;
+  entry->linux_ifindex = linux_ifindex;
+  ip4_main.fib_index_by_sw_if_index[sw_if_index] = ns->v4fib_index;
+  ip6_main.fib_index_by_sw_if_index[sw_if_index] = ns->v6fib_index;
+
+  //Load available routes
+  netns_t *netns = netns_getns(ns->netns_handle);
+  ns_route_t *r;
+  pool_foreach(r, netns->routes, {
+    if (entry->linux_ifindex == r->oif)
+      mapper_add_del_route(ns, r, 0);
+  });
+
+  return entry;
+}
+
+/*
+ * Return the pool index of the mapper namespace named 'nsname',
+ * or ~0 when no namespace has that name.
+ */
+u32
+mapper_get_ns(char *nsname)
+{
+  mapper_main_t *mm = &mapper_main;
+  mapper_ns_t *cur;
+
+  pool_foreach(cur, mm->namespaces, {
+    if (strcmp(nsname, cur->nsname) == 0)
+      return cur - mm->namespaces;
+  });
+
+  return ~0;
+}
+
+/*
+ * Add (del == 0) or remove (del != 0) the mapping between 'linux_ifindex'
+ * and 'sw_if_index' in the namespace at pool index 'nsindex'.
+ *
+ * Returns 0 on success, -1 when the namespace slot is free or when the
+ * mapping cannot be found/created (see mapper_getmap()).
+ */
+int
+mapper_add_del(u32 nsindex, int linux_ifindex,
+               u32 sw_if_index, int del)
+{
+  mapper_main_t *mm = &mapper_main;
+  mapper_ns_t *ns = &mm->namespaces[nsindex];
+  mapper_map_t *map;
+
+  if (pool_is_free(mm->namespaces, ns))
+    return -1;
+
+  /* A mapping is only created on the add path. */
+  map = mapper_getmap(ns, sw_if_index, linux_ifindex, !del);
+  if (map == NULL)
+    return -1;
+
+  if (del)
+    mapper_delmap(ns, map);
+
+  return 0;
+}
+
+/*
+ * Register namespace 'nsname' with the mapper and subscribe to its netns
+ * notifications.
+ *
+ * 'v4fib_index' / 'v6fib_index' are the fibs that routes of this namespace
+ * are installed into. On success the new namespace index is stored in
+ * '*nsindex' and 0 is returned. Returns -1 when an argument is NULL, the
+ * name does not fit in the namespace record, the namespace already exists,
+ * or the netns cannot be opened.
+ */
+int
+mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex)
+{
+  mapper_main_t *mm = &mapper_main;
+  mapper_ns_t *ns;
+
+  /* Reject names that would overflow ns->nsname before anything is copied. */
+  if (!nsname || !nsindex || strlen(nsname) > RTNL_NETNS_NAMELEN)
+    return -1;
+
+  if (mapper_get_ns(nsname) != ~0)
+    return -1; //Already exists
+
+  pool_get(mm->namespaces, ns);
+  strcpy(ns->nsname, nsname);
+  ns->v4fib_index = v4fib_index;
+  ns->v6fib_index = v6fib_index;
+  ns->mappings = 0;
+
+  netns_sub_t sub;
+  sub.notify = mapper_netns_notify_cb;
+  sub.opaque = (uword)(ns - mm->namespaces);
+  if ((ns->netns_handle = netns_open(ns->nsname, &sub)) == ~0) {
+    pool_put(mm->namespaces, ns);
+    return -1;
+  }
+  *nsindex = ns - mm->namespaces;
+  return 0;
+}
+
+/*
+ * Remove the mapper namespace at pool index 'nsindex': delete all of its
+ * mappings, close its netns subscription and release the slot.
+ * Returns 0 on success, -1 when the slot is free.
+ */
+int
+mapper_del_ns(u32 nsindex)
+{
+  mapper_main_t *mm = &mapper_main;
+  mapper_ns_t *ns = &mm->namespaces[nsindex];
+  int *live = 0;
+  int k;
+
+  if (pool_is_free(mm->namespaces, ns))
+    return -1;
+
+  //Snapshot the used indexes first, then remove all existing mappings
+  pool_foreach_index(k, ns->mappings, {
+    vec_add1(live, k);
+  });
+  vec_foreach_index(k, live) {
+    mapper_delmap(ns, &ns->mappings[live[k]]);
+  }
+  vec_free(live);
+
+  netns_close(ns->netns_handle);
+  pool_put(mm->namespaces, ns);
+  return 0;
+}
+
+clib_error_t *
+mapper_init (vlib_main_t * vm)
+{
+ mapper_main_t *mm = &mapper_main;
+ mm->namespaces = 0;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mapper_init);
diff --git a/utils/extras/devices/rtnetlink/mapper.h b/utils/extras/devices/rtnetlink/mapper.h
new file mode 100644
index 000000000..32e95d48f
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/mapper.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MAPPER_H_
+#define MAPPER_H_
+
+#include "netns.h"
+
+/*
+ * Automatically map linux network routes to VPP.
+ * Each namespace is associated with an individual fib.
+ *
+ * One linux interface can only be mapped to a single VPP
+ * interface, but one VPP interface can be mapped to
+ * multiple linux interfaces.
+ * A mapped VPP interface must not have any configured fib.
+ */
+
+/* Register a namespace with its v4/v6 fib indexes; the new namespace index is
+ * written to *nsindex. Returns 0 on success, -1 on failure. */
+int mapper_add_ns(char *nsname, u32 v4fib_index, u32 v6fib_index, u32 *nsindex);
+/* Remove a namespace and all of its mappings. Returns 0 on success, -1 on failure. */
+int mapper_del_ns(u32 nsindex);
+/* Add (del == 0) or delete (del != 0) a linux_ifindex <-> sw_if_index mapping
+ * in namespace nsindex. Returns 0 on success, -1 on failure. */
+int mapper_add_del(u32 nsindex, int linux_ifindex, u32 sw_if_index, int del);
+
+#endif /* MAPPER_H_ */
diff --git a/utils/extras/devices/rtnetlink/netns.c b/utils/extras/devices/rtnetlink/netns.c
new file mode 100644
index 000000000..19adb469a
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/netns.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/format.h>
+#include <stddef.h>
+
+#include "netns.h"
+
+/* Enable some RTA values debug */
+//#define RTNL_CHECK
+
+/*
+ * Evaluates to non-zero when any byte of object 'x' is non-zero.
+ * 'x' must be an lvalue (its address is taken); the argument is parenthesized
+ * so that expressions such as *p or a->b[0] expand safely.
+ */
+#define is_nonzero(x)                       \
+  ({                                        \
+    u8 __is_zero_zero[sizeof(x)] = {};      \
+    memcmp(__is_zero_zero, &(x), sizeof(x)); \
+  })
+
+/*
+ * Describes where the payload of one rtattr type is stored inside one of the
+ * ns_*_t structures. Tables of these entries end with a .type == 0 entry.
+ */
+typedef struct {
+  u8 type;   //Attribute identifier
+  u8 unique; //Part of the values uniquely identifying an entry
+  u16 offset; //Offset where stored in struct
+  u16 size;  //Length of the attribute
+} rtnl_mapping_t;
+
+/* X-macro list: _(IFLA attribute type, ns_link_t field that stores it). */
+#define ns_foreach_ifla                 \
+  _(IFLA_ADDRESS, hwaddr)               \
+  _(IFLA_BROADCAST, broadcast)          \
+  _(IFLA_IFNAME, ifname)                \
+  _(IFLA_MASTER, master)                \
+  _(IFLA_MTU, mtu)                      \
+  _(IFLA_QDISC, qdisc)
+
+/* Mapping table for links, expanded from ns_foreach_ifla ('unique' is left
+ * at zero for every entry) and closed by a .type = 0 terminator. */
+static rtnl_mapping_t ns_ifmap[] = {
+#define _(t, e)                         \
+  {                                     \
+    .type = t,                          \
+    .offset = offsetof(ns_link_t, e),   \
+    .size = sizeof(((ns_link_t*)0)->e)  \
+  },
+  ns_foreach_ifla
+#undef _
+  { .type = 0 }
+};
+
+/* format() helper: prints a link as "<ifname> index <ifindex>". */
+u8 *format_ns_link (u8 *s, va_list *args)
+{
+  ns_link_t *link = va_arg(*args, ns_link_t *);
+  return format(s, "%s index %u", link->ifname, link->ifi.ifi_index);
+}
+
+/* X-macro list: _(RTA attribute type, ns_route_t field, unique flag).
+ * Entries flagged 1 take part in route identity (see rtnl_entry_match). */
+#define ns_foreach_rta                  \
+  _(RTA_DST, dst, 1)                    \
+  _(RTA_SRC, src, 1)                    \
+  _(RTA_GATEWAY, gateway, 1)            \
+  _(RTA_IIF, iif, 1)                    \
+  _(RTA_OIF, oif, 1)                    \
+  _(RTA_PREFSRC, prefsrc, 0)            \
+  _(RTA_TABLE, table, 0)                \
+  _(RTA_PRIORITY, priority, 0)          \
+  _(RTA_CACHEINFO, cacheinfo, 0)        \
+  _(RTA_ENCAP, encap, 1)
+
+/* Mapping table for routes, expanded from ns_foreach_rta and closed by a
+ * .type = 0 terminator. */
+static rtnl_mapping_t ns_routemap[] = {
+#define _(t, e, u)                      \
+  {                                     \
+    .type = t, .unique = u,             \
+    .offset = offsetof(ns_route_t, e),  \
+    .size = sizeof(((ns_route_t*)0)->e) \
+  },
+  ns_foreach_rta
+#undef _
+  { .type = 0 }
+};
+
+/*
+ * format() helper for a route: "<dst>/<len>" followed by the optional parts
+ * (source prefix, gateway, iif, oif, preferred source, table, priority) that
+ * are set. Addresses are printed as IPv4 when rtm_family is AF_INET and as
+ * IPv6 otherwise.
+ */
+u8 *format_ns_route (u8 *s, va_list *args)
+{
+  ns_route_t *route = va_arg(*args, ns_route_t *);
+  void *fmt_addr;
+
+  if (route->rtm.rtm_family == AF_INET)
+    fmt_addr = format_ip4_address;
+  else
+    fmt_addr = format_ip6_address;
+
+  s = format(s, "%U/%d", fmt_addr, route->dst, route->rtm.rtm_dst_len);
+  if (route->rtm.rtm_src_len != 0)
+    s = format(s, " from %U/%d", fmt_addr, route->src, route->rtm.rtm_src_len);
+  if (is_nonzero(route->gateway))
+    s = format(s, " via %U", fmt_addr, route->gateway);
+  if (route->iif != 0)
+    s = format(s, " iif %d", route->iif);
+  if (route->oif != 0)
+    s = format(s, " oif %d", route->oif);
+  if (is_nonzero(route->prefsrc))
+    s = format(s, " src %U", fmt_addr, route->prefsrc);
+  if (route->table != 0)
+    s = format(s, " table %d", route->table);
+  if (route->priority != 0)
+    s = format(s, " priority %u", route->priority);
+
+  return s;
+}
+
+/* X-macro list: _(IFA attribute type, ns_addr_t field, unique flag).
+ * Entries flagged 1 take part in address identity (see rtnl_entry_match). */
+#define ns_foreach_ifaddr               \
+  _(IFA_ADDRESS, addr, 1)               \
+  _(IFA_LOCAL, local, 1)                \
+  _(IFA_LABEL, label, 0)                \
+  _(IFA_BROADCAST, broadcast, 0)        \
+  _(IFA_ANYCAST, anycast, 0)            \
+  _(IFA_CACHEINFO, cacheinfo, 0)
+
+/* Mapping table for addresses, expanded from ns_foreach_ifaddr and closed by
+ * a .type = 0 terminator. */
+static rtnl_mapping_t ns_addrmap[] = {
+#define _(t, e, u)                      \
+  {                                     \
+    .type = t, .unique = u,             \
+    .offset = offsetof(ns_addr_t, e),   \
+    .size = sizeof(((ns_addr_t*)0)->e)  \
+  },
+  ns_foreach_ifaddr
+#undef _
+  { .type = 0 }
+};
+
+/*
+ * format() helper for an interface address: "<addr>/<prefixlen>" followed by
+ * the label, broadcast, anycast and local parts that are set. Addresses are
+ * printed as IPv4 when ifa_family is AF_INET and as IPv6 otherwise.
+ */
+u8 *format_ns_addr (u8 *s, va_list *args)
+{
+  ns_addr_t *entry = va_arg(*args, ns_addr_t *);
+  void *fmt_addr;
+
+  if (entry->ifaddr.ifa_family == AF_INET)
+    fmt_addr = format_ip4_address;
+  else
+    fmt_addr = format_ip6_address;
+
+  s = format(s, "%U/%d", fmt_addr, entry->addr, entry->ifaddr.ifa_prefixlen);
+  if (is_nonzero(entry->label))
+    s = format(s, " dev %s", entry->label);
+  if (is_nonzero(entry->broadcast))
+    s = format(s, " broadcast %U", fmt_addr, entry->broadcast);
+  if (is_nonzero(entry->anycast))
+    s = format(s, " anycast %U", fmt_addr, entry->anycast);
+  if (is_nonzero(entry->local))
+    s = format(s, " local %U", fmt_addr, entry->local);
+
+  return s;
+}
+
+/* Fallback definitions, used only when the included headers do not already
+ * define NDA_RTA: attribute area and payload length of an ndmsg message. */
+#ifndef NDA_RTA
+#define NDA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
+#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg))
+#endif
+
+/* X-macro list: _(NDA attribute type, ns_neigh_t field, unique flag).
+ * Entries flagged 1 take part in neighbor identity (see rtnl_entry_match). */
+#define ns_foreach_neigh                \
+  _(NDA_DST, dst, 1)                    \
+  _(NDA_LLADDR, lladdr, 0)              \
+  _(NDA_PROBES, probes, 0)              \
+  _(NDA_CACHEINFO, cacheinfo, 0)
+
+/* Mapping table for neighbors, expanded from ns_foreach_neigh and closed by
+ * a .type = 0 terminator. */
+static rtnl_mapping_t ns_neighmap[] = {
+#define _(t, e, u)                      \
+  {                                     \
+    .type = t, .unique = u,             \
+    .offset = offsetof(ns_neigh_t, e),  \
+    .size = sizeof(((ns_neigh_t*)0)->e) \
+  },
+  ns_foreach_neigh
+#undef _
+  { .type = 0 }
+};
+
+/*
+ * format() helper for a neighbor: "<dst>" followed by the link-layer address
+ * and probe count when set. The address is printed as IPv4 when ndm_family is
+ * AF_INET and as IPv6 otherwise.
+ */
+u8 *format_ns_neigh (u8 *s, va_list *args)
+{
+  ns_neigh_t *neigh = va_arg(*args, ns_neigh_t *);
+  void *fmt_addr;
+
+  if (neigh->nd.ndm_family == AF_INET)
+    fmt_addr = format_ip4_address;
+  else
+    fmt_addr = format_ip6_address;
+
+  s = format(s, "%U", fmt_addr, neigh->dst);
+  if (is_nonzero(neigh->lladdr))
+    s = format(s, " lladdr %U", format_ethernet_address, neigh->lladdr);
+  if (neigh->probes != 0)
+    s = format(s, " probes %d", neigh->probes);
+
+  return s;
+}
+
+/* One subscription to a namespace: callback, caller cookie, and the index of
+ * the namespace (in netns_main.netnss) it listens to. */
+typedef struct {
+  void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque);
+  uword opaque;
+  u32 netns_index;
+} netns_handle_t;
+
+/* Private per-namespace record: the public netns_t, the rtnl stream handle
+ * feeding it, and the number of open subscriptions. */
+typedef struct {
+  netns_t netns;
+  u32 rtnl_handle;
+  u32 subscriber_count;
+} netns_p;
+
+/* Module state: 'netnss' and 'handles' are used as pools. */
+typedef struct {
+  netns_p *netnss;
+  netns_handle_t *handles;
+} netns_main_t;
+
+netns_main_t netns_main;
+
+/*
+ * Walk the attribute area starting at 'rta' ('len' bytes) and record a
+ * pointer to each attribute whose type is <= 'max' in db[type]; a later
+ * attribute of the same type overwrites an earlier one.
+ * Returns 0 when the whole area was consumed, -1 when bytes are left over.
+ */
+static int
+rtnl_parse_rtattr(struct rtattr *db[], size_t max,
+                  struct rtattr *rta, size_t len) {
+  while (RTA_OK(rta, len)) {
+    if (rta->rta_type <= max)
+      db[rta->rta_type] = rta;
+#ifdef RTNL_CHECK
+    else
+      clib_warning("RTA type too high: %d", rta->rta_type);
+#endif
+    rta = RTA_NEXT(rta, len);
+  }
+
+  if (len == 0)
+    return 0;
+
+  clib_warning("rattr lenght mistmatch %d %d len",
+               (int) len,  (int) rta->rta_len);
+  return -1;
+}
+
+/*
+ * Debug function to display when
+ * we receive an RTA that I forgot in
+ * the mapping table (there are so many of them).
+ */
+#ifdef RTNL_CHECK
+/*
+ * Debug aid (compiled only with RTNL_CHECK): warn about every parsed
+ * attribute whose type has no entry in mapping table 'map'. 'logstr' tags
+ * the warning with the kind of object being parsed.
+ */
+static void
+rtnl_entry_check(struct rtattr *rtas[],
+                 size_t rta_len,
+                 rtnl_mapping_t map[],
+                 char *logstr)
+{
+  int idx;
+  for (idx = 0; idx < rta_len; idx++) {
+    struct rtattr *attr = rtas[idx];
+    if (attr == NULL)
+      continue;
+
+    rtnl_mapping_t *m = map;
+    while (m->type && m->type != attr->rta_type)
+      m++;
+
+    if (!m->type)
+      clib_warning("Unknown RTA type %d (%s)", attr->rta_type, logstr);
+  }
+}
+#endif
+
+/*
+ * Check if the provided entry matches the parsed and unique rtas
+ */
+/*
+ * Compare the 'unique' attributes of mapping table 'map' between a stored
+ * 'entry' and the freshly parsed attributes 'rtas'.
+ *
+ * For each unique field the attribute payload must equal the leading bytes
+ * of the stored field and the rest of the field must be zero; a missing
+ * attribute therefore matches only an all-zero field.
+ *
+ * Returns 1 on match and 0 otherwise. An attribute longer than its field can
+ * never equal a stored entry, so it is reported as "no match" (0); callers
+ * use the result as a boolean and a negative value would read as a match.
+ */
+static int
+rtnl_entry_match(void *entry,
+                 struct rtattr *rtas[],
+                 rtnl_mapping_t map[])
+{
+  u8 zero[1024] = {};
+  for ( ;map->type != 0; map++) {
+    struct rtattr *rta = rtas[map->type];
+    size_t rta_len = rta?RTA_PAYLOAD(rta):0;
+    if (!map->unique)
+      continue;
+
+    if (rta && rta_len > map->size) {
+      clib_warning("rta (type=%d len=%d) too long (max %d)",
+                   rta->rta_type, rta->rta_len, map->size);
+      return 0;
+    }
+
+    if ((rta && memcmp(RTA_DATA(rta), entry + map->offset, rta_len)) ||
+        memcmp(entry + map->offset + rta_len, zero, map->size - rta_len)) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/*
+ * Copy the parsed attributes 'rtas' into 'entry' according to mapping table
+ * 'map'.
+ *
+ * For every mapped attribute that is present, its payload is copied into the
+ * field and the remainder of the field is zeroed. For RTA_ENCAP the payload
+ * is itself an attribute, and the nested attribute's payload is what gets
+ * stored. When 'init' is non-zero, fields whose attribute is absent are
+ * zeroed; otherwise they are left untouched.
+ *
+ * Returns 0 on success, -1 when an attribute is malformed or larger than its
+ * destination field (the entry may then be partially updated).
+ */
+static int
+rtnl_entry_set(void *entry,
+               struct rtattr *rtas[],
+               rtnl_mapping_t map[],
+               int init)
+{
+  for (; map->type != 0; map++) {
+
+    struct rtattr *rta = rtas[map->type];
+
+    if(map->type == RTA_ENCAP && rta) {
+      /*Data of RTA_ENCAP is a pointer to rta attributes for MPLS*/
+      int outer_len = RTA_PAYLOAD(rta);
+      rta = (struct rtattr*)RTA_DATA(rta);
+      /* The nested attribute must fit inside the outer payload. */
+      if (!RTA_OK(rta, outer_len)) {
+        clib_warning("malformed nested rta in RTA_ENCAP (len=%d)", outer_len);
+        return -1;
+      }
+    }
+
+    if (rta) {
+      size_t payload = RTA_PAYLOAD(rta);
+      if (payload > map->size) {
+        clib_warning("rta (type=%d len=%d) too long (max %d)", rta->rta_type, rta->rta_len, map->size);
+        return -1;
+      }
+      /* Copy only what the attribute carries, then clear the tail. */
+      memcpy(entry + map->offset, RTA_DATA(rta), payload);
+      memset(entry + map->offset + payload, 0, map->size - payload);
+    } else if (init) {
+      memset(entry + map->offset, 0, map->size);
+    }
+  }
+  return 0;
+}
+
+/*
+ * Invoke the notify callback of every subscription attached to namespace
+ * 'ns', passing the object, its type, the flags and the subscriber's cookie.
+ * Subscriptions without a callback are skipped.
+ */
+void
+netns_notify(netns_p *ns, void *obj, netns_type_t type, u32 flags)
+{
+  netns_main_t *nm = &netns_main;
+  netns_handle_t *sub;
+
+  pool_foreach(sub, nm->handles, {
+    if (sub->notify && sub->netns_index == (ns - nm->netnss))
+      sub->notify(obj, type, flags, sub->opaque);
+  });
+}
+
+/*
+ * Return 1 when 'a' and 'b' are equal on every bit that is set in 'mask'
+ * over 'len' bytes, 0 otherwise.
+ */
+static_always_inline int
+mask_match(void *a, void *b, void *mask, size_t len)
+{
+  const u8 *lhs = (u8 *) a;
+  const u8 *rhs = (u8 *) b;
+  const u8 *bits = (u8 *) mask;
+  size_t k;
+
+  for (k = 0; k < len; k++) {
+    u8 diff = lhs[k] ^ rhs[k];
+    if (diff & bits[k])
+      return 0;
+  }
+  return 1;
+}
+
+/*
+ * Look up the stored link whose interface index equals ifi->ifi_index.
+ * 'rtas' is not consulted. Returns NULL when the link is unknown.
+ */
+static ns_link_t *
+ns_get_link(netns_p *ns, struct ifinfomsg *ifi, struct rtattr *rtas[])
+{
+  ns_link_t *cur;
+
+  pool_foreach(cur, ns->netns.links, {
+    if (cur->ifi.ifi_index == ifi->ifi_index)
+      return cur;
+  });
+
+  return NULL;
+}
+
+/*
+ * Handle an RTM_NEWLINK / RTM_DELLINK message for namespace 'ns'.
+ *
+ * The message is parsed, the matching stored link (if any) is looked up by
+ * interface index, and then either removed (DELLINK) or created/updated.
+ * Subscribers are notified with NETNS_F_DEL or NETNS_F_ADD.
+ * Returns 0 on success, -1 on a malformed message or when a DELLINK refers
+ * to an unknown link.
+ */
+static int
+ns_rcv_link(netns_p *ns, struct nlmsghdr *hdr)
+{
+  ns_link_t *link;
+  struct ifinfomsg *ifi;
+  struct rtattr *rtas[IFLA_MAX + 1] = {};
+  size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr));
+
+  if(datalen < sizeof(*ifi))
+    return -1;
+
+  ifi = NLMSG_DATA(hdr);
+  if((datalen > NLMSG_ALIGN(sizeof(*ifi))) &&
+      rtnl_parse_rtattr(rtas, IFLA_MAX, IFLA_RTA(ifi),
+                        IFLA_PAYLOAD(hdr))) {
+    return -1;
+  }
+#ifdef RTNL_CHECK
+  rtnl_entry_check(rtas, IFLA_MAX + 1, ns_ifmap, "link");
+#endif
+
+  link = ns_get_link(ns, ifi, rtas);
+
+  if (hdr->nlmsg_type == RTM_DELLINK) {
+    if (!link)
+      return -1;
+    /* Notify while the entry is still allocated, then release it, so that
+     * callbacks never read a freed pool slot. */
+    netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_DEL);
+    pool_put(ns->netns.links, link);
+    return 0;
+  }
+
+  if (!link) {
+    pool_get(ns->netns.links, link);
+    /* Clear the whole entry (as done for routes, addresses and neighbors):
+     * fields without a mapping, such as stats, would otherwise keep stale
+     * pool contents. */
+    memset(link, 0, sizeof(*link));
+    rtnl_entry_set(link, rtas, ns_ifmap, 1);
+  } else {
+    rtnl_entry_set(link, rtas, ns_ifmap, 0);
+  }
+
+  link->ifi = *ifi;
+  link->last_updated = vlib_time_now(vlib_get_main());
+  netns_notify(ns, link, NETNS_TYPE_LINK, NETNS_F_ADD);
+  return 0;
+}
+
+/*
+ * Look up the stored route matching header 'rtm' and attributes 'rtas'.
+ * A route matches when the masked rtmsg fields below are equal and the
+ * unique attributes of ns_routemap match. Returns NULL when none does.
+ */
+static ns_route_t *
+ns_get_route(netns_p *ns, struct rtmsg *rtm, struct rtattr *rtas[])
+{
+  //Header fields (all bits set) that take part in route identity
+  struct rtmsg identity_mask = {
+    .rtm_family = 0xff,
+    .rtm_dst_len = 0xff,
+    .rtm_src_len = 0xff,
+    .rtm_table = 0xff,
+    .rtm_protocol = 0xff,
+    .rtm_type = 0xff
+  };
+  ns_route_t *cur;
+
+  pool_foreach(cur, ns->netns.routes, {
+    if (mask_match(&cur->rtm, rtm, &identity_mask, sizeof(struct rtmsg))) {
+      if (rtnl_entry_match(cur, rtas, ns_routemap))
+        return cur;
+    }
+  });
+
+  return NULL;
+}
+
+/*
+ * Handle an RTM_NEWROUTE / RTM_DELROUTE message for namespace 'ns'.
+ *
+ * The message is parsed, the matching stored route (if any) is looked up,
+ * and then either removed (DELROUTE) or created/updated. Subscribers are
+ * notified with NETNS_F_DEL or NETNS_F_ADD.
+ * Returns 0 on success, -1 on a malformed message or when a DELROUTE refers
+ * to an unknown route.
+ */
+static int
+ns_rcv_route(netns_p *ns, struct nlmsghdr *hdr)
+{
+  ns_route_t *route;
+  struct rtmsg *rtm;
+  struct rtattr *rtas[RTA_MAX + 1] = {};
+  size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr));
+
+  if(datalen < sizeof(*rtm))
+    return -1;
+
+  rtm = NLMSG_DATA(hdr);
+  if((datalen > NLMSG_ALIGN(sizeof(*rtm))) &&
+      rtnl_parse_rtattr(rtas, RTA_MAX, RTM_RTA(rtm),
+                        RTM_PAYLOAD(hdr))) {
+    return -1;
+  }
+#ifdef RTNL_CHECK
+  rtnl_entry_check(rtas, RTA_MAX + 1, ns_routemap, "route");
+#endif
+  route = ns_get_route(ns, rtm, rtas);
+
+  if (hdr->nlmsg_type == RTM_DELROUTE) {
+    if (!route)
+      return -1;
+    /* Notify while the entry is still allocated, then release it, so that
+     * callbacks never read a freed pool slot. */
+    netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_DEL);
+    pool_put(ns->netns.routes, route);
+    return 0;
+  }
+
+  if (!route) {
+    pool_get(ns->netns.routes, route);
+    memset(route, 0, sizeof(*route));
+    rtnl_entry_set(route, rtas, ns_routemap, 1);
+  } else {
+    rtnl_entry_set(route, rtas, ns_routemap, 0);
+  }
+
+  route->rtm = *rtm;
+  route->last_updated = vlib_time_now(vlib_get_main());
+  netns_notify(ns, route, NETNS_TYPE_ROUTE, NETNS_F_ADD);
+  return 0;
+}
+
+/*
+ * Look up the stored address matching header 'ifaddr' and attributes 'rtas'.
+ * An address matches when the masked ifaddrmsg fields below are equal and
+ * the unique attributes of ns_addrmap match. Returns NULL when none does.
+ */
+static ns_addr_t *
+ns_get_addr(netns_p *ns, struct ifaddrmsg *ifaddr, struct rtattr *rtas[])
+{
+  //Header fields (all bits set) that take part in address identity
+  struct ifaddrmsg identity_mask = {
+    .ifa_family = 0xff,
+    .ifa_prefixlen = 0xff,
+  };
+  ns_addr_t *cur;
+
+  pool_foreach(cur, ns->netns.addresses, {
+    if (mask_match(&cur->ifaddr, ifaddr, &identity_mask, sizeof(struct ifaddrmsg))) {
+      if (rtnl_entry_match(cur, rtas, ns_addrmap))
+        return cur;
+    }
+  });
+
+  return NULL;
+}
+
+/*
+ * Handle an RTM_NEWADDR / RTM_DELADDR message for namespace 'ns'.
+ *
+ * The message is parsed, the matching stored address (if any) is looked up,
+ * and then either removed (DELADDR) or created/updated. Subscribers are
+ * notified with NETNS_F_DEL or NETNS_F_ADD.
+ * Returns 0 on success, -1 on a malformed message or when a DELADDR refers
+ * to an unknown address.
+ */
+static int
+ns_rcv_addr(netns_p *ns, struct nlmsghdr *hdr)
+{
+  ns_addr_t *addr;
+  struct ifaddrmsg *ifaddr;
+  struct rtattr *rtas[IFA_MAX + 1] = {};
+  size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr));
+
+  if(datalen < sizeof(*ifaddr))
+    return -1;
+
+  ifaddr = NLMSG_DATA(hdr);
+  if((datalen > NLMSG_ALIGN(sizeof(*ifaddr))) &&
+      rtnl_parse_rtattr(rtas, IFA_MAX, IFA_RTA(ifaddr),
+                        IFA_PAYLOAD(hdr))) {
+    return -1;
+  }
+#ifdef RTNL_CHECK
+  rtnl_entry_check(rtas, IFA_MAX + 1, ns_addrmap, "addr");
+#endif
+  addr = ns_get_addr(ns, ifaddr, rtas);
+
+  if (hdr->nlmsg_type == RTM_DELADDR) {
+    if (!addr)
+      return -1;
+    /* Notify while the entry is still allocated, then release it, so that
+     * callbacks never read a freed pool slot. */
+    netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_DEL);
+    pool_put(ns->netns.addresses, addr);
+    return 0;
+  }
+
+  if (!addr) {
+    pool_get(ns->netns.addresses, addr);
+    memset(addr, 0, sizeof(*addr));
+    rtnl_entry_set(addr, rtas, ns_addrmap, 1);
+  } else {
+    rtnl_entry_set(addr, rtas, ns_addrmap, 0);
+  }
+
+  addr->ifaddr = *ifaddr;
+  addr->last_updated = vlib_time_now(vlib_get_main());
+  netns_notify(ns, addr, NETNS_TYPE_ADDR, NETNS_F_ADD);
+  return 0;
+}
+
+/*
+ * Look up the stored neighbor matching header 'nd' and attributes 'rtas'.
+ * A neighbor matches when its address family and interface index are equal
+ * and the unique attributes of ns_neighmap match.
+ * Returns NULL when no stored neighbor matches.
+ */
+static ns_neigh_t *
+ns_get_neigh(netns_p *ns, struct ndmsg *nd, struct rtattr *rtas[])
+{
+  ns_neigh_t *neigh;
+
+  //This describes the values which uniquely identify a neighbor.
+  //ndm_ifindex is wider than one byte, so every bit of it is set in the mask.
+  struct ndmsg msg = {
+    .ndm_family = 0xff,
+    .ndm_ifindex = ~0,
+  };
+
+  pool_foreach(neigh, ns->netns.neighbors, {
+    /* Compare the whole header, not just sizeof(pointer) bytes of it. */
+    if(mask_match(&neigh->nd, nd, &msg, sizeof(msg)) &&
+        rtnl_entry_match(neigh, rtas, ns_neighmap))
+      return neigh;
+  });
+  return NULL;
+}
+
+/*
+ * Handle an RTM_NEWNEIGH / RTM_DELNEIGH message for namespace 'ns'.
+ *
+ * The message is parsed, the matching stored neighbor (if any) is looked up,
+ * and then either removed (DELNEIGH) or created/updated. Subscribers are
+ * notified with NETNS_F_DEL or NETNS_F_ADD.
+ * Returns 0 on success, -1 on a malformed message or when a DELNEIGH refers
+ * to an unknown neighbor.
+ */
+static int
+ns_rcv_neigh(netns_p *ns, struct nlmsghdr *hdr)
+{
+  ns_neigh_t *neigh;
+  struct ndmsg *nd;
+  struct rtattr *rtas[NDA_MAX + 1] = {};
+  size_t datalen = hdr->nlmsg_len - NLMSG_ALIGN(sizeof(*hdr));
+
+  if(datalen < sizeof(*nd))
+    return -1;
+
+  nd = NLMSG_DATA(hdr);
+  if((datalen > NLMSG_ALIGN(sizeof(*nd))) &&
+      rtnl_parse_rtattr(rtas, NDA_MAX, NDA_RTA(nd),
+                        NDA_PAYLOAD(hdr))) {
+    return -1;
+  }
+#ifdef RTNL_CHECK
+  rtnl_entry_check(rtas, NDA_MAX + 1, ns_neighmap, "nd");
+#endif
+  neigh = ns_get_neigh(ns, nd, rtas);
+
+  if (hdr->nlmsg_type == RTM_DELNEIGH) {
+    if (!neigh)
+      return -1;
+    /* Notify while the entry is still allocated, then release it, so that
+     * callbacks never read a freed pool slot. */
+    netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_DEL);
+    pool_put(ns->netns.neighbors, neigh);
+    return 0;
+  }
+
+  if (!neigh) {
+    pool_get(ns->netns.neighbors, neigh);
+    memset(neigh, 0, sizeof(*neigh));
+    rtnl_entry_set(neigh, rtas, ns_neighmap, 1);
+  } else {
+    rtnl_entry_set(neigh, rtas, ns_neighmap, 0);
+  }
+
+  neigh->nd = *nd;
+  neigh->last_updated = vlib_time_now(vlib_get_main());
+  netns_notify(ns, neigh, NETNS_TYPE_NEIGH, NETNS_F_ADD);
+  return 0;
+}
+
+#define ns_object_foreach \
+ _(neighbors, NETNS_TYPE_NEIGH) \
+ _(routes, NETNS_TYPE_ROUTE) \
+ _(addresses, NETNS_TYPE_ADDR) \
+ _(links, NETNS_TYPE_LINK)
+
+/*
+ * rtnl stream error callback for the namespace at index 'o'.
+ *
+ * Every stored neighbor, route, address and link is dropped: for each object
+ * type the used pool indexes are first collected, then each object is
+ * announced to subscribers with NETNS_F_DEL and released. 'err' is not
+ * inspected.
+ */
+static void
+ns_recv_error(rtnl_error_t err, uword o)
+{
+  //An error was received. Reset everything.
+  netns_p *ns = &netns_main.netnss[o];
+  u32 *indexes = 0;
+  u32 *i = 0;
+  u32 index;
+
+  /* 'index' is a real variable for pool_foreach_index to assign to; 'i' is
+   * only used as the vec_foreach cursor once 'indexes' has been filled.
+   * Objects are notified before being released so that callbacks never read
+   * a freed pool slot. */
+#define _(pool, type) \
+  pool_foreach_index(index, ns->netns.pool, { \
+    vec_add1(indexes, index); \
+  }); \
+  vec_foreach(i, indexes) { \
+    netns_notify(ns, &ns->netns.pool[*i], type, NETNS_F_DEL); \
+    pool_put_index(ns->netns.pool, *i); \
+  } \
+  vec_reset_length(indexes);
+
+  ns_object_foreach
+
+#undef _
+  vec_free(indexes);
+}
+
+/*
+ * rtnl stream message callback for the namespace at index 'o': dispatch the
+ * message to the route, link, address or neighbor handler according to
+ * nlmsg_type. Unknown types are logged. Handler results are not used.
+ */
+static void
+ns_recv_rtnl(struct nlmsghdr *hdr, uword o)
+{
+  netns_p *ns = &netns_main.netnss[o];
+  int (*handler)(netns_p *, struct nlmsghdr *) = NULL;
+
+  switch (hdr->nlmsg_type) {
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+      handler = ns_rcv_route;
+      break;
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+      handler = ns_rcv_link;
+      break;
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+      handler = ns_rcv_addr;
+      break;
+    case RTM_NEWNEIGH:
+    case RTM_DELNEIGH:
+      handler = ns_rcv_neigh;
+      break;
+    default:
+      break;
+  }
+
+  if (handler == NULL) {
+    clib_warning("unknown rtnl type %d", hdr->nlmsg_type);
+    return;
+  }
+
+  handler(ns, hdr);
+}
+
+/*
+ * Tear down namespace record 'ns': close its rtnl stream, free the four
+ * object pools it owns, and finally return the record to the namespace pool.
+ * The record is released last so that its fields are never touched after it
+ * has been handed back.
+ */
+static void
+netns_destroy(netns_p *ns)
+{
+  netns_main_t *nm = &netns_main;
+  rtnl_stream_close(ns->rtnl_handle);
+  pool_free(ns->netns.links);
+  pool_free(ns->netns.addresses);
+  pool_free(ns->netns.routes);
+  pool_free(ns->netns.neighbors);
+  pool_put(nm->netnss, ns);
+}
+
+/*
+ * Return the namespace record named 'name', creating it when needed.
+ *
+ * An existing record with the same name is reused. Otherwise a record is
+ * allocated and an rtnl stream is opened for it, with this file's message and
+ * error callbacks and the record index as opaque value. Returns NULL when
+ * the name is longer than RTNL_NETNS_NAMELEN or the stream cannot be opened.
+ */
+static netns_p *
+netns_get(char *name)
+{
+  netns_main_t *nm = &netns_main;
+  netns_p *entry;
+  u32 stream_handle;
+
+  pool_foreach(entry, nm->netnss, {
+    if (strcmp(name, entry->netns.name) == 0)
+      return entry;
+  });
+
+  if (strlen(name) > RTNL_NETNS_NAMELEN)
+    return NULL;
+
+  pool_get(nm->netnss, entry);
+  rtnl_stream_t stream = {
+    .recv_message = ns_recv_rtnl,
+    .error = ns_recv_error,
+    .opaque = (uword)(entry - nm->netnss),
+  };
+  strcpy(stream.name, name);
+
+  stream_handle = rtnl_stream_open(&stream);
+  if (stream_handle == ~0) {
+    pool_put(nm->netnss, entry);
+    return NULL;
+  }
+
+  strcpy(entry->netns.name, name);
+  entry->netns.addresses = 0;
+  entry->netns.links = 0;
+  entry->netns.neighbors = 0;
+  entry->netns.routes = 0;
+  entry->subscriber_count = 0;
+  entry->rtnl_handle = stream_handle;
+  return entry;
+}
+
+/*
+ * Subscribe to namespace 'name' with the callback and cookie in 'sub'.
+ * Returns the subscription handle (index in the handle pool), or ~0 when the
+ * namespace cannot be obtained.
+ */
+u32 netns_open(char *name, netns_sub_t *sub)
+{
+  netns_main_t *nm = &netns_main;
+  netns_handle_t *handle;
+  netns_p *ns = netns_get(name);
+
+  if (ns == NULL)
+    return ~0;
+
+  pool_get(nm->handles, handle);
+  handle->notify = sub->notify;
+  handle->opaque = sub->opaque;
+  handle->netns_index = ns - nm->netnss;
+  ns->subscriber_count++;
+
+  return handle - nm->handles;
+}
+
+/* Return the public namespace state that subscription 'handle' is attached to. */
+netns_t *netns_getns(u32 handle)
+{
+  netns_main_t *nm = &netns_main;
+  netns_handle_t *sub = pool_elt_at_index(nm->handles, handle);
+
+  return &pool_elt_at_index(nm->netnss, sub->netns_index)->netns;
+}
+
+/*
+ * Close subscription 'handle': release the handle slot, decrement the
+ * namespace's subscriber count, and destroy the namespace record when no
+ * subscriber is left.
+ */
+void netns_close(u32 handle)
+{
+  netns_main_t *nm = &netns_main;
+  netns_handle_t *h = pool_elt_at_index(nm->handles, handle);
+  netns_p *ns = pool_elt_at_index(nm->netnss, h->netns_index);
+  /* pool_put takes the pool first and the element second. */
+  pool_put(nm->handles, h);
+  ns->subscriber_count--;
+  if (!ns->subscriber_count)
+    netns_destroy(ns);
+}
+
+/*
+ * Replay every object currently stored for the namespace behind 'handle' to
+ * that subscription's own callback, flagged NETNS_F_DEL when 'del' is
+ * non-zero and NETNS_F_ADD otherwise. Does nothing without a callback.
+ */
+void netns_callme(u32 handle, char del)
+{
+  netns_main_t *nm = &netns_main;
+  netns_handle_t *sub = pool_elt_at_index(nm->handles, handle);
+  netns_p *ns = pool_elt_at_index(nm->netnss, sub->netns_index);
+  u32 idx = 0;
+
+  if (sub->notify == NULL)
+    return;
+
+#define _(pool, type) \
+  pool_foreach_index(idx, ns->netns.pool, { \
+    sub->notify(&ns->netns.pool[idx], type, \
+                del?NETNS_F_DEL:NETNS_F_ADD, sub->opaque); \
+  });
+
+  ns_object_foreach
+#undef _
+
+}
+
+/*
+ * format() helper taking (netns_type_t type, void *object): prints the type
+ * keyword followed by the object, using the formatter for that type.
+ * Unknown types leave 's' unchanged.
+ */
+u8 *format_ns_object(u8 *s, va_list *args)
+{
+  netns_type_t kind = va_arg(*args, netns_type_t);
+  void *obj = va_arg(*args, void *);
+
+  if (kind == NETNS_TYPE_ADDR)
+    return format(s, "addr %U", format_ns_addr, obj);
+  if (kind == NETNS_TYPE_ROUTE)
+    return format(s, "route %U", format_ns_route, obj);
+  if (kind == NETNS_TYPE_LINK)
+    return format(s, "link %U", format_ns_link, obj);
+  if (kind == NETNS_TYPE_NEIGH)
+    return format(s, "neigh %U", format_ns_neigh, obj);
+
+  return s;
+}
+
+/*
+ * format() helper for notification flags: "add" when NETNS_F_ADD is set,
+ * else "del" when NETNS_F_DEL is set, else "mod".
+ */
+u8 *format_ns_flags(u8 *s, va_list *args)
+{
+  u32 flags = va_arg(*args, u32);
+
+  if (flags & NETNS_F_ADD)
+    return format(s, "add");
+  if (flags & NETNS_F_DEL)
+    return format(s, "del");
+  return format(s, "mod");
+}
+
+clib_error_t *
+netns_init (vlib_main_t * vm)
+{
+ netns_main_t *nm = &netns_main;
+ nm->netnss = 0;
+ nm->handles = 0;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netns_init);
diff --git a/utils/extras/devices/rtnetlink/netns.h b/utils/extras/devices/rtnetlink/netns.h
new file mode 100644
index 000000000..53effe5ce
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/netns.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETNS_H_
+#define NETNS_H_
+
+#include <vlib/vlib.h>
+
+#include <sys/socket.h>
+#include <linux/rtnetlink.h>
+#include <linux/netlink.h>
+#include <net/if.h>
+
+#include "rtnl.h"
+
+/*include it for 'struct mpls_label'*/
+#include <linux/mpls.h>
+/*so far depth is fixed, looking into ways to be dynamic*/
+#define MPLS_STACK_DEPTH 7
+
+typedef struct { /* One link (network interface) entry; element type of netns_t.links. */
+ struct ifinfomsg ifi; /* rtnetlink link message header */
+ u8 hwaddr[IFHWADDRLEN]; /* presumably the link-layer address - TODO confirm against the parser */
+ u8 broadcast[IFHWADDRLEN]; /* presumably the link-layer broadcast address - TODO confirm */
+ u8 ifname[IFNAMSIZ];
+ u32 mtu;
+ u32 master;
+ u8 qdisc[IFNAMSIZ];
+ struct rtnl_link_stats stats; //This struct is big and only comes as a response to a request
+ f64 last_updated; /* NOTE(review): time base not visible in this chunk - confirm (likely vlib time) */
+} ns_link_t;
+
+typedef struct { /* One route entry; element type of netns_t.routes. */
+ struct rtmsg rtm; /* rtnetlink route message header */
+ u8 dst[16]; /* 16-byte address buffers: presumably large enough for IPv6, IPv4 using a prefix of it - TODO confirm */
+ u8 src[16];
+ u8 via[16];
+ u8 prefsrc[16];
+ u32 iif;
+ u32 oif;
+ u32 table;
+ u8 gateway[16];
+ u32 priority;
+ struct rta_cacheinfo cacheinfo;
+ struct mpls_label encap[MPLS_STACK_DEPTH]; /* fixed-depth label stack, see MPLS_STACK_DEPTH */
+ f64 last_updated;
+} ns_route_t;
+
+typedef struct { /* One interface address entry; element type of netns_t.addresses. */
+ struct ifaddrmsg ifaddr; /* rtnetlink address message header */
+ u8 addr[16]; /* 16-byte buffers: presumably sized for IPv6 - TODO confirm */
+ u8 local[16];
+ u8 label[IFNAMSIZ];
+ u8 broadcast[16];
+ u8 anycast[16];
+ struct ifa_cacheinfo cacheinfo;
+ f64 last_updated;
+} ns_addr_t;
+
+typedef struct { /* One neighbor entry; element type of netns_t.neighbors. */
+ struct ndmsg nd; /* rtnetlink neighbor message header */
+ u8 dst[16]; /* presumably the neighbor's network-layer address - TODO confirm */
+ u8 lladdr[IFHWADDRLEN]; /* presumably the neighbor's link-layer address - TODO confirm */
+ u32 probes;
+ struct nda_cacheinfo cacheinfo;
+ f64 last_updated;
+} ns_neigh_t;
+
+typedef struct { /* Per-namespace object store returned by netns_getns(). */
+ char name[RTNL_NETNS_NAMELEN + 1]; /* namespace name, with room for a terminating NUL */
+ ns_link_t *links; /* netns_callme() walks these with pool_foreach_index, so they appear to be pools - TODO confirm */
+ ns_route_t *routes;
+ ns_addr_t *addresses;
+ ns_neigh_t *neighbors;
+} netns_t;
+
+
+typedef enum { /* Kind of object handed to a notify callback; selects how its void *obj must be cast. */
+ NETNS_TYPE_LINK, /* obj is an ns_link_t */
+ NETNS_TYPE_ROUTE, /* obj is an ns_route_t */
+ NETNS_TYPE_ADDR, /* obj is an ns_addr_t */
+ NETNS_TYPE_NEIGH, /* obj is an ns_neigh_t */
+} netns_type_t;
+
+//Flags used in notification functions call
+#define NETNS_F_ADD 0x01
+#define NETNS_F_DEL 0x02
+
+typedef struct { /* Subscriber description passed to netns_open(). */
+ void (*notify)(void *obj, netns_type_t type, u32 flags, uword opaque); /* may be NULL; flags carries NETNS_F_ADD / NETNS_F_DEL, neither meaning a modification */
+ uword opaque; /* passed back unchanged as the last notify argument */
+} netns_sub_t;
+
+/*
+ * Subscribe for events related to the given namespace.
+ * When another subscriber already uses the namespace,
+ * this call will not trigger updates for already
+ * existing routes (This is to protect against
+ * synch. Vs asynch. issues).
+ */
+u32 netns_open(char *name, netns_sub_t *sub);
+
+/*
+ * Retrieves the namespace structure associated with a
+ * given namespace handle.
+ */
+netns_t *netns_getns(u32 handle);
+
+/*
+ * Terminates a subscriber session.
+ */
+void netns_close(u32 handle);
+
+/*
+ * Calls the callback associated with the handle
+ * for all existing objects with the flags
+ * set to (del?NETNS_F_DEL:NETNS_F_ADD).
+ */
+void netns_callme(u32 handle, char del);
+
+/*
+ * netns struct format functions.
+ * Taking the struct as single argument.
+ */
+u8 *format_ns_neigh(u8 *s, va_list *args);
+u8 *format_ns_addr(u8 *s, va_list *args);
+u8 *format_ns_route(u8 *s, va_list *args);
+u8 *format_ns_link(u8 *s, va_list *args);
+
+u8 *format_ns_object(u8 *s, va_list *args);
+u8 *format_ns_flags(u8 *s, va_list *args);
+
+#endif
diff --git a/utils/extras/devices/rtnetlink/rtnl.c b/utils/extras/devices/rtnetlink/rtnl.c
new file mode 100644
index 000000000..ed3db9e72
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/rtnl.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <float.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "netns.h"
+#include "rtnl.h"
+
+#undef DBL_MAX
+#define DBL_MAX 1000000000.0 /* replaces <float.h>'s DBL_MAX; used in this file as the "no timeout scheduled" value */
+
+typedef enum { /* Event types signalled to rtnl_process_node; event data is the stream's pool index. */
+ RTNL_E_OPEN, /* sent by rtnl_stream_open() */
+ RTNL_E_CLOSE, /* sent by rtnl_stream_close() */
+ RTNL_E_READ, /* sent by rtnl_read_cb() when the netlink socket is readable */
+} rtnl_event_t;
+
+typedef enum { /* Top-level state of a stream. */
+ RTNL_S_INIT, /* set by rtnl_stream_open(); RTNL_E_OPEN not processed yet */
+ RTNL_S_SYNC, /* dump sequence in progress, see rtnl_sync_state_t */
+ RTNL_S_READY, /* set by rtnl_sync_done() once the neighbor dump completed */
+} rtnl_state_t;
+
+typedef enum { /* Step of the initial dump sequence; each value names the dump currently awaited. */
+ RTNL_SS_OPENING, /* socket not open yet (or closed by rtnl_sync_reset()) */
+ RTNL_SS_LINK, /* RTM_GETLINK dump requested */
+ RTNL_SS_ADDR, /* RTM_GETADDR dump requested */
+ RTNL_SS_ROUTE4, /* RTM_GETROUTE dump requested for AF_INET */
+ RTNL_SS_ROUTE6, /* RTM_GETROUTE dump requested for AF_INET6 */
+ RTNL_SS_NEIGH, /* RTM_GETNEIGH dump requested */
+} rtnl_sync_state_t;
+
+typedef struct { /* One open stream; element of rtnl_main_t.streams. */
+ rtnl_stream_t stream; /* copy of the template given to rtnl_stream_open() */
+ rtnl_state_t state;
+ rtnl_sync_state_t sync_state;
+ int ns_fd; /* fd of the namespace file opened by rtnl_stream_open() */
+ int rtnl_socket; /* NETLINK_ROUTE socket created by rtnl_thread_fn() */
+ u32 unix_index; /* value returned by clib_file_add() for rtnl_socket */
+ u32 rtnl_seq; /* sequence number of the last dump request sent */
+ f64 timeout; /* time at which rtnl_process() runs the timeout handler; DBL_MAX when cancelled */
+} rtnl_ns_t;
+
+typedef struct { /* Module-wide state. */
+ f64 now; /* vlib time sampled by rtnl_process() after each wake-up */
+ rtnl_ns_t *streams; /* pool of streams, indexed by the value returned from rtnl_stream_open() */
+} rtnl_main_t;
+
+static rtnl_main_t rtnl_main;
+static vlib_node_registration_t rtnl_process_node; /* forward declaration; defined by VLIB_REGISTER_NODE further down */
+
+#define RTNL_BUFFSIZ 16384 /* size in bytes of the receive buffer in rtnl_process_read() */
+#define RTNL_DUMP_TIMEOUT 1 /* added to rm->now when waiting for the next dump step in rtnl_sync_done() */
+
+/*
+ * Converts an rtnetlink multicast group number (1-based) into its bit in
+ * sockaddr_nl.nl_groups. Group 0 yields an empty mask.
+ */
+static inline u32 grpmask(u32 g)
+{
+  ASSERT (g <= 31);
+  return g ? (1 << (g - 1)) : 0;
+}
+
+
+/*
+ * format() helper taking a (char *) namespace name and appending the path
+ * of the matching namespace file:
+ *  - NULL or empty name: the caller's own namespace (/proc/self/ns/net),
+ *  - a name containing '/': used verbatim as a path,
+ *  - anything else: looked up under /var/run/netns/.
+ */
+u8 *format_rtnl_nsname2path(u8 *s, va_list *args)
+{
+  char *name = va_arg(*args, char *);
+
+  if (!name || name[0] == '\0')
+    return format(s, "/proc/self/ns/net");
+
+  if (strpbrk(name, "/") != NULL)
+    return format(s, "%s", name);
+
+  return format(s, "/var/run/netns/%s", name);
+}
+
+/* Arms the stream's timer: rtnl_process() runs the timeout handler once its clock passes `when`. */
+static_always_inline void rtnl_schedule_timeout(rtnl_ns_t *ns, f64 when)
+{
+  ns->timeout = when;
+}
+
+/* Disarms the stream's timer by parking it at DBL_MAX. */
+static_always_inline void rtnl_cancel_timeout(rtnl_ns_t *ns)
+{
+  ns->timeout = DBL_MAX;
+}
+
+/*
+ * File read callback for a stream's netlink socket. No data is read here;
+ * the stream index is handed to the rtnl process as an RTNL_E_READ event.
+ */
+static clib_error_t *rtnl_read_cb(struct clib_file * f)
+{
+  /* private_data was set to the stream's pool index by rtnl_socket_open() */
+  uword stream_index = f->private_data;
+
+  vlib_process_signal_event(vlib_get_main(), rtnl_process_node.index, RTNL_E_READ, stream_index);
+  return 0;
+}
+
+/*
+ * Sends a netlink dump request of the given type on the stream's socket.
+ * `req` / `len` are the type-specific payload following the netlink header.
+ * The stream's sequence number is incremented and stamped on the request.
+ * Returns 0 on success, -1 when sendmsg() fails.
+ */
+int rtnl_dump_request(rtnl_ns_t *ns, int type, void *req, size_t len)
+{
+  struct sockaddr_nl kernel;
+  struct nlmsghdr hdr;
+  struct iovec parts[2];
+  struct msghdr msg;
+
+  memset(&kernel, 0, sizeof(kernel));
+  kernel.nl_family = AF_NETLINK;
+
+  memset(&hdr, 0, sizeof(hdr));
+  hdr.nlmsg_len = NLMSG_LENGTH(len);
+  hdr.nlmsg_type = type;
+  hdr.nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST;
+  hdr.nlmsg_pid = 0;
+  hdr.nlmsg_seq = ++ns->rtnl_seq;
+
+  /* header and payload are sent as one message from two buffers */
+  parts[0].iov_base = &hdr;
+  parts[0].iov_len = sizeof(hdr);
+  parts[1].iov_base = req;
+  parts[1].iov_len = len;
+
+  memset(&msg, 0, sizeof(msg));
+  msg.msg_name = &kernel;
+  msg.msg_namelen = sizeof(kernel);
+  msg.msg_iov = parts;
+  msg.msg_iovlen = 2;
+
+  if (sendmsg(ns->rtnl_socket, &msg, 0) >= 0)
+    return 0;
+
+  clib_warning("sendmsg error: %s", strerror(errno));
+  return -1;
+}
+
+/* Unregisters the stream's netlink socket from the file poller, then closes it. */
+static void rtnl_socket_close(rtnl_ns_t *ns)
+{
+  clib_file_t *cf = &file_main.file_pool[ns->unix_index];
+
+  clib_file_del(&file_main, cf);
+  close(ns->rtnl_socket);
+}
+
+struct rtnl_thread_exec { /* Arguments handed to rtnl_exec_in_thread_fn(). */
+ int fd; /* namespace fd passed to setns() */
+ void *(*fn)(void *); /* function to run once inside the namespace */
+ void *arg; /* argument given to fn */
+ void **ret; /* where fn's return value is stored */
+};
+
+/*
+ * Thread entry point: enters the namespace described by the
+ * rtnl_thread_exec argument and runs its function there.
+ * Returns NULL on success (the function's result goes to *ret), or
+ * -errno cast to a pointer when setns() fails.
+ */
+static void *rtnl_exec_in_thread_fn(void *p)
+{
+  struct rtnl_thread_exec *ctx = p;
+
+  if (setns(ctx->fd, 0) != 0) {
+    uword err = (uword) (-errno);
+    return (void *) err;
+  }
+
+  *ctx->ret = ctx->fn(ctx->arg);
+  return NULL;
+}
+
+/*
+ * Runs fn(arg) in a short-lived thread that first enters the namespace
+ * referred to by fd, and waits for it to finish.
+ * fn's return value is stored in *ret.
+ * Returns 0 on success, or a negative error number when the thread could
+ * not be created or joined, or when setns() failed inside the thread.
+ */
+static int rtnl_exec_in_namespace_byfd(int fd, void *(*fn)(void *), void *arg, void **ret)
+{
+  pthread_t thread;
+  void *thread_ret;
+  int rv;
+  struct rtnl_thread_exec ex = {
+    .fd = fd,
+    .fn = fn,
+    .arg = arg,
+    .ret = ret
+  };
+
+  /* pthread functions return the error number; they do not set errno. */
+  rv = pthread_create(&thread, NULL, rtnl_exec_in_thread_fn, &ex);
+  if (rv)
+    return -rv;
+
+  rv = pthread_join(thread, &thread_ret);
+  if (rv)
+    return -rv;
+
+  /* a non-NULL thread result is -errno from setns() */
+  if (thread_ret)
+    return (int) ((uword)thread_ret);
+
+  return 0;
+}
+
+/*
+ * Runs fn(arg) inside the namespace of an open stream.
+ * Returns -EBADR when stream_index does not refer to an allocated stream,
+ * otherwise the result of rtnl_exec_in_namespace_byfd().
+ */
+int rtnl_exec_in_namespace(u32 stream_index, void *(*fn)(void *), void *arg, void **ret)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  rtnl_ns_t *ns;
+
+  if (!pool_is_free_index(rm->streams, stream_index)) {
+    ns = pool_elt_at_index(rm->streams, stream_index);
+    return rtnl_exec_in_namespace_byfd(ns->ns_fd, fn, arg, ret);
+  }
+
+  return -EBADR;
+}
+
+/*
+ * Runs fn(arg) inside the namespace designated by nsname (see
+ * format_rtnl_nsname2path() for how the name maps to a file).
+ * Returns -errno when the namespace file cannot be opened, otherwise the
+ * result of rtnl_exec_in_namespace_byfd().
+ */
+int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret)
+{
+  int fd, r;
+  u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, nsname);
+
+  /* format() vectors are not NUL-terminated; open() needs a C string */
+  vec_add1(s, 0);
+
+  fd = open((char *)s, O_RDONLY);
+  if (fd < 0) {
+    /* save errno before vec_free() gets a chance to change it */
+    r = -errno;
+    vec_free(s);
+    return r;
+  }
+  vec_free(s);
+
+  r = rtnl_exec_in_namespace_byfd(fd, fn, arg, ret);
+  close(fd);
+  return r;
+}
+
+/*
+ * Runs in the helper thread started by rtnl_socket_open(): enters the
+ * stream's namespace and creates its NETLINK_ROUTE socket there.
+ * Returns NULL on success, (void *) -1 when setns() fails and
+ * (void *) -2 when the socket cannot be created.
+ */
+static void *rtnl_thread_fn(void *p)
+{
+  rtnl_ns_t *ns = p;
+  int sock;
+
+  if (setns(ns->ns_fd, 0) != 0) {
+    clib_warning("setns(%d, %d) error %d", ns->ns_fd, CLONE_NEWNET, errno);
+    return (void *) -1;
+  }
+
+  sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+  ns->rtnl_socket = sock;
+  if (sock == -1) {
+    clib_warning("Cannot open socket");
+    return (void *) -2;
+  }
+
+  return NULL;
+}
+
+/*
+ * Opens the stream's netlink socket inside its namespace (through a
+ * helper thread), binds it to the link, address, route, neighbor and
+ * notify multicast groups, and registers it with the file poller.
+ * Returns 0 on success; -1 / -2 when the helper thread cannot be
+ * created / joined, -3 when the socket cannot be opened or bound.
+ */
+static int rtnl_socket_open(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  pthread_t opener;
+  void *opener_ret;
+  struct sockaddr_nl addr;
+  clib_file_t template = {0};
+
+  if (pthread_create(&opener, NULL, rtnl_thread_fn, ns) != 0) {
+    clib_warning("Can't create opening thread");
+    return -1;
+  }
+
+  if (pthread_join(opener, &opener_ret) != 0) {
+    clib_warning("Can't join opening thread");
+    return -2;
+  }
+
+  if (opener_ret != NULL) {
+    clib_warning("Could not open netlink socket");
+    return -3;
+  }
+
+  /* the MPLS route group is intentionally not subscribed for now */
+  memset(&addr, 0, sizeof(addr));
+  addr.nl_family = AF_NETLINK;
+  addr.nl_groups = grpmask(RTNLGRP_LINK) |
+                   grpmask(RTNLGRP_IPV6_IFADDR) |
+                   grpmask(RTNLGRP_IPV4_IFADDR) |
+                   grpmask(RTNLGRP_IPV4_ROUTE) |
+                   grpmask(RTNLGRP_IPV6_ROUTE) |
+                   grpmask(RTNLGRP_NEIGH) |
+                   grpmask(RTNLGRP_NOTIFY);
+
+  if (bind(ns->rtnl_socket, (struct sockaddr*) &addr, sizeof(addr)) != 0) {
+    close(ns->rtnl_socket);
+    return -3;
+  }
+
+  /* private_data carries the stream index back to rtnl_read_cb() */
+  template.private_data = (uword) (ns - rm->streams);
+  template.file_descriptor = ns->rtnl_socket;
+  template.read_function = rtnl_read_cb;
+  ns->unix_index = clib_file_add (&file_main, &template);
+  return 0;
+}
+
+/*
+ * Extracts the error code carried by an NLMSG_ERROR message.
+ * Stores it in *error and returns 0, or returns -1 when the message is
+ * too short to hold a struct nlmsgerr.
+ */
+static int
+rtnl_rcv_error(rtnl_ns_t *ns, struct nlmsghdr *hdr, int *error)
+{
+  struct nlmsgerr *err = NLMSG_DATA(hdr);
+
+  /*
+   * Compare total lengths instead of subtracting the header size first:
+   * the subtraction wraps around when nlmsg_len is smaller than the header.
+   */
+  if (hdr->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+    return -1;
+
+  *error = err->error;
+  return 0;
+}
+
+/*
+ * Aborts the current synchronization: closes the netlink socket and goes
+ * back to RTNL_SS_OPENING. Does nothing when already in that step.
+ */
+static void
+rtnl_sync_reset(rtnl_ns_t *ns)
+{
+  if (ns->sync_state != RTNL_SS_OPENING) {
+    rtnl_socket_close(ns);
+    ns->sync_state = RTNL_SS_OPENING;
+  }
+}
+
+static void
+rtnl_sync_done(rtnl_ns_t *ns) /* Called on NLMSG_DONE: the dump for the current sync step finished, so request the next one. */
+{ /* Step order: LINK -> ADDR -> ROUTE4 -> ROUTE6 -> NEIGH -> stream READY. A failed request resets the sync and retries after 1. */
+ rtnl_main_t *rm = &rtnl_main;
+ struct ifaddrmsg addrmsg;
+ struct rtmsg rtmsg;
+ struct ndmsg ndmsg;
+ switch (ns->sync_state) {
+ case RTNL_SS_OPENING:
+ //Cannot happen here
+ break;
+ case RTNL_SS_LINK: /* link dump done: request addresses of all families */
+ memset(&addrmsg, 0, sizeof(addrmsg));
+ addrmsg.ifa_family = AF_UNSPEC;
+ if(rtnl_dump_request(ns, RTM_GETADDR, &addrmsg, sizeof(addrmsg))) {
+ rtnl_sync_reset(ns);
+ rtnl_schedule_timeout(ns, rm->now + 1);
+ return;
+ }
+ rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
+ ns->sync_state = RTNL_SS_ADDR;
+ break;
+ case RTNL_SS_ADDR: /* shares the route request code below: ADDR asks for IPv4 routes, ROUTE4 for IPv6 routes */
+ case RTNL_SS_ROUTE4:
+ memset(&rtmsg, 0, sizeof(rtmsg));
+ rtmsg.rtm_family = (ns->sync_state == RTNL_SS_ADDR)?AF_INET:AF_INET6;
+ rtmsg.rtm_table = RT_TABLE_UNSPEC;
+ if(rtnl_dump_request(ns, RTM_GETROUTE, &rtmsg, sizeof(rtmsg))) {
+ rtnl_sync_reset(ns);
+ rtnl_schedule_timeout(ns, rm->now + 1);
+ return;
+ }
+ rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
+ ns->sync_state = (ns->sync_state == RTNL_SS_ADDR)?RTNL_SS_ROUTE4:RTNL_SS_ROUTE6;
+ break;
+ case RTNL_SS_ROUTE6: /* both route dumps done: request neighbors of all families */
+ memset(&ndmsg, 0, sizeof(ndmsg));
+ ndmsg.ndm_family = AF_UNSPEC;
+ if(rtnl_dump_request(ns, RTM_GETNEIGH, &ndmsg, sizeof(ndmsg))) {
+ rtnl_sync_reset(ns);
+ rtnl_schedule_timeout(ns, rm->now + 1);
+ return;
+ }
+ rtnl_schedule_timeout(ns, rm->now + RTNL_DUMP_TIMEOUT);
+ ns->sync_state = RTNL_SS_NEIGH;
+ break;
+ case RTNL_SS_NEIGH: /* last dump done: the stream is synchronized */
+ ns->state = RTNL_S_READY;
+ ns->sync_state = 0; /* 0 is RTNL_SS_OPENING; the value is not consulted while READY by the code in this file */
+ rtnl_cancel_timeout(ns);
+ break;
+ }
+}
+
+/*
+ * Timer handler while a stream is synchronizing.
+ *
+ * In RTNL_SS_OPENING the timer means "try to open now": the socket is
+ * opened and the link dump requested. In every other step it means the
+ * awaited dump did not complete in time, so the synchronization is reset
+ * and restarted shortly after.
+ */
+static void
+rtnl_sync_timeout(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  struct ifinfomsg imsg = {};
+  switch (ns->sync_state) {
+    case RTNL_SS_OPENING:
+      if (rtnl_socket_open(ns)) {
+        rtnl_schedule_timeout(ns, rm->now + 10);
+        return;
+      }
+      imsg.ifi_family = AF_UNSPEC;
+      if (rtnl_dump_request(ns, RTM_GETLINK, &imsg, sizeof(imsg))) {
+        /*
+         * sync_state is still RTNL_SS_OPENING here, so rtnl_sync_reset()
+         * would be a no-op: close the freshly opened socket explicitly and
+         * stay in OPENING so that the retry reopens it.
+         */
+        rtnl_socket_close(ns);
+        rtnl_schedule_timeout(ns, rm->now + 10);
+        return;
+      }
+      ns->sync_state = RTNL_SS_LINK;
+      rtnl_schedule_timeout(ns, rm->now + 2);
+      break;
+    case RTNL_SS_LINK:
+    case RTNL_SS_ADDR:
+    case RTNL_SS_ROUTE4:
+    case RTNL_SS_ROUTE6:
+    case RTNL_SS_NEIGH:
+      //Timeout happened while synchronizing
+      rtnl_sync_reset(ns);
+      rtnl_schedule_timeout(ns, rm->now + 1);
+      break;
+  }
+}
+
+/*
+ * Handles one netlink message read from the stream's socket.
+ *
+ * While synchronizing, a message that is a notification or does not
+ * answer the last dump request restarts the synchronization.
+ * NLMSG_DONE advances the dump sequence, link/address/route/neighbor
+ * messages are forwarded to the stream's recv_message callback.
+ *
+ * Returns 0 when the caller may keep reading, non-zero when it must stop
+ * (malformed error message, or synchronization restarted).
+ */
+static int
+rtnl_ns_recv(rtnl_ns_t *ns, struct nlmsghdr *hdr)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  int ret, error = 0;
+
+  if (ns->state == RTNL_S_SYNC &&
+      ((hdr->nlmsg_flags & RTM_F_NOTIFY) ||
+       (hdr->nlmsg_seq != (ns->rtnl_seq)))) {
+    clib_warning("Received notification while in sync. Restart synchronization.");
+    rtnl_sync_reset(ns);
+    rtnl_schedule_timeout(ns, rm->now);
+    /*
+     * The socket has been closed by the reset: stop here so that the
+     * caller neither handles stale messages nor reads a closed socket.
+     */
+    return -1;
+  }
+
+  switch (hdr->nlmsg_type) {
+    case NLMSG_DONE:
+      rtnl_sync_done(ns);
+      break;
+    case NLMSG_ERROR:
+      /* NOTE(review): the extracted error code is currently not acted upon */
+      if((ret = rtnl_rcv_error(ns, hdr, &error)))
+        return ret;
+      break;
+    case RTM_NEWROUTE:
+    case RTM_DELROUTE:
+    case RTM_NEWLINK:
+    case RTM_DELLINK:
+    case RTM_NEWADDR:
+    case RTM_DELADDR:
+    case RTM_NEWNEIGH:
+    case RTM_DELNEIGH:
+      if (ns->stream.recv_message)
+        ns->stream.recv_message(hdr, ns->stream.opaque);
+      break;
+    default:
+      clib_warning("Unknown rtnetlink type %d", hdr->nlmsg_type);
+      break;
+  }
+  return 0;
+}
+
+/*
+ * RTNL_E_OPEN handler: moves a freshly created stream into the
+ * synchronizing state and arms its timer for immediate expiry.
+ * Streams that already left RTNL_S_INIT are ignored.
+ */
+static void
+rtnl_process_open(rtnl_ns_t *ns)
+{
+  if (ns->state == RTNL_S_INIT) {
+    ns->state = RTNL_S_SYNC;
+    ns->sync_state = RTNL_SS_OPENING;
+    rtnl_schedule_timeout(ns, rtnl_main.now);
+  }
+}
+
+/*
+ * RTNL_E_CLOSE handler: releases the stream's socket, namespace fd and
+ * pool slot. Streams still in RTNL_S_INIT are left untouched.
+ */
+static void
+rtnl_process_close(rtnl_ns_t *ns)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  if (ns->state == RTNL_S_INIT)
+    return;
+
+  /*
+   * While synchronizing in the OPENING step there is no open socket
+   * (never opened yet, or already closed by rtnl_sync_reset()), so there
+   * is nothing to unregister or close. The state must be checked too
+   * because sync_state is also 0 once the stream is READY.
+   */
+  if (!(ns->state == RTNL_S_SYNC && ns->sync_state == RTNL_SS_OPENING))
+    rtnl_socket_close(ns);
+
+  close(ns->ns_fd);
+  pool_put(rm->streams, ns);
+}
+
+/*
+ * RTNL_E_READ handler: drains the stream's netlink socket without
+ * blocking and feeds every message to rtnl_ns_recv().
+ * Returns 0 once the socket has no more data, -1 on a receive error, on a
+ * malformed or truncated message, or when rtnl_ns_recv() asks to stop.
+ */
+static int
+rtnl_process_read(rtnl_ns_t *ns)
+{
+  uint8_t buff[RTNL_BUFFSIZ];
+  ssize_t len;
+  struct nlmsghdr *hdr;
+
+  while(1) {
+    if((len = recv(ns->rtnl_socket, buff, RTNL_BUFFSIZ, MSG_DONTWAIT)) < 0) {
+      if(errno != EAGAIN) {
+        clib_warning("rtnetlink recv error (%d) [%s]: %s", ns->rtnl_socket, ns->stream.name, strerror(errno));
+        return -1;
+      }
+      return 0;
+    }
+
+    for(hdr = (struct nlmsghdr *) buff;
+        len > 0;
+        len -= NLMSG_ALIGN(hdr->nlmsg_len),
+        hdr = (struct nlmsghdr *) (((uint8_t *) hdr) + NLMSG_ALIGN(hdr->nlmsg_len))) {
+      if((sizeof(*hdr) > (size_t)len) || (hdr->nlmsg_len > (size_t)len)) {
+        clib_warning("rtnetlink buffer too small (%d Vs %d)", (int) hdr->nlmsg_len, (int) len);
+        return -1;
+      }
+      /*
+       * A length smaller than the header (0 in particular) would never
+       * advance the loop; reject it.
+       */
+      if (hdr->nlmsg_len < sizeof(*hdr)) {
+        clib_warning("rtnetlink malformed message (nlmsg_len %d)", (int) hdr->nlmsg_len);
+        return -1;
+      }
+      if (rtnl_ns_recv(ns, hdr))
+        return -1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Dispatches an expired stream timer. Timers are only expected while the
+ * stream is synchronizing; any other state is reported.
+ */
+static void
+rtnl_process_timeout(rtnl_ns_t *ns)
+{
+  if (ns->state == RTNL_S_SYNC)
+    rtnl_sync_timeout(ns);
+  else if (ns->state == RTNL_S_INIT || ns->state == RTNL_S_READY)
+    clib_warning("Should not happen");
+}
+
+/*
+ * Body of the "rtnl-process" process node.
+ *
+ * Sleeps until an event arrives or the earliest stream timer expires,
+ * then either runs the expired timers (clock wake-up) or dispatches the
+ * received open/close/read events, whose data is a stream pool index.
+ * The loop never terminates.
+ */
+static uword
+rtnl_process (vlib_main_t * vm,
+              vlib_node_runtime_t * node,
+              vlib_frame_t * frame)
+{
+  rtnl_main_t *rm = &rtnl_main;
+  uword event_type;
+  uword *event_data = 0;
+  rm->now = vlib_time_now(vm);
+  f64 timeout = DBL_MAX;
+  rtnl_ns_t *ns;
+
+  //Setting up
+  while (1) {
+    vlib_process_wait_for_event_or_clock(vm, timeout - rm->now);
+    event_type = vlib_process_get_events(vm, &event_data);
+    rm->now = vlib_time_now(vm);
+
+    if (event_type == ~0) { //Clock event or no event
+      pool_foreach(ns, rm->streams, {
+        if (ns->timeout < rm->now) {
+          ns->timeout = DBL_MAX;
+          rtnl_process_timeout(ns);
+        }
+      });
+    } else {
+      uword *d;
+      vec_foreach(d, event_data) {
+        /*
+         * The stream may have been released by an earlier close event
+         * (e.g. a read event queued for a stream that was closed since).
+         */
+        if (pool_is_free_index(rm->streams, d[0]))
+          continue;
+
+        ns = pool_elt_at_index(rm->streams, d[0]);
+        switch (event_type)
+          {
+          case RTNL_E_CLOSE:
+            rtnl_process_close(ns);
+            break;
+          case RTNL_E_OPEN:
+            rtnl_process_open(ns);
+            break;
+          case RTNL_E_READ:
+            rtnl_process_read(ns);
+            break;
+          }
+      }
+    }
+
+    vec_reset_length (event_data);
+
+    /* next wake-up: the earliest timer among all streams */
+    timeout = DBL_MAX;
+    pool_foreach(ns, rm->streams, {
+      if (ns->timeout < timeout)
+        timeout = ns->timeout;
+    });
+  }
+  return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE(rtnl_process_node, static) = { /* process node that receives the RTNL_E_* events */
+ .function = rtnl_process,
+ .name = "rtnl-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+/*
+ * Opens a stream on the namespace named in template->name.
+ * The template is copied; the actual socket opening and synchronization
+ * are performed by the rtnl process once it handles the RTNL_E_OPEN event.
+ * Returns the stream index, or ~0 when the namespace file cannot be opened.
+ */
+u32
+rtnl_stream_open(rtnl_stream_t *template)
+{
+  vlib_main_t *vm = vlib_get_main();
+  rtnl_main_t *rm = &rtnl_main;
+  rtnl_ns_t *ns;
+  int fd;
+  u8 *s = format((u8 *)0, "%U", format_rtnl_nsname2path, template->name);
+  vec_add1(s, 0);
+
+  if ((fd = open((char *)s, O_RDONLY)) < 0) {
+    clib_unix_warning("open stream %s: ", s);
+    vec_free(s);
+    return ~0;
+  }
+
+  vec_free(s);
+  pool_get(rm->streams, ns);
+
+  /*
+   * pool_get() does not clear the element. rtnl_process() reads the
+   * timeout of every allocated stream, so give all fields a defined
+   * value before the stream becomes visible to it.
+   */
+  memset(ns, 0, sizeof(*ns));
+  ns->state = RTNL_S_INIT;
+  ns->sync_state = RTNL_SS_OPENING;
+  ns->ns_fd = fd;
+  ns->rtnl_socket = -1;
+  ns->unix_index = ~0;
+  ns->timeout = DBL_MAX;
+  ns->stream = *template;
+  vlib_process_signal_event(vm, rtnl_process_node.index, RTNL_E_OPEN, (uword)(ns - rm->streams));
+  return ns - rm->streams;
+}
+
+/*
+ * Requests the closing of a stream. The work is done asynchronously by
+ * the rtnl process when it handles the RTNL_E_CLOSE event.
+ */
+void
+rtnl_stream_close(u32 stream_index)
+{
+  ASSERT(!pool_is_free_index(rtnl_main.streams, stream_index));
+  vlib_process_signal_event(vlib_get_main(), rtnl_process_node.index, RTNL_E_CLOSE, stream_index);
+}
+
+/* vlib init hook: starts with an empty stream pool. */
+clib_error_t *
+rtnl_init (vlib_main_t * vm)
+{
+  rtnl_main.streams = 0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (rtnl_init);
diff --git a/utils/extras/devices/rtnetlink/rtnl.h b/utils/extras/devices/rtnetlink/rtnl.h
new file mode 100644
index 000000000..3f96252c1
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/rtnl.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RTNL_H_
+#define RTNL_H_
+
+#include <vlib/vlib.h>
+
+#include <linux/netlink.h>
+#include <vppinfra/clib.h>
+
+typedef enum { /* Error codes passed to rtnl_stream_t.error. */
+ RTNL_ERR_UNKNOWN, /* only value defined so far */
+} rtnl_error_t;
+
+#define RTNL_NETNS_NAMELEN 128
+
+/*
+ * RTNL stream implements an RTNL overlay
+ * for receiving continuous updates for a given namespace.
+ * When the stream is initially opened, dump requests are sent
+ * in order to retrieve the original state.
+ * The error callback is meant to be called any time synchronization
+ * cannot be achieved. When that happens, the stream is reset to its
+ * original state and new dump requests are sent.
+ */
+
+typedef struct rtnl_stream_s { /* Template describing a stream; copied by rtnl_stream_open(). */
+ char name[RTNL_NETNS_NAMELEN + 1]; /* namespace name or path; empty selects the caller's own namespace (see format_rtnl_nsname2path) */
+ void (*recv_message)(struct nlmsghdr *hdr, uword opaque); /* may be NULL; receives each link/address/route/neighbor message */
+ void (*error)(rtnl_error_t err, uword opaque);
+ uword opaque; /* passed back unchanged to the callbacks */
+} rtnl_stream_t;
+
+u32 rtnl_stream_open(rtnl_stream_t *template);
+void rtnl_stream_close(u32 handle);
+
+/*
+ * Executes a function in a synchronously executed thread in the
+ * given namespace.
+ * Returns 0 on success, and -errno on error.
+ */
+int rtnl_exec_in_namespace(u32 handle, void *(*fn)(void *), void *arg, void **ret);
+int rtnl_exec_in_namespace_by_name(char *nsname, void *(*fn)(void *), void *arg, void **ret);
+
+u8 *format_rtnl_nsname2path(u8 *s, va_list *args);
+
+#endif
diff --git a/utils/extras/devices/rtnetlink/test.c b/utils/extras/devices/rtnetlink/test.c
new file mode 100644
index 000000000..031748dd3
--- /dev/null
+++ b/utils/extras/devices/rtnetlink/test.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <librtnl/netns.h>
+
+#include <vnet/plugin/plugin.h>
+#include <librtnl/mapper.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+
+u32 handles[10]; /* netns handles opened by "test netns enable", indexed by the CLI <index> argument */
+
+/*
+ * netns notification callback used by the test CLI: logs the object
+ * together with the subscriber index (carried in opaque) and the action
+ * derived from the flags.
+ */
+static void
+test_notify(void *obj, netns_type_t type, u32 flags, uword opaque) {
+  u32 index = (u32) opaque;
+  const char *action;
+
+  if (flags & NETNS_F_ADD)
+    action = "add";
+  else if (flags & NETNS_F_DEL)
+    action = "del";
+  else
+    action = "mod";
+
+  switch (type) {
+    case NETNS_TYPE_LINK:
+      clib_warning("%d:link %s %U", index, action, format_ns_link, (ns_link_t *)obj);
+      break;
+    case NETNS_TYPE_ROUTE:
+      clib_warning("%d: route %s %U", index, action, format_ns_route, (ns_route_t *)obj);
+      break;
+    case NETNS_TYPE_ADDR:
+      clib_warning("%d: addr %s %U", index, action, format_ns_addr, (ns_addr_t *)obj);
+      break;
+    case NETNS_TYPE_NEIGH:
+      clib_warning("%d: neigh %s %U", index, action, format_ns_neigh, (ns_neigh_t *)obj);
+      break;
+  }
+}
+
+/*
+ * CLI "test netns enable <ns-name>|default <index>": subscribes
+ * test_notify to the given namespace and stores the resulting handle in
+ * handles[index]. "default" selects the empty namespace name.
+ */
+static clib_error_t *
+test_enable_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  char *nsname = 0;
+  u32 index;
+  if (!unformat(input, "%s", &nsname)) {
+    return clib_error_return(0, "unknown input `%U'",
+                             format_unformat_error, input);
+  }
+  if (!unformat(input, "%d", &index)) {
+    vec_free(nsname);
+    return clib_error_return(0, "unknown input `%U'",
+                             format_unformat_error, input);
+  }
+
+  /* index comes straight from the CLI: keep it inside handles[] */
+  if (index >= ARRAY_LEN(handles)) {
+    vec_free(nsname);
+    return clib_error_return(0, "index %d out of range (max %d)",
+                             index, (int) ARRAY_LEN(handles) - 1);
+  }
+
+  if (!strcmp(nsname, "default"))
+    nsname[0] = 0;
+
+  netns_sub_t sub;
+  sub.notify = test_notify;
+  sub.opaque = index;
+  /* NOTE(review): nsname is not freed below because netns_open() is not visible here - confirm it copies the name */
+  handles[index] = netns_open(nsname, &sub);
+  if (handles[index] == ~0) {
+    return clib_error_create("Could not open netns with name %s", nsname);
+  }
+  return 0;
+}
+
+/* CLI "test netns disable <index>": closes the handle stored in handles[index]. */
+static clib_error_t *
+test_disable_command_fn (vlib_main_t * vm,
+                         unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  u32 index;
+  if (!unformat(input, "%d", &index)) {
+    return clib_error_return(0, "unknown input `%U'",
+                             format_unformat_error, input);
+  }
+
+  /* index comes straight from the CLI: keep it inside handles[] */
+  if (index >= ARRAY_LEN(handles))
+    return clib_error_return(0, "index %d out of range (max %d)",
+                             index, (int) ARRAY_LEN(handles) - 1);
+
+  netns_close(handles[index]);
+  return 0;
+}
+
+/* CLI registrations for the netns subscription test commands. */
+VLIB_CLI_COMMAND (rtnl_enable_command, static) = {
+  .path = "test netns enable",
+  .short_help = "test netns enable [<ns-name>|default] <index>",
+  .function = test_enable_command_fn,
+};
+
+VLIB_CLI_COMMAND (rtnl_disable_command, static) = {
+  .path = "test netns disable",
+  /* help text now matches the registered path ("netns", not "rtnl") */
+  .short_help = "test netns disable <index>",
+  .function = test_disable_command_fn,
+};
+
+u32 mapper_indexes[10]; /* mapper namespace indexes filled by "test mapper ns add", indexed by the CLI <index> argument */
+
+/*
+ * CLI "test mapper ns add <index> <nsname> <table-id>": maps a namespace
+ * to the IPv4/IPv6 FIBs of the given table and records the mapper index
+ * in mapper_indexes[index]. "default" selects the empty namespace name.
+ */
+static clib_error_t *
+mapper_ns_add_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  u32 index;
+  char *nsname = 0;
+  u32 table_id;
+  if (!unformat(input, "%d", &index))
+    return clib_error_return(0, "invalid index `%U'",
+                             format_unformat_error, input);
+  /* index comes straight from the CLI: keep it inside mapper_indexes[] */
+  if (index >= ARRAY_LEN(mapper_indexes))
+    return clib_error_return(0, "index %d out of range (max %d)",
+                             index, (int) ARRAY_LEN(mapper_indexes) - 1);
+  if (!unformat(input, "%s", &nsname))
+    return clib_error_return(0, "invalid nsname `%U'",
+                             format_unformat_error, input);
+  if (!unformat(input, "%d", &table_id)) {
+    vec_free(nsname);
+    return clib_error_return(0, "invalid fib index `%U'",
+                             format_unformat_error, input);
+  }
+
+  if (!strcmp(nsname, "default"))
+    nsname[0] = 0;
+
+  u32 fib4 = ip4_fib_index_from_table_id(table_id);
+  u32 fib6 = ip6_fib_index_from_table_id(table_id);
+
+  /* NOTE(review): nsname is not freed below because mapper_add_ns() is not visible here - confirm it copies the name */
+  if (mapper_add_ns(nsname, fib4, fib6, &mapper_indexes[index]))
+    return clib_error_return(0, "Could not add ns %s", nsname);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (mapper_ns_add_command, static) = {
+  .path = "test mapper ns add",
+  .short_help = "test mapper ns add <index> <nsname> <table-id>",
+  .function = mapper_ns_add_command_fn,
+};
+
+/* CLI "test mapper ns delete <index>": removes the mapping recorded in mapper_indexes[index]. */
+static clib_error_t *
+mapper_ns_del_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  u32 index;
+  if (!unformat(input, "%d", &index))
+    return clib_error_return(0, "invalid index `%U'",
+                             format_unformat_error, input);
+
+  /* index comes straight from the CLI: keep it inside mapper_indexes[] */
+  if (index >= ARRAY_LEN(mapper_indexes))
+    return clib_error_return(0, "index %d out of range (max %d)",
+                             index, (int) ARRAY_LEN(mapper_indexes) - 1);
+
+  if (mapper_del_ns(mapper_indexes[index]))
+    return clib_error_return(0, "Could not del ns %d", index);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (mapper_ns_del_command, static) = {
+  .path = "test mapper ns delete",
+  .short_help = "test mapper ns delete <index>",
+  .function = mapper_ns_del_command_fn,
+};
+
+/*
+ * CLI "test mapper iface <nsindex> <linux-ifindex> <sw_if_index> [del]":
+ * adds (or, with "del", removes) the mapping between a Linux interface of
+ * the namespace recorded in mapper_indexes[nsindex] and a VPP interface.
+ */
+static clib_error_t *
+mapper_iface_command_fn (vlib_main_t * vm,
+                         unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  u32 nsindex;
+  u32 ifindex;
+  u32 sw_if_index;
+  int del = 0;
+  if (!unformat(input, "%d", &nsindex))
+    return clib_error_return(0, "invalid nsindex `%U'",
+                             format_unformat_error, input);
+  /* nsindex comes straight from the CLI: keep it inside mapper_indexes[] */
+  if (nsindex >= ARRAY_LEN(mapper_indexes))
+    return clib_error_return(0, "nsindex %d out of range (max %d)",
+                             nsindex, (int) ARRAY_LEN(mapper_indexes) - 1);
+  if (!unformat(input, "%d", &ifindex))
+    return clib_error_return(0, "invalid ifindex `%U'",
+                             format_unformat_error, input);
+  if (!unformat(input, "%d", &sw_if_index))
+    return clib_error_return(0, "invalid sw_if_index `%U'",
+                             format_unformat_error, input);
+  if (unformat(input, "del"))
+    del = 1;
+
+  clib_warning("mapper_add_del %d %d %d %d", mapper_indexes[nsindex], ifindex, sw_if_index, del);
+
+  if (mapper_add_del(mapper_indexes[nsindex], ifindex, sw_if_index, del))
+    return clib_error_return(0, "Could not add iface");
+  return 0;
+}
+
+
+VLIB_CLI_COMMAND (mapper_iface_command, static) = {
+  .path = "test mapper iface",
+  .short_help = "test mapper iface <nsindex> <linux-ifindex> <sw_if_index> [del]",
+  .function = mapper_iface_command_fn,
+};
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = { /* plugin registration for the netlink test plugin */
+ //.version = VPP_BUILD_VER, FIXME
+ .description = "netlink",
+};
+/* *INDENT-ON* */
+
diff --git a/utils/extras/rtinject/tap_inject.c b/utils/extras/rtinject/tap_inject.c
new file mode 100644
index 000000000..f41ae86c8
--- /dev/null
+++ b/utils/extras/rtinject/tap_inject.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/fib/fib.h>
+
+static tap_inject_main_t tap_inject_main; /* module state; reached through tap_inject_get_main() */
+extern dpo_type_t tap_inject_dpo_type; /* defined outside this file; used by tap_inject_enable() for the multicast entry */
+
+/* Accessor for the module-wide tap-inject state. */
+tap_inject_main_t *
+tap_inject_get_main (void)
+{
+  tap_inject_main_t * im = &tap_inject_main;
+
+  return im;
+}
+
+/*
+ * Records the association between a VPP interface and its tap:
+ * sw_if_index -> tap fd, sw_if_index -> tap ifindex, tap fd -> sw_if_index
+ * (vectors, grown on demand with ~0 as "no entry") and
+ * tap ifindex -> sw_if_index (hash).
+ */
+void
+tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  /* forward maps, keyed by sw_if_index */
+  vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0);
+  im->sw_if_index_to_tap_fd[sw_if_index] = tap_fd;
+
+  vec_validate_init_empty (im->sw_if_index_to_tap_if_index, sw_if_index, ~0);
+  im->sw_if_index_to_tap_if_index[sw_if_index] = tap_if_index;
+
+  /* reverse maps, keyed by the tap's fd and ifindex */
+  vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0);
+  im->tap_fd_to_sw_if_index[tap_fd] = sw_if_index;
+
+  hash_set (im->tap_if_index_to_sw_if_index, tap_if_index, sw_if_index);
+}
+
+/*
+ * Forgets the tap associated with a VPP interface, clearing all four
+ * lookup structures filled by tap_inject_insert_tap().
+ * Does nothing when no tap is recorded for sw_if_index.
+ */
+void
+tap_inject_delete_tap (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 tap_fd;
+  u32 tap_if_index;
+
+  /* never inserted: the vectors do not even reach this index */
+  if (sw_if_index >= vec_len (im->sw_if_index_to_tap_fd) ||
+      sw_if_index >= vec_len (im->sw_if_index_to_tap_if_index))
+    return;
+
+  tap_fd = im->sw_if_index_to_tap_fd[sw_if_index];
+  tap_if_index = im->sw_if_index_to_tap_if_index[sw_if_index];
+
+  im->sw_if_index_to_tap_if_index[sw_if_index] = ~0;
+  im->sw_if_index_to_tap_fd[sw_if_index] = ~0;
+
+  /* tap_fd is ~0 when the slot was empty: do not index with it */
+  if (tap_fd < vec_len (im->tap_fd_to_sw_if_index))
+    im->tap_fd_to_sw_if_index[tap_fd] = ~0;
+
+  if (tap_if_index != (u32) ~0)
+    hash_unset (im->tap_if_index_to_sw_if_index, tap_if_index);
+}
+
+/*
+ * Returns the tap fd recorded for a VPP interface, or ~0 when there is
+ * none. The lookup vector is grown to cover sw_if_index if needed.
+ */
+u32
+tap_inject_lookup_tap_fd (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 fd;
+
+  vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0);
+  fd = im->sw_if_index_to_tap_fd[sw_if_index];
+  return fd;
+}
+
+/*
+ * Returns the VPP interface recorded for a tap fd, or ~0 when there is
+ * none. The lookup vector is grown to cover tap_fd if needed.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 found;
+
+  vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0);
+  found = im->tap_fd_to_sw_if_index[tap_fd];
+  return found;
+}
+
+/*
+ * Returns the VPP interface recorded for a tap's Linux ifindex, or ~0
+ * when the ifindex is unknown.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  uword * sw_if_index;
+
+  sw_if_index = hash_get (im->tap_if_index_to_sw_if_index, tap_if_index);
+
+  /*
+   * Hash values are uword-sized: read the whole value and narrow it,
+   * rather than reading its first four bytes, which is endian-dependent.
+   */
+  return sw_if_index ? (u32) sw_if_index[0] : ~0;
+}
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = { /* plugin registration for the tap-inject ("router") plugin */
+ // .version = VPP_BUILD_VER, FIXME
+ .description = "router",
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Clears the runtime "enabled" flag. Nothing registered by
+ * tap_inject_enable() is undone, hence the warning.
+ */
+static void
+tap_inject_disable (void)
+{
+  tap_inject_get_main ()->flags &= ~TAP_INJECT_F_ENABLED;
+
+  clib_warning ("tap-inject is not actually disabled.");
+}
+
+static clib_error_t *
+tap_inject_enable (void) /* One-time runtime enabling: netlink mirroring, protocol registrations and a multicast entry. Always returns 0. */
+{
+ vlib_main_t * vm = vlib_get_main ();
+ tap_inject_main_t * im = tap_inject_get_main ();
+
+ if (tap_inject_is_enabled ())
+ return 0; /* already enabled: nothing to do */
+
+ tap_inject_enable_netlink ();
+
+ /* Only enable netlink? */
+ if (im->flags & TAP_INJECT_F_CONFIG_NETLINK)
+ {
+ im->flags |= TAP_INJECT_F_ENABLED;
+ return 0; /* netlink-only: skip every data-plane registration below */
+ }
+
+ /* Register ARP and ICMP6 as neighbor nodes. */
+ ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, im->neighbor_node_index);
+ ip6_register_protocol (IP_PROTOCOL_ICMP6, im->neighbor_node_index);
+
+ /* Register remaining protocols. */
+ ip4_register_protocol (IP_PROTOCOL_ICMP, im->tx_node_index);
+
+ ip4_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+ ip4_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+ ip4_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+ ip6_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+ ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+ ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+ { /* add a special multicast entry for 224.0.0.0/24 in mfib table 0, pointing at the tap-inject DPO */
+ dpo_id_t dpo = DPO_INVALID;
+
+ const mfib_prefix_t pfx_224_0_0_0 = {
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_grp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0xe0000000), /* 224.0.0.0 */
+ },
+ .fp_src_addr = {
+ .ip4.as_u32 = 0,
+ },
+ };
+
+ dpo_set(&dpo, tap_inject_dpo_type, DPO_PROTO_IP4, ~0);
+
+ index_t repi = replicate_create(1, DPO_PROTO_IP4); /* single-bucket replicate holding the DPO */
+ replicate_set_bucket(repi, 0, &dpo);
+
+ mfib_table_entry_special_add(0,
+ &pfx_224_0_0_0,
+ MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF,
+ repi);
+
+ dpo_reset(&dpo);
+ }
+
+ im->flags |= TAP_INJECT_F_ENABLED;
+
+ return 0;
+}
+
+/*
+ * Interrupt node body (also called directly with node and frame set to 0):
+ * connects a tap for every queued Ethernet interface, disconnects the
+ * taps of every interface queued for removal, then empties both queues.
+ * Connecting stops at the first failure, which is reported.
+ * Returns 0 on success and -1 (as uword) when a connect failed.
+ */
+static uword
+tap_inject_iface_isr (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_hw_interface_t * hw;
+  u32 * hw_if_index;
+  clib_error_t * err = 0;
+  int failed = 0;
+
+  vec_foreach (hw_if_index, im->interfaces_to_enable)
+    {
+      hw = vnet_get_hw_interface (vnet_get_main (), *hw_if_index);
+
+      if (hw->hw_class_index == ethernet_hw_interface_class.index)
+        {
+          err = tap_inject_tap_connect (hw);
+          if (err)
+            {
+              /* report and release the error instead of dropping it */
+              failed = 1;
+              clib_error_report (err);
+              break;
+            }
+        }
+    }
+
+  vec_foreach (hw_if_index, im->interfaces_to_disable)
+    tap_inject_tap_disconnect (*hw_if_index);
+
+  vec_free (im->interfaces_to_enable);
+  vec_free (im->interfaces_to_disable);
+
+  return failed ? -1 : 0;
+}
+
+VLIB_REGISTER_NODE (tap_inject_iface_isr_node, static) = { /* interrupt-driven input node; triggered by tap_inject_interface_add_del() */
+ .function = tap_inject_iface_isr,
+ .name = "tap-inject-iface-isr",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Hardware interface add/del hook. When tap-inject is enabled in the
+ * configuration, the interface is queued for tap connection (add) or
+ * disconnection (del) and the interrupt node is kicked to do the work.
+ */
+static clib_error_t *
+tap_inject_interface_add_del (struct vnet_main_t * vnet_main, u32 hw_if_index,
+                              u32 add)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 ** pending;
+
+  if (!tap_inject_is_config_enabled ())
+    return 0;
+
+  tap_inject_enable ();
+
+  pending = add ? &im->interfaces_to_enable : &im->interfaces_to_disable;
+  vec_add1 (*pending, hw_if_index);
+
+  vlib_node_set_interrupt_pending (vm, tap_inject_iface_isr_node.index);
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (tap_inject_interface_add_del);
+
+
+/*
+ * Enables or disables tap-inject for every existing hardware interface:
+ * flips the runtime state, queues all interfaces on the matching list and
+ * runs the interface handler synchronously.
+ */
+static clib_error_t *
+tap_inject_enable_disable_all_interfaces (int enable)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_hw_interface_t * interfaces = vnet_main->interface_main.hw_interfaces;
+  vnet_hw_interface_t * hw;
+  u32 ** indices;
+
+  if (!enable)
+    tap_inject_disable ();
+  else
+    tap_inject_enable ();
+
+  /* Collect all the interface indices. */
+  indices = &im->interfaces_to_disable;
+  if (enable)
+    indices = &im->interfaces_to_enable;
+  pool_foreach (hw, interfaces, vec_add1 (*indices, hw - interfaces));
+
+  if (tap_inject_iface_isr (vlib_get_main (), 0, 0))
+    return clib_error_return (0, "tap-inject interface add del isr failed");
+
+  return 0;
+}
+
+/*
+ * Shared handler of "enable tap-inject" (function_arg = 1) and
+ * "disable tap-inject" (function_arg = 0).
+ * Enabling is refused when tap-inject is disabled in the configuration
+ * and is rolled back when it fails part-way.
+ */
+static clib_error_t *
+tap_inject_cli (vlib_main_t * vm, unformat_input_t * input,
+                vlib_cli_command_t * cmd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  clib_error_t * err;
+
+  if (!cmd->function_arg)
+    {
+      /* Disable */
+      tap_inject_enable_disable_all_interfaces (0);
+      im->flags &= ~TAP_INJECT_F_CONFIG_ENABLE;
+      return 0;
+    }
+
+  if (tap_inject_is_config_disabled ())
+    return clib_error_return (0,
+            "tap-inject is disabled in config, thus cannot be enabled.");
+
+  /* Enable */
+  err = tap_inject_enable_disable_all_interfaces (1);
+  if (err)
+    {
+      tap_inject_enable_disable_all_interfaces (0);
+      return err;
+    }
+
+  im->flags |= TAP_INJECT_F_CONFIG_ENABLE;
+  return 0;
+}
+
+VLIB_CLI_COMMAND (tap_inject_enable_cmd, static) = { /* both commands share tap_inject_cli(); function_arg selects the action */
+ .path = "enable tap-inject",
+ .short_help = "enable tap-inject",
+ .function = tap_inject_cli,
+ .function_arg = 1, /* non-zero: enable */
+};
+
+VLIB_CLI_COMMAND (tap_inject_disable_cmd, static) = {
+ .path = "disable tap-inject",
+ .short_help = "disable tap-inject",
+ .function = tap_inject_cli,
+ .function_arg = 0, /* zero: disable */
+};
+
+
+/*
+ * CLI "show tap-inject": prints one "<vpp interface> -> <tap name>" line
+ * per known tap, or the reason why there is nothing to show.
+ */
+static clib_error_t *
+show_tap_inject (vlib_main_t * vm, unformat_input_t * input,
+                 vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 k, v;
+
+  if (tap_inject_is_config_disabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is disabled in config.\n");
+    }
+  else if (!tap_inject_is_enabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is not enabled.\n");
+    }
+  else
+    {
+      /* key: tap ifindex, value: sw_if_index */
+      hash_foreach (k, v, im->tap_if_index_to_sw_if_index, {
+        vlib_cli_output (vm, "%U -> %U",
+                format_vnet_sw_interface_name, vnet_main,
+                vnet_get_sw_interface (vnet_main, v),
+                format_tap_inject_tap_name, k);
+      });
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_tap_inject_cmd, static) = { /* registers show_tap_inject() under "show tap-inject" */
+ .path = "show tap-inject",
+ .short_help = "show tap-inject",
+ .function = show_tap_inject,
+};
+
+
+/*
+ * Parser of the "tap-inject" startup configuration section.
+ * Accepted keywords: "enable", "disable" and "netlink-only"; each sets
+ * the matching TAP_INJECT_F_CONFIG_* flag. Requesting both "enable" and
+ * "disable" is an error.
+ */
+static clib_error_t *
+tap_inject_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      u32 flag;
+
+      if (unformat (input, "enable"))
+        flag = TAP_INJECT_F_CONFIG_ENABLE;
+      else if (unformat (input, "disable"))
+        flag = TAP_INJECT_F_CONFIG_DISABLE;
+      else if (unformat (input, "netlink-only"))
+        flag = TAP_INJECT_F_CONFIG_NETLINK;
+      else
+        return clib_error_return (0, "syntax error `%U'",
+                                  format_unformat_error, input);
+
+      im->flags |= flag;
+    }
+
+  if (tap_inject_is_config_enabled () && tap_inject_is_config_disabled ())
+    return clib_error_return (0,
+              "tap-inject cannot be both enabled and disabled.");
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tap_inject_config, "tap-inject");
diff --git a/utils/extras/rtinject/tap_inject.h b/utils/extras/rtinject/tap_inject.h
new file mode 100644
index 000000000..ec5121a09
--- /dev/null
+++ b/utils/extras/rtinject/tap_inject.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TAP_INJECT_H
+#define _TAP_INJECT_H
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/ip/ip.h>
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN 6
+#endif
+
+/* Plugin-wide state, obtained through tap_inject_get_main (). */
+typedef struct {
+ /*
+ * tap-inject can be enabled or disabled in config file or during runtime.
+ * When disabled in config, it is not possible to enable during runtime.
+ *
+ * When the netlink-only option is used, netlink configuration is monitored
+ * and mirrored to the data plane but no traffic is passed between the host
+ * and the data plane.
+ */
+#define TAP_INJECT_F_CONFIG_ENABLE (1U << 0)
+#define TAP_INJECT_F_CONFIG_DISABLE (1U << 1)
+#define TAP_INJECT_F_CONFIG_NETLINK (1U << 2)
+#define TAP_INJECT_F_ENABLED (1U << 3)
+
+ /* Bitmask of the TAP_INJECT_F_* values above. */
+ u32 flags;
+
+ /*
+ * Lookup tables between VPP sw_if_index, tap file descriptor and host tap
+ * ifindex. tap_if_index_to_sw_if_index is walked with hash_foreach by
+ * "show tap-inject"; the container type of the other three is not visible
+ * here -- see tap_inject_insert_tap / tap_inject_lookup_*.
+ */
+ u32 * sw_if_index_to_tap_fd;
+ u32 * sw_if_index_to_tap_if_index;
+ u32 * tap_fd_to_sw_if_index;
+ u32 * tap_if_index_to_sw_if_index;
+
+ /* NOTE(review): presumably pending enable/disable work lists -- confirm. */
+ u32 * interfaces_to_enable;
+ u32 * interfaces_to_disable;
+
+ /* Vector of tap fds with pending reads; filled by the file read callback
+ * and drained (then freed) by the tap-inject-rx node. */
+ u32 * rx_file_descriptors;
+
+ /* Graph node indices, recorded by tap_inject_init. */
+ u32 rx_node_index;
+ u32 tx_node_index;
+ u32 neighbor_node_index;
+
+ /* Vector of pre-allocated buffer indices consumed by the rx path. */
+ u32 * rx_buffers;
+
+} tap_inject_main_t;
+
+
+/* Accessor for the plugin-wide state. */
+tap_inject_main_t * tap_inject_get_main (void);
+
+/* Register / unregister the (sw_if_index, tap fd, tap ifindex) association. */
+void tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index);
+void tap_inject_delete_tap (u32 sw_if_index);
+
+/* Lookups; callers treat a return value of ~0 as "no such mapping". */
+u32 tap_inject_lookup_tap_fd (u32 sw_if_index);
+u32 tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd);
+u32 tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index);
+
+/* Returns 1 when TAP_INJECT_F_ENABLED is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_enabled (void)
+{
+  return (tap_inject_get_main ()->flags & TAP_INJECT_F_ENABLED) != 0;
+}
+
+/* Returns 1 when TAP_INJECT_F_CONFIG_ENABLE is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_config_enabled (void)
+{
+  return (tap_inject_get_main ()->flags & TAP_INJECT_F_CONFIG_ENABLE) != 0;
+}
+
+/* Returns 1 when TAP_INJECT_F_CONFIG_DISABLE is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_config_disabled (void)
+{
+  return (tap_inject_get_main ()->flags & TAP_INJECT_F_CONFIG_DISABLE) != 0;
+}
+
+
+/* Netlink */
+
+/* Subscribe to netns notifications (addresses, links, neighbors, routes). */
+void tap_inject_enable_netlink (void);
+
+
+/* Tap */
+
+/* Create the host tap for hw and register it; returns 0 or an error. */
+clib_error_t * tap_inject_tap_connect (vnet_hw_interface_t * hw);
+/* Drop the tap mapping for sw_if_index and close its fd; returns 0 or an error. */
+clib_error_t * tap_inject_tap_disconnect (u32 sw_if_index);
+
+/* format() helper; takes a u32 host ifindex and appends the interface name. */
+u8 * format_tap_inject_tap_name (u8 * s, va_list * args);
+
+#endif /* _TAP_INJECT_H */
diff --git a/utils/extras/rtinject/tap_inject_netlink.c b/utils/extras/rtinject/tap_inject_netlink.c
new file mode 100644
index 000000000..c0e0ce995
--- /dev/null
+++ b/utils/extras/rtinject/tap_inject_netlink.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../devices/rtnetlink/netns.h"
+#include <vlibmemory/api.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/fib/fib.h>
+#include <vnet/ethernet/arp.h>
+#include <arpa/inet.h>
+#include <linux/mpls.h>
+#include <vnet/mpls/packet.h>
+
+#include "tap_inject.h"
+
+/*
+ * Mirror a host address add/delete onto the matching VPP interface.
+ *
+ * The notification is ignored when the host ifindex is not a tap-inject tap.
+ * IPv4 uses the "local" address, IPv6 uses "addr"; other families are
+ * ignored.
+ */
+static void
+add_del_addr (ns_addr_t * a, int is_del)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index =
+    tap_inject_lookup_sw_if_index_from_tap_if_index (a->ifaddr.ifa_index);
+
+  if (~0 == sw_if_index)
+    return;
+
+  switch (a->ifaddr.ifa_family)
+    {
+    case AF_INET:
+      ip4_add_del_interface_address (vm, sw_if_index,
+                                     (ip4_address_t *) a->local,
+                                     a->ifaddr.ifa_prefixlen, is_del);
+      break;
+
+    case AF_INET6:
+      ip6_add_del_interface_address (vm, sw_if_index,
+                                     (ip6_address_t *) a->addr,
+                                     a->ifaddr.ifa_prefixlen, is_del);
+      break;
+
+    default:
+      break;
+    }
+}
+
+
+/* Argument block copied to the main thread for set_flags_cb. */
+struct set_flags_args {
+ u32 index; /* sw_if_index of the interface to update */
+ u8 flags; /* new software interface flags */
+};
+
+/* RPC callback: apply the requested software interface flags. */
+static void
+set_flags_cb (struct set_flags_args * a)
+{
+  vnet_main_t * vnm = vnet_get_main ();
+
+  vnet_sw_interface_set_flags (vnm, a->index, a->flags);
+}
+
+/*
+ * Mirror the host link IFF_UP state onto the VPP interface admin state.
+ *
+ * The current software flags are read, only the ADMIN_UP bit is changed,
+ * and the update is handed to the main thread via an RPC. is_del is not
+ * consulted.
+ */
+static void
+add_del_link (ns_link_t * l, int is_del)
+{
+  struct set_flags_args rpc_args = { ~0, 0 };
+  vnet_sw_interface_t * swif;
+  u32 sw_if_index =
+    tap_inject_lookup_sw_if_index_from_tap_if_index (l->ifi.ifi_index);
+
+  if (~0 == sw_if_index)
+    return;
+
+  swif = vnet_get_sw_interface (vnet_get_main (), sw_if_index);
+
+  rpc_args.index = sw_if_index;
+  rpc_args.flags = swif->flags;
+
+  if (l->ifi.ifi_flags & IFF_UP)
+    rpc_args.flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+  else
+    rpc_args.flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  vl_api_rpc_call_main_thread (set_flags_cb, (u8 *) &rpc_args,
+                               sizeof (rpc_args));
+}
+
+
+/*
+ * Mirror a host neighbor (ARP / ND) entry onto the matching VPP interface.
+ *
+ * IPv4: NUD_REACHABLE installs the entry, NUD_FAILED removes it, any other
+ *       state is ignored.
+ * IPv6: NUD_REACHABLE installs the entry, any other state removes it.
+ *
+ * The notification is ignored when the host ifindex is not a tap-inject tap.
+ * is_del is not consulted; the decision is driven by the neighbor state.
+ */
+static void
+add_del_neigh (ns_neigh_t * n, int is_del)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+    n->nd.ndm_ifindex);
+
+  if (sw_if_index == ~0)
+    return;
+
+  if (n->nd.ndm_family == AF_INET)
+    {
+      ethernet_arp_ip4_over_ethernet_address_t a;
+
+      memset (&a, 0, sizeof (a));
+
+      clib_memcpy (&a.mac, n->lladdr, ETHER_ADDR_LEN);
+      clib_memcpy (&a.ip4, n->dst, sizeof (a.ip4));
+
+      if (n->nd.ndm_state & NUD_REACHABLE)
+        {
+          vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index,
+                                          &a,
+                                          IP_NEIGHBOR_FLAG_NO_FIB_ENTRY);
+        }
+      else if (n->nd.ndm_state & NUD_FAILED)
+        {
+          vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, &a);
+        }
+    }
+  else if (n->nd.ndm_family == AF_INET6)
+    {
+      if (n->nd.ndm_state & NUD_REACHABLE)
+        {
+          /*
+           * Use a stack copy of the link-layer address, like the IPv4
+           * branch does. The previous code malloc'ed a mac_address_t per
+           * notification, never freed it and never checked for NULL.
+           */
+          mac_address_t mac;
+
+          memset (&mac, 0, sizeof (mac));
+          memcpy (&mac, n->lladdr, ETHER_ADDR_LEN);
+
+          vnet_set_ip6_ethernet_neighbor (vm, sw_if_index,
+                                          (ip6_address_t *) n->dst, &mac,
+                                          IP_NEIGHBOR_FLAG_NO_FIB_ENTRY);
+        }
+      else
+        vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index,
+                                          (ip6_address_t *) n->dst);
+    }
+}
+
+
+#define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254
+
+/*
+ * Decode a netlink MPLS label stack into host-order label values.
+ *
+ * addr: label stack entries in network byte order.
+ * l:    output array; l[i] receives the i-th label.
+ *
+ * Decoding stops at a zero label, at the entry carrying the bottom-of-stack
+ * bit, or after MPLS_STACK_DEPTH entries, whichever comes first. The depth
+ * limit keeps a stack without the S bit from running past either array; it
+ * assumes both addr and l hold at least MPLS_STACK_DEPTH elements, as the
+ * caller in add_del_route provides.
+ */
+static void
+get_mpls_label_stack(struct mpls_label *addr, u32* l)
+{
+  int i;
+
+  for (i = 0; i < MPLS_STACK_DEPTH; i++)
+    {
+      u32 entry = ntohl (addr[i].entry);
+      u32 label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+
+      if (label == 0)
+        return;
+
+      l[i] = label;
+
+      if (entry & MPLS_LS_S_MASK)
+        return;
+    }
+}
+
+/*
+ * Mirror a host route add/delete into FIB table 0.
+ *
+ * r:      route notification; oif selects the VPP interface, dst/gateway
+ *         give the prefix and next hop, encap optionally carries an MPLS
+ *         label stack (IPv4 only).
+ * is_del: non-zero to remove the path, zero to add it.
+ *
+ * Changes from the previous version:
+ *  - is_del is honoured. Before, a delete notification re-added the route.
+ *  - the temporary rpaths vector is freed after the FIB call.
+ *  - all cases go through the fib_route_path_t based add2/remove2 calls
+ *    with weight 1, so add and remove describe the same path.
+ *
+ * Routes on interfaces that are not tap-inject taps, and families other
+ * than AF_INET / AF_INET6 (AF_MPLS included), are ignored.
+ */
+static void
+add_del_route (ns_route_t * r, int is_del)
+{
+  fib_route_path_t *rpaths = NULL, rpath;
+  fib_prefix_t prefix;
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif);
+
+  if (sw_if_index == ~0)
+    return;
+
+  memset (&prefix, 0, sizeof (prefix));
+  memset (&rpath, 0, sizeof (rpath));
+
+  prefix.fp_len = r->rtm.rtm_dst_len;
+  rpath.frp_sw_if_index = sw_if_index;
+  rpath.frp_weight = 1;
+
+  if (r->rtm.rtm_family == AF_INET)
+    {
+      prefix.fp_proto = FIB_PROTOCOL_IP4;
+      clib_memcpy (&prefix.fp_addr.ip4, r->dst, sizeof (prefix.fp_addr.ip4));
+
+      rpath.frp_proto = DPO_PROTO_IP4;
+      clib_memcpy (&rpath.frp_addr.ip4, r->gateway,
+                   sizeof (rpath.frp_addr.ip4));
+
+      /* Out-labels only matter when installing the path. */
+      if (!is_del)
+        {
+          u32 stack[MPLS_STACK_DEPTH] = {0};
+          int i;
+
+          get_mpls_label_stack (r->encap, stack);
+
+          for (i = 0; i < MPLS_STACK_DEPTH && stack[i] != 0; i++)
+            {
+              fib_mpls_label_t fib_label = {stack[i],0,0,0};
+              vec_add1 (rpath.frp_label_stack, fib_label);
+            }
+        }
+    }
+  else if (r->rtm.rtm_family == AF_INET6)
+    {
+      prefix.fp_proto = FIB_PROTOCOL_IP6;
+      clib_memcpy (&prefix.fp_addr.ip6, r->dst, sizeof (prefix.fp_addr.ip6));
+
+      rpath.frp_proto = DPO_PROTO_IP6;
+      clib_memcpy (&rpath.frp_addr.ip6, r->gateway,
+                   sizeof (rpath.frp_addr.ip6));
+    }
+  else
+    {
+      /* AF_MPLS and any other family: not handled. */
+      return;
+    }
+
+  vec_add1 (rpaths, rpath);
+
+  if (is_del)
+    fib_table_entry_path_remove2 (0, &prefix, FIB_SOURCE_API, rpaths);
+  else
+    fib_table_entry_path_add2 (0, &prefix, FIB_SOURCE_API,
+                               FIB_ENTRY_FLAG_NONE, rpaths);
+
+  /*
+   * Only the path vector is released here.
+   * NOTE(review): frp_label_stack is intentionally not freed; the FIB path
+   * extension appears to keep the pointer -- confirm against the VPP
+   * release in use.
+   */
+  vec_free (rpaths);
+}
+
+
+/*
+ * netns subscription callback: dispatch a notification to the handler for
+ * its object type. The delete indication (flags & NETNS_F_DEL) is passed
+ * through as is_del. Unknown types are ignored; opaque is unused.
+ */
+static void
+netns_notify_cb (void * obj, netns_type_t type, u32 flags, uword opaque)
+{
+  switch (type)
+    {
+    case NETNS_TYPE_ADDR:
+      add_del_addr ((ns_addr_t *)obj, flags & NETNS_F_DEL);
+      break;
+
+    case NETNS_TYPE_LINK:
+      add_del_link ((ns_link_t *)obj, flags & NETNS_F_DEL);
+      break;
+
+    case NETNS_TYPE_NEIGH:
+      add_del_neigh ((ns_neigh_t *)obj, flags & NETNS_F_DEL);
+      break;
+
+    case NETNS_TYPE_ROUTE:
+      add_del_route ((ns_route_t *)obj, flags & NETNS_F_DEL);
+      break;
+
+    default:
+      break;
+    }
+}
+
+/*
+ * Subscribe netns_notify_cb to the namespace named by the empty string.
+ * The value returned by netns_open is not checked.
+ */
+void
+tap_inject_enable_netlink (void)
+{
+  char empty_name[1] = { 0 };
+  netns_sub_t subscription = {
+    .notify = netns_notify_cb,
+    .opaque = 0,
+  };
+
+  netns_open (empty_name, &subscription);
+}
diff --git a/utils/extras/rtinject/tap_inject_node.c b/utils/extras/rtinject/tap_inject_node.c
new file mode 100644
index 000000000..73c296451
--- /dev/null
+++ b/utils/extras/rtinject/tap_inject_node.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <vnet/ethernet/arp_packet.h>
+
+/* Forward declarations of the three graph nodes registered in this file. */
+vlib_node_registration_t tap_inject_rx_node;
+vlib_node_registration_t tap_inject_tx_node;
+vlib_node_registration_t tap_inject_neighbor_node;
+
+/* Next-node slots of tap-inject-neighbor (see its .next_nodes table). */
+enum {
+ NEXT_NEIGHBOR_ARP,
+ NEXT_NEIGHBOR_ICMP6,
+};
+
+/**
+ * @brief Dynamically added tap_inject DPO type
+ */
+dpo_type_t tap_inject_dpo_type;
+
+/*
+ * Write the current data of buffer b to the tap fd.
+ *
+ * Only the first buffer of a chain is written. A warning is logged when the
+ * write fails, when it is short, or when the buffer has a chained successor
+ * (whose data is therefore not sent).
+ */
+static inline void
+tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
+{
+  struct iovec iov = {
+    .iov_base = vlib_buffer_get_current (b),
+    .iov_len = b->current_length,
+  };
+  ssize_t n_written = writev (fd, &iov, 1);
+
+  if (n_written < 0)
+    {
+      clib_warning ("writev failed");
+      return;
+    }
+
+  if (n_written < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    clib_warning ("buffer truncated");
+}
+
+/*
+ * tap-inject-tx node function: copy every packet of the frame to the tap
+ * that belongs to its RX interface, then free the whole frame.
+ *
+ * Packets whose RX interface has no tap are skipped (they are still freed).
+ * Returns the number of packets in the frame.
+ */
+static uword
+tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  u32 * bis = vlib_frame_vector_args (f);
+  u32 idx;
+
+  for (idx = 0; idx < f->n_vectors; idx++)
+    {
+      vlib_buffer_t * buf = vlib_get_buffer (vm, bis[idx]);
+      u32 tap_fd =
+        tap_inject_lookup_tap_fd (vnet_buffer (buf)->sw_if_index[VLIB_RX]);
+
+      if (tap_fd != ~0)
+        {
+          /* Re-wind the buffer to the start of the Ethernet header. */
+          vlib_buffer_advance (buf, -buf->current_data);
+
+          tap_inject_tap_send_buffer (tap_fd, buf);
+        }
+    }
+
+  vlib_buffer_free (vm, bis, f->n_vectors);
+  return f->n_vectors;
+}
+
+/* Internal node with no next nodes; it is the sink for punted traffic. */
+VLIB_REGISTER_NODE (tap_inject_tx_node) = {
+  .function = tap_inject_tx,
+  .name = "tap-inject-tx",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+};
+
+
+/*
+ * tap-inject-neighbor node function.
+ *
+ * Every packet is copied to the tap of its RX interface. ARP replies and
+ * ICMPv6 neighbor advertisements are additionally handed to the matching
+ * VPP neighbor node so the data plane learns the neighbor too; every other
+ * packet is freed after the copy. Packets whose RX interface has no tap are
+ * freed without being copied.
+ *
+ * Returns the number of packets in the frame.
+ */
+static uword
+tap_inject_neighbor (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+  u32 bi;
+  u32 next_index = node->cached_next_index;
+  u32 next;
+  u32 n_left;
+  u32 * to_next;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      bi = pkts[i];
+      b = vlib_get_buffer (vm, bi);
+
+      /*
+       * Decide the next node per packet. Previously this was initialised
+       * once before the loop, so after the first ARP reply / NA every later
+       * packet in the frame inherited that stale next node instead of
+       * being freed.
+       */
+      next = ~0;
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+
+      /* Send the buffer to a neighbor node too? */
+      {
+        ethernet_header_t * eth = vlib_buffer_get_current (b);
+        u16 ether_type = htons (eth->type);
+
+        if (ether_type == ETHERNET_TYPE_ARP)
+          {
+            ethernet_arp_header_t * arp = (void *)(eth + 1);
+
+            if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply))
+              next = NEXT_NEIGHBOR_ARP;
+          }
+        else if (ether_type == ETHERNET_TYPE_IP6)
+          {
+            ip6_header_t * ip = (void *)(eth + 1);
+            icmp46_header_t * icmp = (void *)(ip + 1);
+
+            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
+                icmp->type == ICMP6_neighbor_advertisement)
+              next = NEXT_NEIGHBOR_ICMP6;
+          }
+      }
+
+      if (next == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
+      vlib_buffer_advance (b, sizeof (ethernet_header_t));
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+      *(to_next++) = bi;
+      --n_left;
+
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                       n_left, bi, next);
+      vlib_put_next_frame (vm, node, next_index, n_left);
+    }
+
+  return f->n_vectors;
+}
+
+/*
+ * NOTE(review): neighbor advertisements are steered to the node named
+ * "icmp6-neighbor-solicitation"; confirm this is the intended target.
+ */
+VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
+  .function = tap_inject_neighbor,
+  .name = "tap-inject-neighbor",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [NEXT_NEIGHBOR_ARP] = "arp-input",
+    [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
+  },
+};
+
+
+/* Largest frame, in bytes, that a single tap read is sized for. */
+#define MTU 1500
+/* Buffers needed to hold MTU bytes, rounded up. Expands to an expression
+ * that uses a variable named `vm`, which must be in scope at the use site. */
+#define MTU_BUFFERS ((MTU + vlib_buffer_get_default_data_size(vm) - 1) / vlib_buffer_get_default_data_size(vm))
+/* Number of buffers requested per bulk allocation in the rx path. */
+#define NUM_BUFFERS_TO_ALLOC 32
+
+/*
+ * Read one packet from tap fd and hand it to the output node of the VPP
+ * interface associated with that tap.
+ *
+ * Returns 1 when a packet was read and enqueued, 0 when the fd is unknown,
+ * buffers could not be allocated, or readv failed.
+ *
+ * Changes from the previous version:
+ *  - the rx_buffers vector is grown before the bulk allocation writes into
+ *    it; previously NUM_BUFFERS_TO_ALLOC indices were written at offset
+ *    `len` with no guarantee that the vector had room for them.
+ *  - the per-buffer size comes from vlib_buffer_get_default_data_size (vm),
+ *    the same value MTU_BUFFERS is derived from, instead of the
+ *    compile-time VLIB_BUFFER_DEFAULT_DATA_SIZE.
+ *
+ * NOTE(review): sw_if_index is used as a hw_if_index below; this relies on
+ * the two being equal for tap-inject interfaces (asserted in
+ * tap_inject_tap_connect).
+ */
+static inline uword
+tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  const ssize_t data_size = vlib_buffer_get_default_data_size (vm);
+  u32 sw_if_index;
+  struct iovec iov[MTU_BUFFERS];
+  u32 bi[MTU_BUFFERS];
+  vlib_buffer_t * b;
+  ssize_t n_bytes;
+  ssize_t n_bytes_left;
+  u32 i, j;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
+  if (sw_if_index == ~0)
+    return 0;
+
+  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
+  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+    {
+      u32 len = vec_len (im->rx_buffers);
+      u32 n_alloc;
+      u8 index = vlib_buffer_pool_get_default_for_numa (vm, 0);
+
+      /* Make room for the new indices after the ones still cached. */
+      vec_validate (im->rx_buffers, len + NUM_BUFFERS_TO_ALLOC - 1);
+
+      n_alloc = vlib_buffer_alloc_from_pool (vm,
+                                             &im->rx_buffers[len],
+                                             NUM_BUFFERS_TO_ALLOC,
+                                             index);
+
+      /* Only the indices that were really allocated are valid. */
+      _vec_len (im->rx_buffers) = len + n_alloc;
+
+      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+        {
+          clib_warning ("failed to allocate buffers");
+          return 0;
+        }
+    }
+
+  /* Fill buffers from the end of the list to make it easier to resize. */
+  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
+    {
+      vlib_buffer_t * b;
+
+      bi[i] = im->rx_buffers[j];
+
+      b = vlib_get_buffer (vm, bi[i]);
+
+      iov[i].iov_base = b->data;
+      iov[i].iov_len = data_size;
+    }
+
+  n_bytes = readv (fd, iov, MTU_BUFFERS);
+  if (n_bytes < 0)
+    {
+      clib_warning ("readv failed");
+      return 0;
+    }
+
+  b = vlib_get_buffer (vm, bi[0]);
+
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+  n_bytes_left = n_bytes - data_size;
+
+  if (n_bytes_left > 0)
+    {
+      b->total_length_not_including_first_buffer = n_bytes_left;
+      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+    }
+
+  b->current_length = n_bytes;
+
+  /* If necessary, configure any remaining buffers in the chain. */
+  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= data_size)
+    {
+      /* The previous buffer is full and links to the next one. */
+      b = vlib_get_buffer (vm, bi[i - 1]);
+      b->current_length = data_size;
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+      b->next_buffer = bi[i];
+
+      /* Provisional length; corrected above if yet another buffer follows. */
+      b = vlib_get_buffer (vm, bi[i]);
+      b->current_length = n_bytes_left;
+    }
+
+  /* i is now the number of buffers consumed from the tail of the cache. */
+  _vec_len (im->rx_buffers) -= i;
+
+  /* Get the packet to the output node. */
+  {
+    vnet_hw_interface_t * hw;
+    vlib_frame_t * new_frame;
+    u32 * to_next;
+
+    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
+
+    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+    to_next = vlib_frame_vector_args (new_frame);
+    to_next[0] = bi[0];
+    new_frame->n_vectors = 1;
+
+    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
+  }
+
+  return 1;
+}
+
+/*
+ * tap-inject-rx node function: read one packet from every tap fd queued in
+ * rx_file_descriptors.
+ *
+ * Processing stops at the first fd whose read fails; in that case 0 is
+ * returned. The pending-fd vector is freed in all cases. Otherwise the
+ * number of packets read is returned.
+ */
+static uword
+tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  tap_inject_main_t * tim = tap_inject_get_main ();
+  uword n_rx = 0;
+  u32 * pending_fd;
+
+  vec_foreach (pending_fd, tim->rx_file_descriptors)
+    {
+      if (tap_rx (vm, node, f, *pending_fd) == 1)
+        {
+          ++n_rx;
+          continue;
+        }
+
+      clib_warning ("rx failed");
+      n_rx = 0;
+      break;
+    }
+
+  vec_free (tim->rx_file_descriptors);
+
+  return n_rx;
+}
+
+/* Input node that runs in interrupt state (see .state). */
+VLIB_REGISTER_NODE (tap_inject_rx_node) = {
+  .function = tap_inject_rx,
+  .name = "tap-inject-rx",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = sizeof (u32),
+};
+
+/**
+ * @brief no-op lock function.
+ *
+ * Installed as dv_lock in tap_inject_vft; the dpo argument is not used.
+ */
+static void
+tap_inject_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/**
+ * @brief no-op unlock function.
+ *
+ * Installed as dv_unlock in tap_inject_vft; the dpo argument is not used.
+ */
+static void
+tap_inject_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/* format() helper for the tap-inject DPO; args is not read and the index
+ * printed is always 0. */
+u8 *
+format_tap_inject_dpo (u8 * s, va_list * args)
+{
+  s = format (s, "tap-inject:[%d]", 0);
+
+  return s;
+}
+
+/* Virtual function table passed to dpo_register_new_type in tap_inject_init. */
+const static dpo_vft_t tap_inject_vft = {
+ .dv_lock = tap_inject_dpo_lock,
+ .dv_unlock = tap_inject_dpo_unlock,
+ .dv_format = format_tap_inject_dpo,
+};
+
+/* NULL-terminated list of node names for the DPO: only tap-inject-tx. */
+const static char *const tap_inject_tx_nodes[] = {
+ "tap-inject-tx",
+ NULL,
+};
+
+/* Per-protocol node lists; only IP4 and IP6 are populated. */
+const static char *const *const tap_inject_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = tap_inject_tx_nodes,
+ [DPO_PROTO_IP6] = tap_inject_tx_nodes,
+};
+
+/*
+ * Plugin init: register the tap-inject DPO type, record the indices of the
+ * three graph nodes, and pre-size the (initially empty) rx buffer cache.
+ * Always returns 0.
+ */
+static clib_error_t *
+tap_inject_init (vlib_main_t * vm)
+{
+  tap_inject_main_t * tim = tap_inject_get_main ();
+
+  tim->rx_node_index = tap_inject_rx_node.index;
+  tim->tx_node_index = tap_inject_tx_node.index;
+  tim->neighbor_node_index = tap_inject_neighbor_node.index;
+
+  tap_inject_dpo_type =
+    dpo_register_new_type (&tap_inject_vft, tap_inject_nodes);
+
+  /* Reserve room for one bulk allocation, then mark the vector empty. */
+  vec_alloc (tim->rx_buffers, NUM_BUFFERS_TO_ALLOC);
+  vec_reset_length (tim->rx_buffers);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_inject_init);
diff --git a/utils/extras/rtinject/tap_inject_tap.c b/utils/extras/rtinject/tap_inject_tap.c
new file mode 100644
index 000000000..a3ec9ffef
--- /dev/null
+++ b/utils/extras/rtinject/tap_inject_tap.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/if_tun.h>
+#include <netinet/in.h>
+#include <vnet/unix/tuntap.h>
+
+#include <vlib/unix/unix.h>
+
+
+/*
+ * File read callback for a tap fd: queue the fd for the tap-inject-rx node
+ * and raise that node's interrupt. Always returns 0.
+ */
+static clib_error_t *
+tap_inject_tap_read (clib_file_t * f)
+{
+  tap_inject_main_t * tim = tap_inject_get_main ();
+
+  vec_add1 (tim->rx_file_descriptors, f->file_descriptor);
+
+  vlib_node_set_interrupt_pending (vlib_get_main (), tim->rx_node_index);
+
+  return 0;
+}
+
+/* Host taps are named "vpp<hw_instance>". */
+#define TAP_INJECT_TAP_BASE_NAME "vpp"
+
+/*
+ * Create a host tap for the given hardware interface and register it.
+ *
+ * Steps: open /dev/net/tun, create the tap (IFF_TAP | IFF_NO_PI), switch it
+ * to non-blocking I/O, copy the VPP MAC address onto it, look up its host
+ * ifindex, register the fd for read notifications and record the
+ * (sw_if_index, fd, ifindex) mapping.
+ *
+ * Returns 0 on success or a clib error; on error every descriptor opened
+ * here is closed again.
+ *
+ * Change from the previous version: the temporary name vector produced by
+ * format () is freed once it has been copied into the ifreq; it used to
+ * leak on every call.
+ *
+ * NOTE(review): the sw interface is looked up by hw_if_index, so the code
+ * relies on hw_if_index == sw_if_index for these interfaces.
+ */
+clib_error_t *
+tap_inject_tap_connect (vnet_hw_interface_t * hw)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vnet_sw_interface_t * sw = vnet_get_sw_interface (vnet_main, hw->hw_if_index);
+  static const int one = 1;
+  int fd;
+  struct ifreq ifr;
+  clib_file_t template;
+  u32 tap_fd;
+  u8 * name;
+
+  memset (&ifr, 0, sizeof (ifr));
+  memset (&template, 0, sizeof (template));
+
+  ASSERT (hw->hw_if_index == sw->sw_if_index);
+
+  /* Create the tap. */
+  tap_fd = open ("/dev/net/tun", O_RDWR);
+
+  if ((int)tap_fd < 0)
+    return clib_error_return (0, "failed to open tun device");
+
+  name = format (0, TAP_INJECT_TAP_BASE_NAME "%u%c", hw->hw_instance, 0);
+
+  /* ifr was zeroed above, so the copy stays NUL-terminated. */
+  strncpy (ifr.ifr_name, (char *) name, sizeof (ifr.ifr_name) - 1);
+  vec_free (name);
+
+  ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+  if (ioctl (tap_fd, TUNSETIFF, (void *)&ifr) < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to create tap");
+    }
+
+  if (ioctl (tap_fd, FIONBIO, &one) < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to set tap to non-blocking io");
+    }
+
+  /* Open a socket to configure the device. */
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to configure tap");
+    }
+
+  if (hw->hw_address)
+    clib_memcpy (ifr.ifr_hwaddr.sa_data, hw->hw_address, ETHER_ADDR_LEN);
+
+  ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+  /* Set the hardware address. */
+  if (ioctl (fd, SIOCSIFHWADDR, &ifr) < 0)
+    {
+      close (tap_fd);
+      close (fd);
+      return clib_error_return (0, "failed to set tap hardware address");
+    }
+
+  /* Get the tap if index. */
+  if (ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
+    {
+      close (tap_fd);
+      close (fd);
+      return clib_error_return (0, "failed to procure tap if index");
+    }
+
+  close (fd);
+
+  /* Get notified when the tap needs to be read. */
+  template.read_function = tap_inject_tap_read;
+  template.file_descriptor = tap_fd;
+
+  clib_file_add (&file_main, &template);
+
+  tap_inject_insert_tap (sw->sw_if_index, tap_fd, ifr.ifr_ifindex);
+
+  return 0;
+}
+
+/*
+ * Remove the tap mapping of sw_if_index and close the tap fd.
+ *
+ * Returns an error when no tap is registered for the interface, else 0.
+ *
+ * NOTE(review): no clib_file_del call is visible here for the fd that
+ * tap_inject_tap_connect registered with clib_file_add -- confirm whether
+ * tap_inject_delete_tap takes care of it.
+ */
+clib_error_t *
+tap_inject_tap_disconnect (u32 sw_if_index)
+{
+  u32 fd = tap_inject_lookup_tap_fd (sw_if_index);
+
+  if (~0 == fd)
+    return clib_error_return (0, "failed to disconnect tap");
+
+  tap_inject_delete_tap (sw_if_index);
+  close (fd);
+
+  return 0;
+}
+
+
+/*
+ * format() helper: append the host interface name for a tap ifindex.
+ *
+ * args: one u32, the host ifindex.
+ *
+ * When the name cannot be resolved, s is returned unchanged.
+ *
+ * Changes from the previous version:
+ *  - failures return s instead of 0. Returning 0 from a %U helper throws
+ *    away everything the caller had formatted so far (and leaks it).
+ *  - the ifindex argument is consumed before anything can fail, so the
+ *    caller's va_list stays in step on the error paths too.
+ */
+u8 *
+format_tap_inject_tap_name (u8 * s, va_list * args)
+{
+  u32 tap_if_index = va_arg (*args, u32);
+  struct ifreq ifr;
+  int fd;
+  int rv;
+
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    return s;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  ifr.ifr_ifindex = tap_if_index;
+
+  rv = ioctl (fd, SIOCGIFNAME, &ifr);
+
+  close (fd);
+
+  if (rv < 0)
+    return s;
+
+  return format (s, "%s", ifr.ifr_name);
+}