aboutsummaryrefslogtreecommitdiffstats
path: root/router
diff options
context:
space:
mode:
Diffstat (limited to 'router')
-rw-r--r--router/Makefile.am5
-rw-r--r--router/router/router.c691
-rw-r--r--router/router/tap_inject.c375
-rw-r--r--router/router/tap_inject.h109
-rw-r--r--router/router/tap_inject_netlink.c184
-rw-r--r--router/router/tap_inject_node.c331
-rw-r--r--router/router/tap_inject_tap.c170
7 files changed, 1173 insertions, 692 deletions
diff --git a/router/Makefile.am b/router/Makefile.am
index 2e9b38f..b9de6a1 100644
--- a/router/Makefile.am
+++ b/router/Makefile.am
@@ -3,7 +3,10 @@ AUTOMAKE_OPTIONS = foreign subdir-objects
AM_CFLAGS = -Wall -I@TOOLKIT_INCLUDE@
lib_LTLIBRARIES = router.la
-router_la_SOURCES = router/router.c
+router_la_SOURCES = router/tap_inject.c \
+ router/tap_inject_netlink.c \
+ router/tap_inject_node.c \
+ router/tap_inject_tap.c
router_la_LDFLAGS = -module
router_la_LIBADD = -lrtnl
diff --git a/router/router/router.c b/router/router/router.c
deleted file mode 100644
index 741f34c..0000000
--- a/router/router/router.c
+++ /dev/null
@@ -1,691 +0,0 @@
-/*
- * Copyright 2016 Intel Corporation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <net/ethernet.h>
-#include <net/if_arp.h>
-#include <netinet/in.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-#include <vnet/ip/ip.h>
-#include <vnet/unix/tuntap.h>
-#include <librtnl/mapper.h>
-#include <vnet/ethernet/arp_packet.h>
-#include <vlibmemory/api.h>
-
-enum {
- NEXT_UNTAPPED = 0,
- NEXT_INJECT,
-};
-
-enum {
- ERROR_INJECT_ARP,
- ERROR_INJECT_ICMP,
- ERROR_INJECT_CLASSIFIED,
-};
-
-static char *error_strings[] = {
- [ERROR_INJECT_ARP] = "Inject ARP",
- [ERROR_INJECT_ICMP] = "Inject ICMP",
- [ERROR_INJECT_CLASSIFIED] = "Inject Classified",
-};
-
-struct tap_to_iface {
- u32 tap;
- u32 iface;
-};
-
-struct router_main {
- vnet_main_t *vnet_main;
- u32 *iface_to_tap;
- u32 *iface_to_protos;
- struct tap_to_iface *tap_to_iface;
- u32 ns_index;
-};
-
-static struct router_main rm;
-
-enum {
- PROTO_ARP = 0,
- PROTO_ICMP4,
- PROTO_IGMP4,
- PROTO_OSPF2,
- PROTO_TCP,
- PROTO_UDP,
- PROTO_N_TOTAL,
-};
-
-enum {
- PROTO_BIT_ARP = 1 << PROTO_ARP,
- PROTO_BIT_ICMP4 = 1 << PROTO_ICMP4,
- PROTO_BIT_IGMP4 = 1 << PROTO_IGMP4,
- PROTO_BIT_OSPF2 = 1 << PROTO_OSPF2,
- PROTO_BIT_TCP = 1 << PROTO_TCP,
- PROTO_BIT_UDP = 1 << PROTO_UDP,
-};
-
-static char *proto_strings[PROTO_N_TOTAL] = {
- [PROTO_ARP] = "arp",
- [PROTO_ICMP4] = "icmp4",
- [PROTO_IGMP4] = "igmp4",
- [PROTO_OSPF2] = "ospf2",
- [PROTO_TCP] = "tcp",
- [PROTO_UDP] = "udp",
-};
-
-static inline u32 parse_protos(char *proto_string)
-{
- u32 protos = 0;
- char *tok, **proto;
-
- for (tok = strtok(proto_string, ","); tok; tok = strtok(NULL, ","))
- for (proto = proto_strings; proto && *proto; ++proto)
- if (!strncmp(tok, *proto, 16))
- protos |= 1 << (proto - proto_strings);
- return protos;
-}
-
-static uword unformat_protos(unformat_input_t *input, va_list *args)
-{
- u32 *protos = va_arg(*args, u32 *);
- u8 *proto_string;
-
- if (unformat(input, "%s", &proto_string))
- *protos = parse_protos((char *)proto_string);
- return 1;
-}
-
-vlib_node_registration_t tap_inject_arp_node;
-vlib_node_registration_t tap_inject_icmp_node;
-vlib_node_registration_t tap_inject_classified_node;
-
-static inline void
-update_arp_entry(vlib_buffer_t *b0, ethernet_arp_header_t *arp, u32 vlib_rx)
-{
- ethernet_header_t *eth;
- ip4_address_t *if_addr;
- ip_interface_address_t *ifa;
-
- if (arp->l2_type != ntohs(ETHERNET_ARP_HARDWARE_TYPE_ethernet) ||
- arp->l3_type != ntohs(ETHERNET_TYPE_IP4))
- return;
-
- /* Check that IP address is local and matches incoming interface. */
- if_addr = ip4_interface_address_matching_destination(&ip4_main,
- &arp->ip4_over_ethernet[1].ip4,
- vlib_rx, &ifa);
- if (!if_addr)
- return;
-
- /* Source must also be local to subnet of matching interface address. */
- if (!ip4_destination_matches_interface(&ip4_main,
- &arp->ip4_over_ethernet[0].ip4, ifa))
- return;
-
- /* Reject replies with our local interface address. */
- if (if_addr->as_u32 == arp->ip4_over_ethernet[0].ip4.as_u32)
- return;
-
- if (if_addr->as_u32 != arp->ip4_over_ethernet[1].ip4.as_u32)
- return;
-
- eth = ethernet_buffer_get_header(b0);
-
- /* Trash ARP packets whose ARP-level source addresses do not
- * match their L2-frame-level source addresses */
- if (memcmp(eth->src_address, arp->ip4_over_ethernet[0].ethernet,
- sizeof(eth->src_address)))
- return;
-
- if (arp->ip4_over_ethernet[0].ip4.as_u32 == 0 ||
- (arp->ip4_over_ethernet[0].ip4.as_u32 ==
- arp->ip4_over_ethernet[1].ip4.as_u32))
- return;
-
- /* Learn or update sender's mapping only for requests or unicasts
- * that don't match local interface address. */
- if (ethernet_address_cast(eth->dst_address) != ETHERNET_ADDRESS_UNICAST)
- return;
-
- vnet_arp_set_ip4_over_ethernet(rm.vnet_main, vlib_rx, ~0,
- &arp->ip4_over_ethernet[0], 0);
-}
-
-static uword
-tap_inject_func(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f,
- int mode)
-{
- u32 n_left_from = f->n_vectors;
- u32 *from = vlib_frame_vector_args(f);
- u32 next_index = node->cached_next_index;
- u32 *to_next;
- u32 counter, count = 0;
-
- while (n_left_from) {
- vlib_buffer_t *b0;
- u32 next0, bi0, n_left;
- u32 vlib_rx, vlib_tx;
- u32 protos, proto_bit = 0;
-
- vlib_get_next_frame(m, node, next_index, to_next, n_left);
-
- *(to_next++) = bi0 = *(from++);
- --n_left_from;
- --n_left;
-
- b0 = vlib_get_buffer(m, bi0);
-
- vlib_rx = vnet_buffer(b0)->sw_if_index[VLIB_RX];
- vlib_tx = rm.iface_to_tap[vlib_rx];
- protos = rm.iface_to_protos[vlib_rx];
-
- next0 = NEXT_UNTAPPED;
-
- if (vlib_tx == 0 || vlib_tx == ~0 || protos == 0)
- goto untapped;
-
- if (mode == ERROR_INJECT_CLASSIFIED) {
- ip4_header_t *iphdr;
-
- iphdr = vlib_buffer_get_current(b0);
- if (iphdr->protocol == IP_PROTOCOL_TCP)
- proto_bit = PROTO_BIT_TCP;
- else if (iphdr->protocol == IP_PROTOCOL_UDP)
- proto_bit = PROTO_BIT_UDP;
- else if (iphdr->protocol == IP_PROTOCOL_OSPF)
- proto_bit = PROTO_BIT_OSPF2;
- else if (iphdr->protocol == IP_PROTOCOL_IGMP)
- proto_bit = PROTO_BIT_IGMP4;
- } else if (mode == ERROR_INJECT_ARP) {
- proto_bit = PROTO_BIT_ARP;
- } else if (mode == ERROR_INJECT_ICMP) {
- proto_bit = PROTO_BIT_ICMP4;
- }
-
- if (!(protos & proto_bit))
- goto untapped;
-
- next0 = NEXT_INJECT;
-
- vnet_buffer(b0)->sw_if_index[VLIB_TX] = vlib_tx;
- ++count;
-
- if (mode == ERROR_INJECT_ARP) {
- ethernet_arp_header_t *arphdr;
-
- arphdr = vlib_buffer_get_current(b0);
- if (arphdr->opcode == ntohs(ETHERNET_ARP_OPCODE_reply))
- update_arp_entry(b0, arphdr, vlib_rx);
- }
-
- /* FIXME: What about VLAN? */
- b0->current_data -= sizeof(ethernet_header_t);
- b0->current_length += sizeof(ethernet_header_t);
-
-untapped:
- vlib_validate_buffer_enqueue_x1(m, node, next_index, to_next,
- n_left, bi0, next0);
- vlib_put_next_frame(m, node, next_index, n_left);
- }
-
- switch (mode) {
- case ERROR_INJECT_ARP:
- counter = ERROR_INJECT_ARP;
- break;
- case ERROR_INJECT_ICMP:
- counter = ERROR_INJECT_ICMP;
- break;
- default:
- counter = ERROR_INJECT_CLASSIFIED;
- }
-
- vlib_node_increment_counter(m, node->node_index, counter, count);
- return f->n_vectors;
-}
-
-static uword
-tap_inject_arp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f)
-{
- return tap_inject_func(m, node, f, ERROR_INJECT_ARP);
-}
-
-VLIB_REGISTER_NODE(tap_inject_arp_node) = {
- .function = tap_inject_arp,
- .name = "tap-inject-arp",
- .vector_size = sizeof(u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(error_strings),
- .error_strings = error_strings,
- .n_next_nodes = 2,
- .next_nodes = {
- [NEXT_UNTAPPED] = "arp-input",
- [NEXT_INJECT] = "interface-output",
- },
-};
-
-static uword
-tap_inject_icmp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f)
-{
- return tap_inject_func(m, node, f, ERROR_INJECT_ICMP);
-}
-
-VLIB_REGISTER_NODE(tap_inject_icmp_node) = {
- .function = tap_inject_icmp,
- .name = "tap-inject-icmp",
- .vector_size = sizeof(u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(error_strings),
- .error_strings = error_strings,
- .n_next_nodes = 2,
- .next_nodes = {
- [NEXT_UNTAPPED] = "ip4-icmp-input",
- [NEXT_INJECT] = "interface-output",
- },
-};
-
-static uword
-tap_inject_classified(vlib_main_t *m, vlib_node_runtime_t *node,
- vlib_frame_t *f)
-{
- return tap_inject_func(m, node, f, ERROR_INJECT_CLASSIFIED);
-}
-
-VLIB_REGISTER_NODE(tap_inject_classified_node) = {
- .function = tap_inject_classified,
- .name = "tap-inject-classified",
- .vector_size = sizeof(u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(error_strings),
- .error_strings = error_strings,
- .n_next_nodes = 2,
- .next_nodes = {
- [NEXT_UNTAPPED] = "error-drop",
- [NEXT_INJECT] = "interface-output",
- },
-};
-
-static int
-set_tap_hwaddr(vlib_main_t *m, char *name, u8 *hwaddr)
-{
- int fd, rc;
- struct ifreq ifr;
-
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
- if (fd < 0)
- return -1;
-
- memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1);
- memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, ETHER_ADDR_LEN);
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
- rc = ioctl(fd, SIOCSIFHWADDR, &ifr) < 0 ? -1 : 0;
- close(fd);
- return rc;
-}
-
-static int
-set_tap_link_state(vlib_main_t *m, char *name, u16 flags)
-{
- int fd, rc;
- struct ifreq ifr;
-
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
- if (fd < 0)
- return -1;
-
- memset(&ifr, 0, sizeof(ifr));
- strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1);
-
- rc = ioctl(fd, SIOCGIFFLAGS, &ifr);
- if (rc < 0)
- goto out;
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
- else
- ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING);
-
- rc = ioctl(fd, SIOCSIFFLAGS, &ifr) < 0 ? -1 : 0;
-out:
- close(fd);
- return rc;
-}
-
-static clib_error_t *
-do_tap_connect(vlib_main_t *m, char *name, u32 iface, u32 *tap)
-{
- vnet_hw_interface_t *hw = vnet_get_hw_interface(rm.vnet_main, iface);
- vnet_sw_interface_t *sw = vnet_get_sw_interface(rm.vnet_main, iface);
- u64 hw_address = 0;
-
- *tap = ~0;
- if (!hw)
- return clib_error_return(0, "invalid interface");
- else if (hw->hw_address)
- memcpy(&hw_address, hw->hw_address, 6);
-
- if (vnet_tap_connect(m, (u8 *)name, (u8 *)&hw_address, tap))
- return clib_error_return(0, "failed to connect tap");
-
- if (set_tap_hwaddr(m, name, (u8 *)&hw_address))
- return clib_error_return(0, "failed to set tap hw address");
-
- if (set_tap_link_state(m, name, sw->flags))
- return clib_error_return(0, "failed to set tap link state");
-
- if (set_int_l2_mode(m, rm.vnet_main, MODE_L2_XC, *tap, 0, 0, 0, iface))
- return clib_error_return(0, "failed to xconnect to interface");
-
- return vnet_sw_interface_set_flags(rm.vnet_main, *tap,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-}
-
-static void add_del_addr(ns_addr_t *a, int is_del)
-{
- struct tap_to_iface *map = NULL;
- u32 sw_if_index = ~0;
-
- vec_foreach(map, rm.tap_to_iface) {
- if (a->ifaddr.ifa_index == map->tap) {
- sw_if_index = map->iface;
- break;
- }
- }
-
- if (sw_if_index == ~0)
- return;
-
- ip4_add_del_interface_address(vlib_get_main(),
- sw_if_index, (ip4_address_t *)a->local,
- a->ifaddr.ifa_prefixlen, is_del);
-}
-
-static void add_del_route(ns_route_t *r, int is_del)
-{
- struct tap_to_iface *map = NULL;
- u32 sw_if_index = ~0;
-
- vec_foreach(map, rm.tap_to_iface) {
- if (r->oif == map->tap) {
- sw_if_index = map->iface;
- break;
- }
- }
-
- if (sw_if_index == ~0 || r->table != 254)
- return;
-
- ip4_add_del_route_next_hop(&ip4_main,
- is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD,
- (ip4_address_t *)r->dst, r->rtm.rtm_dst_len,
- (ip4_address_t *)r->gateway, sw_if_index, 0, ~0, 0);
-}
-
-struct set_flags_args {
- u32 sw_if_index;
- u8 flags;
-};
-
-static void set_interface_flags_callback(struct set_flags_args *a)
-{
- vnet_sw_interface_set_flags(rm.vnet_main, a->sw_if_index,
- a->flags);
-}
-
-static void add_del_link(ns_link_t *l, int is_del)
-{
- struct tap_to_iface *map = NULL;
- u32 sw_if_index = ~0;
- u8 flags = 0;
- struct set_flags_args args;
- vnet_sw_interface_t *sw = NULL;
-
- vec_foreach(map, rm.tap_to_iface) {
- if (l->ifi.ifi_index == map->tap) {
- sw_if_index = map->iface;
- break;
- }
- }
-
- if (sw_if_index == ~0)
- return;
-
- sw = vnet_get_sw_interface(rm.vnet_main, sw_if_index);
- flags = sw->flags;
-
- if (l->ifi.ifi_flags & IFF_UP)
- flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP;
- else
- flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
-
- args.sw_if_index = sw_if_index;
- args.flags = flags;
-
- vl_api_rpc_call_main_thread(set_interface_flags_callback,
- (u8 *) &args, sizeof(args));
-}
-
-static void
-netns_notify_cb(void *obj, netns_type_t type, u32 flags, uword opaque)
-{
- if (type == NETNS_TYPE_ADDR)
- add_del_addr((ns_addr_t *)obj, flags & NETNS_F_DEL);
- else if (type == NETNS_TYPE_ROUTE)
- add_del_route((ns_route_t *)obj, flags & NETNS_F_DEL);
- else if (type == NETNS_TYPE_LINK)
- add_del_link((ns_link_t *)obj, flags & NETNS_F_DEL);
-}
-
-static void insert_tap_to_iface(u32 tap, u32 iface)
-{
- struct tap_to_iface map = {
- .tap = tap,
- .iface = iface,
- };
-
- vec_add1(rm.tap_to_iface, map);
-}
-
-
-static u32 ip4_next_index = ~0;
-
-static u32
-ip4_lookup_next_index(void)
-{
- if (ip4_next_index == ~0) {
- ip4_next_index = vlib_node_add_next(vlib_get_main(),
- ip4_lookup_node.index,
- tap_inject_classified_node.index);
- }
-
- return ip4_next_index;
-}
-
-static u32 ip4_multicast_arc_added;
-
-static void
-add_ip4_multicast_arc(void)
-{
- ip4_add_del_route_args_t a;
- ip_adjacency_t add_adj;
-
- if (ip4_multicast_arc_added)
- return;
-
- memset(&a, 0, sizeof(a));
- memset(&add_adj, 0, sizeof(add_adj));
-
- a.add_adj = &add_adj;
- a.n_add_adj = 1;
-
- a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD;
- a.table_index_or_table_id = 0;
- a.dst_address.as_u32 = 0x000000E0; /* 224.0.0.0 */
- a.dst_address_length = 24;
- a.adj_index = ~0;
-
- add_adj.explicit_fib_index = ~0;
- add_adj.rewrite_header.node_index = ip4_rewrite_node.index;
- add_adj.lookup_next_index = ip4_lookup_next_index();
- add_adj.if_address_index = ~0;
-
- ip4_add_del_route(&ip4_main, &a);
- ip4_multicast_arc_added = 1;
-}
-
-static clib_error_t *
-tap_inject(vlib_main_t *m, unformat_input_t *input, vlib_cli_command_t *cmd)
-{
- char *name = NULL;
- u32 iface = ~0, tap = ~0, protos = 0;
- clib_error_t *err;
-
- while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) {
- if (unformat(input, "from %U", unformat_vnet_sw_interface,
- rm.vnet_main, &iface))
- ;
- else if (unformat(input, "as %s", &name))
- ;
- else if (unformat(input, "%U", unformat_protos, &protos))
- ;
- else
- break;
- }
-
- if (!protos)
- return clib_error_return(0,
- "no protocols specified");
- else if (iface == ~0)
- return clib_error_return(0,
- "interface name is missing or invalid");
- else if (!name)
- return clib_error_return(0,
- "host interface name is missing or invalid");
-
- if (protos & PROTO_BIT_OSPF2) {
- /* Require arp, icmp4, and igmp4 for ospf2. */
- if (!(protos & PROTO_BIT_ARP) ||
- !(protos & PROTO_BIT_ICMP4) ||
- !(protos & PROTO_BIT_IGMP4))
- return clib_error_return(0,
- "ospf2 requires arp, icmp4, and igmp4");
- }
-
- if (protos & PROTO_BIT_TCP) /* Require arp and icmp4 for tcp. */
- if (!(protos & PROTO_BIT_ARP) || !(protos & PROTO_BIT_ICMP4))
- return clib_error_return(0,
- "tcp requires arp and icmp4");
-
- if (protos & PROTO_BIT_UDP) {
- /* Require arp, icmp4, and igmp4 for udp. */
- if (!(protos & PROTO_BIT_ARP) ||
- !(protos & PROTO_BIT_ICMP4) ||
- !(protos & PROTO_BIT_IGMP4))
- return clib_error_return(0,
- "udp requires arp, icmp4, and igmp4");
- }
-
- err = do_tap_connect(m, name, iface, &tap);
- if (err) {
- if (tap != ~0)
- vnet_tap_delete(m, tap);
- return err;
- }
-
- if ((protos & PROTO_BIT_ARP) || (protos & PROTO_BIT_ICMP4)) {
- if (rm.ns_index == ~0) {
- char nsname = 0;
- netns_sub_t sub = {
- .notify = netns_notify_cb,
- .opaque = 0,
- };
-
- rm.ns_index = netns_open(&nsname, &sub);
- if (rm.ns_index == ~0) {
- vnet_tap_delete(m, tap);
- clib_error_return(0,
- "failed to open namespace");
- }
- }
- }
-
- if (protos & PROTO_BIT_IGMP4)
- add_ip4_multicast_arc();
-
- if (protos & PROTO_BIT_ARP)
- ethernet_register_input_type(m, ETHERNET_TYPE_ARP,
- tap_inject_arp_node.index);
-
- if (protos & PROTO_BIT_ICMP4)
- ip4_register_protocol(IP_PROTOCOL_ICMP,
- tap_inject_icmp_node.index);
-
- if (protos & PROTO_BIT_OSPF2)
- ip4_register_protocol(IP_PROTOCOL_OSPF,
- tap_inject_classified_node.index);
-
- if (protos & PROTO_BIT_TCP)
- ip4_register_protocol(IP_PROTOCOL_TCP,
- tap_inject_classified_node.index);
-
- if (protos & PROTO_BIT_UDP)
- ip4_register_protocol(IP_PROTOCOL_UDP,
- tap_inject_classified_node.index);
-
- /* Find sw_if_index of tap associated with data plane interface. */
- rm.iface_to_tap[iface] = tap;
- rm.iface_to_protos[iface] = protos;
-
- /* Find data plane interface associated with host tap ifindex. */
- insert_tap_to_iface(if_nametoindex(name), iface);
-
- return 0;
-}
-
-VLIB_CLI_COMMAND(tap_inject_command, static) = {
- .path = "tap inject",
- .short_help = "tap inject <protocol[,protocol...]> from <intfc-name> as <host-intfc-name>",
- .function = tap_inject,
-};
-
-static clib_error_t *
-interface_add_del(struct vnet_main_t *m, u32 hw_if_index, u32 add)
-{
- vnet_hw_interface_t *hw = vnet_get_hw_interface(m, hw_if_index);
- vnet_sw_interface_t *sw = vnet_get_sw_interface(m, hw->sw_if_index);
- ASSERT(hw->sw_if_index == sw->sw_if_index);
-
- vec_validate(rm.iface_to_tap, sw->sw_if_index);
- vec_validate(rm.iface_to_protos, sw->sw_if_index);
- rm.iface_to_tap[sw->sw_if_index] = ~0;
- rm.iface_to_protos[sw->sw_if_index] = 0;
- return 0;
-}
-VNET_HW_INTERFACE_ADD_DEL_FUNCTION(interface_add_del);
-
-clib_error_t *
-vlib_plugin_register(vlib_main_t *m, vnet_plugin_handoff_t *h, int f)
-{
- rm.vnet_main = h->vnet_main;
- rm.ns_index = ~0;
- return 0;
-}
-
-static clib_error_t *router_init(vlib_main_t *m)
-{
- return 0;
-}
-VLIB_INIT_FUNCTION(router_init);
diff --git a/router/router/tap_inject.c b/router/router/tap_inject.c
new file mode 100644
index 0000000..8d6f5af
--- /dev/null
+++ b/router/router/tap_inject.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+static tap_inject_main_t tap_inject_main;
+
+/* Accessor for the plugin's single, file-scope tap_inject_main_t instance. */
+tap_inject_main_t *
+tap_inject_get_main (void)
+{
+  tap_inject_main_t * im = &tap_inject_main;
+
+  return im;
+}
+
+/*
+ * Record a tap for sw_if_index in all four lookup tables: the two vectors
+ * indexed by sw_if_index, the vector indexed by tap_fd, and the hash keyed
+ * by tap_if_index. The vectors are grown on demand, with new slots set
+ * to ~0.
+ */
+void
+tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index)
+{
+  tap_inject_main_t * main_state = tap_inject_get_main ();
+
+  /* sw_if_index -> tap file descriptor */
+  vec_validate_init_empty (main_state->sw_if_index_to_tap_fd,
+                           sw_if_index, ~0);
+  main_state->sw_if_index_to_tap_fd[sw_if_index] = tap_fd;
+
+  /* sw_if_index -> tap interface index */
+  vec_validate_init_empty (main_state->sw_if_index_to_tap_if_index,
+                           sw_if_index, ~0);
+  main_state->sw_if_index_to_tap_if_index[sw_if_index] = tap_if_index;
+
+  /* tap file descriptor -> sw_if_index */
+  vec_validate_init_empty (main_state->tap_fd_to_sw_if_index, tap_fd, ~0);
+  main_state->tap_fd_to_sw_if_index[tap_fd] = sw_if_index;
+
+  /* tap interface index -> sw_if_index */
+  hash_set (main_state->tap_if_index_to_sw_if_index, tap_if_index,
+            sw_if_index);
+}
+
+/*
+ * Forget every mapping recorded for sw_if_index by tap_inject_insert_tap.
+ *
+ * The call is a no-op when sw_if_index lies beyond either sw_if_index
+ * indexed vector, i.e. when nothing can have been recorded for it. The
+ * reverse tap_fd slot is only cleared when the stored tap_fd is a valid
+ * index, so an empty (~0) entry never causes an out-of-bounds write.
+ */
+void
+tap_inject_delete_tap (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 tap_fd;
+  u32 tap_if_index;
+
+  if (sw_if_index >= vec_len (im->sw_if_index_to_tap_fd) ||
+      sw_if_index >= vec_len (im->sw_if_index_to_tap_if_index))
+    return;
+
+  tap_fd = im->sw_if_index_to_tap_fd[sw_if_index];
+  tap_if_index = im->sw_if_index_to_tap_if_index[sw_if_index];
+
+  im->sw_if_index_to_tap_if_index[sw_if_index] = ~0;
+  im->sw_if_index_to_tap_fd[sw_if_index] = ~0;
+
+  if (tap_fd < vec_len (im->tap_fd_to_sw_if_index))
+    im->tap_fd_to_sw_if_index[tap_fd] = ~0;
+
+  hash_unset (im->tap_if_index_to_sw_if_index, tap_if_index);
+}
+
+/*
+ * Return the tap file descriptor recorded for sw_if_index, or ~0 when none
+ * is recorded.
+ *
+ * The lookup is read-only: an index beyond the end of the vector yields ~0
+ * instead of growing the vector, so a stray or very large index can no
+ * longer trigger an allocation.
+ */
+u32
+tap_inject_lookup_tap_fd (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (sw_if_index >= vec_len (im->sw_if_index_to_tap_fd))
+    return ~0;
+
+  return im->sw_if_index_to_tap_fd[sw_if_index];
+}
+
+/*
+ * Return the sw_if_index recorded for tap_fd, or ~0 when none is recorded.
+ *
+ * The lookup is read-only: a descriptor beyond the end of the vector
+ * (including ~0) yields ~0 instead of growing the vector.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (tap_fd >= vec_len (im->tap_fd_to_sw_if_index))
+    return ~0;
+
+  return im->tap_fd_to_sw_if_index[tap_fd];
+}
+
+/*
+ * Return the sw_if_index stored in the hash under tap_if_index, or ~0 when
+ * the key is absent.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  uword * value;
+
+  value = hash_get (im->tap_if_index_to_sw_if_index, tap_if_index);
+
+  /*
+   * Convert the uword value rather than reading it through a u32 pointer:
+   * the pointer cast picks the wrong half of the slot on 64-bit big-endian
+   * hosts.
+   */
+  return value ? (u32) value[0] : ~0;
+}
+
+
+/* Plugin registration entry point; performs no work and reports no error. */
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, int f)
+{
+  clib_error_t * no_error = 0;
+
+  return no_error;
+}
+
+
+/*
+ * Clear TAP_INJECT_F_ENABLED. Nothing registered by tap_inject_enable is
+ * undone here, which is what the warning tells the operator.
+ */
+static void
+tap_inject_disable (void)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 remaining = im->flags & ~TAP_INJECT_F_ENABLED;
+
+  im->flags = remaining;
+
+  clib_warning ("tap-inject is not actually disabled.");
+}
+
+/*
+ * Turn tap-inject on. Does nothing when TAP_INJECT_F_ENABLED is already set.
+ *
+ * Netlink handling is always started. Unless the netlink-only option was
+ * configured, this also registers the neighbor node for ARP and ICMP6,
+ * registers the tx node for ICMP/OSPF/TCP/UDP over IPv4 and OSPF/TCP/UDP
+ * over IPv6, and adds a 224.0.0.0/24 route to table 0 whose adjacency uses
+ * the tx node as its ip4-lookup next node.
+ *
+ * Always returns 0.
+ */
+static clib_error_t *
+tap_inject_enable (void)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (tap_inject_is_enabled ())
+    return 0;
+
+  tap_inject_enable_netlink ();
+
+  /* Only enable netlink? */
+  if (im->flags & TAP_INJECT_F_CONFIG_NETLINK)
+    {
+      im->flags |= TAP_INJECT_F_ENABLED;
+      return 0;
+    }
+
+  /* Register ARP and ICMP6 as neighbor nodes. */
+  ethernet_register_input_type (vm, ETHERNET_TYPE_ARP,
+                                im->neighbor_node_index);
+  ip6_register_protocol (IP_PROTOCOL_ICMP6, im->neighbor_node_index);
+
+  /* Register remaining protocols. */
+  ip4_register_protocol (IP_PROTOCOL_ICMP, im->tx_node_index);
+
+  ip4_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+  ip4_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+  ip4_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+  ip6_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+  ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+  ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+  /* Add IPv4 multicast route. */
+  {
+    ip4_add_del_route_args_t a;
+    ip_adjacency_t add_adj;
+    u32 next_node_index;
+
+    memset (&a, 0, sizeof (a));
+    memset (&add_adj, 0, sizeof (add_adj));
+
+    a.add_adj = &add_adj;
+    a.n_add_adj = 1;
+
+    a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD;
+    a.table_index_or_table_id = 0;
+    /*
+     * 224.0.0.0 in network byte order. Converting from the host-order
+     * constant keeps the prefix correct on big-endian hosts as well.
+     */
+    a.dst_address.as_u32 = clib_host_to_net_u32 (0xE0000000);
+    a.dst_address_length = 24;
+    a.adj_index = ~0;
+
+    next_node_index = vlib_node_add_next (vm, ip4_lookup_node.index,
+                                          im->tx_node_index);
+
+    add_adj.explicit_fib_index = ~0;
+    add_adj.rewrite_header.node_index = ip4_rewrite_node.index;
+    add_adj.lookup_next_index = next_node_index;
+    add_adj.if_address_index = ~0;
+
+    ip4_add_del_route (&ip4_main, &a);
+  }
+
+  im->flags |= TAP_INJECT_F_ENABLED;
+
+  return 0;
+}
+
+/*
+ * Drain the two pending interface lists.
+ *
+ * Every queued interface of ethernet hardware class is connected to a tap;
+ * the first connect failure is logged and stops further connects. Every
+ * interface queued for disable is then disconnected. Both lists are freed
+ * afterwards.
+ *
+ * Registered below as an interrupt-driven input node, and also called
+ * directly (with null node and frame) when all interfaces are toggled.
+ * Neither argument is used here.
+ *
+ * Returns (uword) -1 if a connect failed, 0 otherwise.
+ */
+static uword
+tap_inject_iface_isr (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_hw_interface_t * hw;
+  u32 * hw_if_index;
+  clib_error_t * err = 0;
+  uword rv = 0;
+
+  vec_foreach (hw_if_index, im->interfaces_to_enable)
+    {
+      hw = vnet_get_hw_interface (vnet_get_main (), *hw_if_index);
+
+      if (hw->hw_class_index == ethernet_hw_interface_class.index)
+        {
+          err = tap_inject_tap_connect (hw);
+          if (err)
+            {
+              /* Log the reason and release the error instead of leaking it. */
+              clib_error_report (err);
+              rv = -1;
+              break;
+            }
+        }
+    }
+
+  vec_foreach (hw_if_index, im->interfaces_to_disable)
+    {
+      /*
+       * The result stays ignored as before, but the error object is now
+       * released rather than dropped.
+       * NOTE(review): the prototype names this parameter sw_if_index while a
+       * hw_if_index is passed here; confirm the two are interchangeable.
+       */
+      clib_error_t * disconnect_err = tap_inject_tap_disconnect (*hw_if_index);
+
+      clib_error_free (disconnect_err);
+    }
+
+  vec_free (im->interfaces_to_enable);
+  vec_free (im->interfaces_to_disable);
+
+  return rv;
+}
+
+VLIB_REGISTER_NODE (tap_inject_iface_isr_node, static) = {
+  .function = tap_inject_iface_isr,
+  .name = "tap-inject-iface-isr",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Hardware interface add/del hook. When tap-inject is enabled in config,
+ * make sure the feature is turned on, queue hw_if_index on the enable list
+ * (add) or the disable list (delete), and raise the interrupt for the
+ * tap-inject-iface-isr node. Always returns 0.
+ */
+static clib_error_t *
+tap_inject_interface_add_del (struct vnet_main_t * vnet_main, u32 hw_if_index,
+                              u32 add)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 ** pending;
+
+  if (!tap_inject_is_config_enabled ())
+    return 0;
+
+  tap_inject_enable ();
+
+  pending = add ? &im->interfaces_to_enable : &im->interfaces_to_disable;
+  vec_add1 (*pending, hw_if_index);
+
+  vlib_node_set_interrupt_pending (vm, tap_inject_iface_isr_node.index);
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (tap_inject_interface_add_del);
+
+
+/*
+ * Enable or disable tap-inject, queue every hardware interface in the pool
+ * on the matching pending list, and process the lists immediately by calling
+ * the isr routine directly. Returns an error when that routine reports a
+ * failure, 0 otherwise.
+ */
+static clib_error_t *
+tap_inject_enable_disable_all_interfaces (int enable)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vnet_hw_interface_t * pool;
+  vnet_hw_interface_t * hw;
+  u32 ** pending;
+
+  if (enable)
+    {
+      tap_inject_enable ();
+      pending = &im->interfaces_to_enable;
+    }
+  else
+    {
+      tap_inject_disable ();
+      pending = &im->interfaces_to_disable;
+    }
+
+  /* Queue the pool index of every hardware interface. */
+  pool = vnet_main->interface_main.hw_interfaces;
+  pool_foreach (hw, pool, vec_add1 (*pending, hw - pool));
+
+  if (tap_inject_iface_isr (vlib_get_main (), 0, 0))
+    return clib_error_return (0, "tap-inject interface add del isr failed");
+
+  return 0;
+}
+
+/*
+ * Shared handler for "enable tap-inject" (function_arg 1) and
+ * "disable tap-inject" (function_arg 0).
+ *
+ * Disabling runs the disable pass over all interfaces and clears the
+ * config-enable flag. Enabling is refused when tap-inject is disabled in
+ * config; if the enable pass fails, a disable pass is run and the error is
+ * returned, otherwise the config-enable flag is set.
+ */
+static clib_error_t *
+tap_inject_cli (vlib_main_t * vm, unformat_input_t * input,
+                vlib_cli_command_t * cmd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  clib_error_t * err;
+
+  if (!cmd->function_arg)
+    {
+      /* Disable */
+      tap_inject_enable_disable_all_interfaces (0);
+      im->flags &= ~TAP_INJECT_F_CONFIG_ENABLE;
+      return 0;
+    }
+
+  if (tap_inject_is_config_disabled ())
+    return clib_error_return (0,
+            "tap-inject is disabled in config, thus cannot be enabled.");
+
+  /* Enable */
+  err = tap_inject_enable_disable_all_interfaces (1);
+  if (err)
+    {
+      tap_inject_enable_disable_all_interfaces (0);
+      return err;
+    }
+
+  im->flags |= TAP_INJECT_F_CONFIG_ENABLE;
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (tap_inject_enable_cmd, static) = {
+  .path = "enable tap-inject",
+  .short_help = "enable tap-inject",
+  .function = tap_inject_cli,
+  .function_arg = 1,
+};
+
+VLIB_CLI_COMMAND (tap_inject_disable_cmd, static) = {
+  .path = "disable tap-inject",
+  .short_help = "disable tap-inject",
+  .function = tap_inject_cli,
+  .function_arg = 0,
+};
+
+
+/*
+ * "show tap-inject": print one "<interface> -> <tap>" line per entry of the
+ * tap_if_index -> sw_if_index hash, or a single status line when tap-inject
+ * is disabled in config or not enabled.
+ */
+static clib_error_t *
+show_tap_inject (vlib_main_t * vm, unformat_input_t * input,
+                 vlib_cli_command_t * cmd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_main_t * vnet_main = vnet_get_main ();
+  u32 tap_if_index, sw_if_index;
+
+  if (tap_inject_is_config_disabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is disabled in config.\n");
+      return 0;
+    }
+
+  if (!tap_inject_is_enabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is not enabled.\n");
+      return 0;
+    }
+
+  hash_foreach (tap_if_index, sw_if_index, im->tap_if_index_to_sw_if_index, {
+    vlib_cli_output (vm, "%U -> %U",
+                     format_vnet_sw_interface_name, vnet_main,
+                     vnet_get_sw_interface (vnet_main, sw_if_index),
+                     format_tap_inject_tap_name, tap_if_index);
+  });
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_tap_inject_cmd, static) = {
+  .path = "show tap-inject",
+  .short_help = "show tap-inject",
+  .function = show_tap_inject,
+};
+
+
+/*
+ * Parse the "tap-inject" config section. Accepted keywords are "enable",
+ * "disable" and "netlink-only"; each sets its TAP_INJECT_F_CONFIG_* flag.
+ * Any other token is a syntax error, and so is asking for both enable and
+ * disable.
+ */
+static clib_error_t *
+tap_inject_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 flag;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable"))
+        flag = TAP_INJECT_F_CONFIG_ENABLE;
+      else if (unformat (input, "disable"))
+        flag = TAP_INJECT_F_CONFIG_DISABLE;
+      else if (unformat (input, "netlink-only"))
+        flag = TAP_INJECT_F_CONFIG_NETLINK;
+      else
+        return clib_error_return (0, "syntax error `%U'",
+                                  format_unformat_error, input);
+
+      im->flags |= flag;
+    }
+
+  if ((im->flags & TAP_INJECT_F_CONFIG_ENABLE) &&
+      (im->flags & TAP_INJECT_F_CONFIG_DISABLE))
+    return clib_error_return (0,
+            "tap-inject cannot be both enabled and disabled.");
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tap_inject_config, "tap-inject");
diff --git a/router/router/tap_inject.h b/router/router/tap_inject.h
new file mode 100644
index 0000000..001ab52
--- /dev/null
+++ b/router/router/tap_inject.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TAP_INJECT_H
+#define _TAP_INJECT_H
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/ip/ip.h>
+
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN 6
+#endif
+
+/* All state of the tap-inject plugin; one instance lives in tap_inject.c. */
+typedef struct {
+ /*
+ * tap-inject can be enabled or disabled in config file or during runtime.
+ * When disabled in config, it is not possible to enable during runtime.
+ *
+ * When the netlink-only option is used, netlink configuration is monitored
+ * and mirrored to the data plane but no traffic is passed between the host
+ * and the data plane.
+ */
+#define TAP_INJECT_F_CONFIG_ENABLE (1U << 0)
+#define TAP_INJECT_F_CONFIG_DISABLE (1U << 1)
+#define TAP_INJECT_F_CONFIG_NETLINK (1U << 2)
+#define TAP_INJECT_F_ENABLED (1U << 3)
+
+ /* Bitwise OR of the TAP_INJECT_F_* values above. */
+ u32 flags;
+
+ /* Vectors indexed by sw_if_index; tap_inject.c fills unused slots with ~0. */
+ u32 * sw_if_index_to_tap_fd;
+ u32 * sw_if_index_to_tap_if_index;
+ /* Vector indexed by tap file descriptor; unused slots hold ~0. */
+ u32 * tap_fd_to_sw_if_index;
+ /*
+ * Accessed only through hash_set / hash_get / hash_unset / hash_foreach in
+ * tap_inject.c, i.e. used as a hash keyed by tap_if_index despite the
+ * u32 * declaration.
+ */
+ u32 * tap_if_index_to_sw_if_index;
+
+ /* hw_if_index values queued for the tap-inject-iface-isr node to process. */
+ u32 * interfaces_to_enable;
+ u32 * interfaces_to_disable;
+
+ /* NOTE(review): not referenced in tap_inject.c; presumably owned by the rx node - confirm. */
+ u32 * rx_file_descriptors;
+
+ /*
+ * Node indices. tap_inject_enable registers tx_node_index and
+ * neighbor_node_index as protocol handlers; where they (and rx_node_index)
+ * get assigned is outside tap_inject.c.
+ */
+ u32 rx_node_index;
+ u32 tx_node_index;
+ u32 neighbor_node_index;
+
+ /* NOTE(review): not referenced in tap_inject.c; presumably buffers for the rx node - confirm. */
+ u32 * rx_buffers;
+
+} tap_inject_main_t;
+
+
+tap_inject_main_t * tap_inject_get_main (void);
+
+void tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index);
+void tap_inject_delete_tap (u32 sw_if_index);
+
+u32 tap_inject_lookup_tap_fd (u32 sw_if_index);
+u32 tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd);
+u32 tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index);
+
+/* Return 1 when TAP_INJECT_F_ENABLED is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_enabled (void)
+{
+  u32 flags = tap_inject_get_main ()->flags;
+
+  return (flags & TAP_INJECT_F_ENABLED) != 0;
+}
+
+/* Return 1 when TAP_INJECT_F_CONFIG_ENABLE is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_config_enabled (void)
+{
+  u32 flags = tap_inject_get_main ()->flags;
+
+  return (flags & TAP_INJECT_F_CONFIG_ENABLE) != 0;
+}
+
+/* Return 1 when TAP_INJECT_F_CONFIG_DISABLE is set in the plugin flags, else 0. */
+static inline int
+tap_inject_is_config_disabled (void)
+{
+  u32 flags = tap_inject_get_main ()->flags;
+
+  return (flags & TAP_INJECT_F_CONFIG_DISABLE) != 0;
+}
+
+
+/* Netlink */
+
+void tap_inject_enable_netlink (void);
+
+
+/* Tap */
+
+clib_error_t * tap_inject_tap_connect (vnet_hw_interface_t * hw);
+clib_error_t * tap_inject_tap_disconnect (u32 sw_if_index);
+
+u8 * format_tap_inject_tap_name (u8 * s, va_list * args);
+
+#endif /* _TAP_INJECT_H */
diff --git a/router/router/tap_inject_netlink.c b/router/router/tap_inject_netlink.c
new file mode 100644
index 0000000..a30e262
--- /dev/null
+++ b/router/router/tap_inject_netlink.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <librtnl/netns.h>
+#include <vlibmemory/api.h>
+#include <vnet/ethernet/arp_packet.h>
+
+
+/*
+ * Mirror a host address notification onto the data-plane interface that is
+ * paired with the tap identified by a->ifaddr.ifa_index.
+ *
+ * a      - address notification (family, prefix length, address bytes).
+ * is_del - non-zero to remove the address, zero to add it.
+ *
+ * Notifications for interfaces without a mapping (lookup yields ~0) and for
+ * address families other than AF_INET / AF_INET6 are ignored.
+ */
+static void
+add_del_addr (ns_addr_t * a, int is_del)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                      a->ifaddr.ifa_index);
+
+  if (sw_if_index == ~0)
+    return;
+
+  switch (a->ifaddr.ifa_family)
+    {
+    case AF_INET:
+      /* IPv4 uses the 'local' field of the notification. */
+      ip4_add_del_interface_address (vm, sw_if_index,
+                                     (ip4_address_t *) a->local,
+                                     a->ifaddr.ifa_prefixlen, is_del);
+      break;
+
+    case AF_INET6:
+      /* IPv6 uses the 'addr' field of the notification. */
+      ip6_add_del_interface_address (vm, sw_if_index,
+                                     (ip6_address_t *) a->addr,
+                                     a->ifaddr.ifa_prefixlen, is_del);
+      break;
+
+    default:
+      break;
+    }
+}
+
+
+/*
+ * Argument bundle for set_flags_cb. add_del_link fills it in and hands it to
+ * vl_api_rpc_call_main_thread together with its size.
+ */
+struct set_flags_args {
+ /* Software interface index whose flags are to be set. */
+ u32 index;
+ /* Flag value passed to vnet_sw_interface_set_flags. */
+ u8 flags;
+};
+
+/*
+ * RPC callback: apply the flags carried in 'a' to the software interface
+ * carried in 'a'.
+ */
+static void
+set_flags_cb (struct set_flags_args * a)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+
+  vnet_sw_interface_set_flags (vnet_main, a->index, a->flags);
+}
+
+/*
+ * Mirror the host link's IFF_UP state onto the admin-up flag of the paired
+ * data-plane interface.
+ *
+ * l      - link notification; ifi_index selects the tap, ifi_flags carries
+ *          the host-side state.
+ * is_del - not consulted by this function.
+ *
+ * The flag change is not applied directly; it is queued through
+ * vl_api_rpc_call_main_thread with set_flags_cb as the callback.
+ */
+static void
+add_del_link (ns_link_t * l, int is_del)
+{
+  struct set_flags_args rpc_args = { ~0, 0 };
+  vnet_sw_interface_t * sw_if;
+  u32 sw_if_index;
+  u8 new_flags;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                  l->ifi.ifi_index);
+
+  /* No data-plane interface is mapped to this host link. */
+  if (sw_if_index == ~0)
+    return;
+
+  sw_if = vnet_get_sw_interface (vnet_get_main (), sw_if_index);
+
+  /* Start from the current flags and only toggle the admin-up bit. */
+  new_flags = (l->ifi.ifi_flags & IFF_UP)
+    ? (sw_if->flags | VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    : (sw_if->flags & ~VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+  rpc_args.index = sw_if_index;
+  rpc_args.flags = new_flags;
+
+  vl_api_rpc_call_main_thread (set_flags_cb, (u8 *)&rpc_args,
+                               sizeof (rpc_args));
+}
+
+
+/*
+ * Mirror a host neighbor-table notification into the data plane.
+ *
+ * n      - neighbor notification (interface index, family, state, link-layer
+ *          address and destination address).
+ * is_del - not consulted by this function; the decision is driven by
+ *          n->nd.ndm_state only.
+ *
+ * IPv4: NUD_REACHABLE installs the ARP entry, NUD_FAILED removes it, any
+ *       other state is ignored.
+ * IPv6: NUD_REACHABLE installs the neighbor, every other state removes it.
+ *
+ * NOTE(review): the IPv4 and IPv6 branches treat non-reachable states
+ * differently, and is_del is unused - confirm both are intentional.
+ */
+static void
+add_del_neigh (ns_neigh_t * n, int is_del)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                      n->nd.ndm_ifindex);
+
+  if (sw_if_index == ~0)
+    return;
+
+  switch (n->nd.ndm_family)
+    {
+    case AF_INET:
+      {
+        ethernet_arp_ip4_over_ethernet_address_t entry;
+
+        memset (&entry, 0, sizeof (entry));
+
+        clib_memcpy (&entry.ethernet, n->lladdr, ETHER_ADDR_LEN);
+        clib_memcpy (&entry.ip4, n->dst, sizeof (entry.ip4));
+
+        if (n->nd.ndm_state & NUD_REACHABLE)
+          {
+            vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0,
+                                            &entry, 0);
+          }
+        else if (n->nd.ndm_state & NUD_FAILED)
+          {
+            vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0,
+                                              &entry);
+          }
+      }
+      break;
+
+    case AF_INET6:
+      if (n->nd.ndm_state & NUD_REACHABLE)
+        {
+          vnet_set_ip6_ethernet_neighbor (vm, sw_if_index,
+                                          (ip6_address_t *) n->dst,
+                                          n->lladdr, ETHER_ADDR_LEN, 0);
+        }
+      else
+        {
+          vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index,
+                                            (ip6_address_t *) n->dst,
+                                            n->lladdr, ETHER_ADDR_LEN);
+        }
+      break;
+
+    default:
+      break;
+    }
+}
+
+
+/* Host routing table whose routes are mirrored (254 is Linux RT_TABLE_MAIN). */
+#define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254
+
+/*
+ * Mirror a host route notification into the data-plane FIB.
+ *
+ * r      - route notification (output interface, table, family, destination
+ *          prefix and gateway).
+ * is_del - non-zero to delete the next hop, zero to add it.
+ *
+ * Routes whose output interface has no data-plane mapping, or that do not
+ * belong to TAP_INJECT_HOST_ROUTE_TABLE_MAIN, are ignored, as are address
+ * families other than AF_INET / AF_INET6.
+ */
+static void
+add_del_route (ns_route_t * r, int is_del)
+{
+  u32 sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif);
+
+  if (sw_if_index == ~0)
+    return;
+
+  if (r->table != TAP_INJECT_HOST_ROUTE_TABLE_MAIN)
+    return;
+
+  switch (r->rtm.rtm_family)
+    {
+    case AF_INET:
+      ip4_add_del_route_next_hop (&ip4_main,
+                                  is_del ? IP4_ROUTE_FLAG_DEL
+                                         : IP4_ROUTE_FLAG_ADD,
+                                  (ip4_address_t *) r->dst,
+                                  r->rtm.rtm_dst_len,
+                                  (ip4_address_t *) r->gateway,
+                                  sw_if_index, 0, ~0, 0);
+      break;
+
+    case AF_INET6:
+      ip6_add_del_route_next_hop (&ip6_main,
+                                  is_del ? IP6_ROUTE_FLAG_DEL
+                                         : IP6_ROUTE_FLAG_ADD,
+                                  (ip6_address_t *) r->dst,
+                                  r->rtm.rtm_dst_len,
+                                  (ip6_address_t *) r->gateway,
+                                  sw_if_index, 0, ~0, 0);
+      break;
+
+    default:
+      break;
+    }
+}
+
+
+/*
+ * Notification callback registered with netns_open: dispatch the object to
+ * the handler matching its type, telling the handler whether NETNS_F_DEL is
+ * set in 'flags'. Unknown types are ignored; 'opaque' is unused.
+ */
+static void
+netns_notify_cb (void * obj, netns_type_t type, u32 flags, uword opaque)
+{
+  int is_del = flags & NETNS_F_DEL;
+
+  switch (type)
+    {
+    case NETNS_TYPE_ADDR:
+      add_del_addr ((ns_addr_t *)obj, is_del);
+      break;
+
+    case NETNS_TYPE_LINK:
+      add_del_link ((ns_link_t *)obj, is_del);
+      break;
+
+    case NETNS_TYPE_NEIGH:
+      add_del_neigh ((ns_neigh_t *)obj, is_del);
+      break;
+
+    case NETNS_TYPE_ROUTE:
+      add_del_route ((ns_route_t *)obj, is_del);
+      break;
+
+    default:
+      break;
+    }
+}
+
+/*
+ * Subscribe netns_notify_cb to netlink notifications by opening the
+ * namespace named by the empty string.
+ *
+ * NOTE(review): the result of netns_open is not checked - confirm whether a
+ * failure should be reported.
+ */
+void
+tap_inject_enable_netlink (void)
+{
+  char default_ns[] = "";
+  netns_sub_t sub;
+
+  memset (&sub, 0, sizeof (sub));
+  sub.notify = netns_notify_cb;
+  sub.opaque = 0;
+
+  netns_open (default_ns, &sub);
+}
diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c
new file mode 100644
index 0000000..fe108dc
--- /dev/null
+++ b/router/router/tap_inject_node.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <netinet/in.h>
+#include <vnet/ethernet/arp_packet.h>
+
+vlib_node_registration_t tap_inject_rx_node;
+vlib_node_registration_t tap_inject_tx_node;
+vlib_node_registration_t tap_inject_neighbor_node;
+
+/*
+ * Next-node slots of the tap-inject-neighbor node; used as indices into the
+ * .next_nodes table of its registration.
+ */
+enum {
+ NEXT_NEIGHBOR_ARP,
+ NEXT_NEIGHBOR_ICMP6,
+};
+
+
+/*
+ * Write the current data of buffer 'b' to the tap file descriptor 'fd'.
+ *
+ * Only the first buffer of a chain is written. A warning is logged when the
+ * write fails, when fewer bytes than b->current_length were written, or
+ * when the buffer has a chained successor (VLIB_BUFFER_NEXT_PRESENT).
+ */
+static inline void
+tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
+{
+  struct iovec vec = {
+    .iov_base = vlib_buffer_get_current (b),
+    .iov_len = b->current_length,
+  };
+  ssize_t written = writev (fd, &vec, 1);
+
+  if (written < 0)
+    {
+      clib_warning ("writev failed");
+      return;
+    }
+
+  if (written < b->current_length || (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+    clib_warning ("buffer truncated");
+}
+
+/*
+ * Node function for "tap-inject-tx": copy every packet of the frame to the
+ * tap paired with the packet's RX interface, then free all buffers.
+ *
+ * Packets whose RX interface has no tap (lookup yields ~0) are not written
+ * but are still freed. Returns the number of vectors in the frame.
+ */
+static uword
+tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  u32 * buffers = vlib_frame_vector_args (f);
+  u32 idx;
+
+  for (idx = 0; idx < f->n_vectors; ++idx)
+    {
+      vlib_buffer_t * buf = vlib_get_buffer (vm, buffers[idx]);
+      u32 tap_fd = tap_inject_lookup_tap_fd (
+                     vnet_buffer (buf)->sw_if_index[VLIB_RX]);
+
+      if (tap_fd != ~0)
+        {
+          /* Re-wind the buffer to the start of the Ethernet header. */
+          vlib_buffer_advance (buf, -buf->current_data);
+
+          tap_inject_tap_send_buffer (tap_fd, buf);
+        }
+    }
+
+  vlib_buffer_free (vm, buffers, f->n_vectors);
+  return f->n_vectors;
+}
+
+/*
+ * Registration of the internal graph node "tap-inject-tx", dispatched to
+ * tap_inject_tx. Frame elements are u32 values. No next nodes are declared.
+ */
+VLIB_REGISTER_NODE (tap_inject_tx_node) = {
+ .function = tap_inject_tx,
+ .name = "tap-inject-tx",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+};
+
+
+/*
+ * Node function for "tap-inject-neighbor".
+ *
+ * Every packet is copied to the tap paired with its RX interface. ARP
+ * replies and ICMPv6 neighbor advertisements are additionally handed to the
+ * NEXT_NEIGHBOR_ARP / NEXT_NEIGHBOR_ICMP6 next node so the data plane can
+ * learn the neighbor as well; all other packets are freed after the copy.
+ * Packets whose RX interface has no tap are freed without being copied.
+ *
+ * The ICMPv6 header is assumed to follow the fixed IPv6 header directly
+ * (no extension headers are parsed).
+ *
+ * Returns the number of vectors in the frame.
+ */
+static uword
+tap_inject_neighbor (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+  u32 bi;
+  u32 next_index = node->cached_next_index;
+  u32 next;
+  u32 n_left;
+  u32 * to_next;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      bi = pkts[i];
+      b = vlib_get_buffer (vm, bi);
+
+      /*
+       * The forwarding decision is per packet. It must be reset here,
+       * otherwise one ARP reply or neighbor advertisement would cause every
+       * later packet of the frame to be forwarded to the same next node.
+       */
+      next = ~0;
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+
+      /* Send the buffer to a neighbor node too? */
+      {
+        ethernet_header_t * eth = vlib_buffer_get_current (b);
+        u16 ether_type = ntohs (eth->type);
+
+        if (ether_type == ETHERNET_TYPE_ARP)
+          {
+            ethernet_arp_header_t * arp = (void *)(eth + 1);
+
+            if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply))
+              next = NEXT_NEIGHBOR_ARP;
+          }
+        else if (ether_type == ETHERNET_TYPE_IP6)
+          {
+            ip6_header_t * ip = (void *)(eth + 1);
+            icmp46_header_t * icmp = (void *)(ip + 1);
+
+            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
+                icmp->type == ICMP6_neighbor_advertisement)
+              next = NEXT_NEIGHBOR_ICMP6;
+          }
+      }
+
+      /* Nothing for the data plane to learn from this packet. */
+      if (next == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
+      vlib_buffer_advance (b, sizeof (ethernet_header_t));
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+      *(to_next++) = bi;
+      --n_left;
+
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                       n_left, bi, next);
+      vlib_put_next_frame (vm, node, next_index, n_left);
+    }
+
+  return f->n_vectors;
+}
+
+/*
+ * Registration of the internal graph node "tap-inject-neighbor", dispatched
+ * to tap_inject_neighbor. Frame elements are u32 values. The two next nodes
+ * are indexed by NEXT_NEIGHBOR_ARP and NEXT_NEIGHBOR_ICMP6.
+ *
+ * NOTE(review): tap_inject_neighbor selects NEXT_NEIGHBOR_ICMP6 for neighbor
+ * advertisements, while the node named here is the solicitation node -
+ * confirm that node is the intended consumer of advertisements.
+ */
+VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
+ .function = tap_inject_neighbor,
+ .name = "tap-inject-neighbor",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [NEXT_NEIGHBOR_ARP] = "arp-input",
+ [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
+ },
+};
+
+
+/* Largest packet read from a tap in one readv call. */
+#define MTU 1500
+/* Number of buffers needed to hold one MTU-sized packet. */
+#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
+/* Capacity of the cached rx buffer list (reserved in tap_inject_init). */
+#define NUM_BUFFERS_TO_ALLOC 32
+
+/*
+ * Read one packet from tap 'fd' and hand it to the output node of the
+ * paired data-plane interface.
+ *
+ * vm   - vlib main used for buffer and frame operations.
+ * node - unused.
+ * f    - unused.
+ * fd   - tap file descriptor that signalled readiness.
+ *
+ * Returns 1 when a packet was enqueued, 0 when the fd has no mapping, when
+ * buffers could not be allocated, or when readv failed.
+ */
+static inline uword
+tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 sw_if_index;
+  struct iovec iov[MTU_BUFFERS];
+  u32 bi[MTU_BUFFERS];
+  vlib_buffer_t * b;
+  ssize_t n_bytes;
+  ssize_t n_bytes_left;
+  u32 i, j;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
+  if (sw_if_index == ~0)
+    return 0;
+
+  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
+  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+    {
+      u32 len = vec_len (im->rx_buffers);
+      u32 n_alloc;
+
+      /*
+       * Only top the list up to NUM_BUFFERS_TO_ALLOC entries. Asking for a
+       * full NUM_BUFFERS_TO_ALLOC while 'len' entries are still cached would
+       * write past the space reserved for the vector.
+       */
+      n_alloc = vlib_buffer_alloc_from_free_list (vm,
+                    &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC - len,
+                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+      _vec_len (im->rx_buffers) += n_alloc;
+
+      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+        {
+          clib_warning ("failed to allocate buffers");
+          return 0;
+        }
+    }
+
+  /* Fill buffers from the end of the list to make it easier to resize. */
+  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
+    {
+      bi[i] = im->rx_buffers[j];
+
+      b = vlib_get_buffer (vm, bi[i]);
+
+      iov[i].iov_base = b->data;
+      iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
+    }
+
+  n_bytes = readv (fd, iov, MTU_BUFFERS);
+  if (n_bytes < 0)
+    {
+      clib_warning ("readv failed");
+      return 0;
+    }
+
+  b = vlib_get_buffer (vm, bi[0]);
+
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+  /* Bytes that did not fit into the first buffer. */
+  n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
+
+  if (n_bytes_left > 0)
+    {
+      b->total_length_not_including_first_buffer = n_bytes_left;
+      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+    }
+
+  b->current_length = n_bytes;
+
+  /*
+   * If necessary, configure any remaining buffers in the chain. Each pass
+   * marks the previous buffer as full and linked, and gives the current one
+   * the remaining byte count; 'i' ends up as the number of buffers used.
+   */
+  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
+    {
+      b = vlib_get_buffer (vm, bi[i - 1]);
+      b->current_length = VLIB_BUFFER_DATA_SIZE;
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+      b->next_buffer = bi[i];
+
+      b = vlib_get_buffer (vm, bi[i]);
+      b->current_length = n_bytes_left;
+    }
+
+  /* The buffers consumed were taken from the tail of the cached list. */
+  _vec_len (im->rx_buffers) -= i;
+
+  vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
+
+  /* Get the packet to the output node. */
+  {
+    vnet_hw_interface_t * hw;
+    vlib_frame_t * new_frame;
+    u32 * to_next;
+
+    /* sw_if_index doubles as the hardware interface index here. */
+    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
+
+    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+    to_next = vlib_frame_vector_args (new_frame);
+    to_next[0] = bi[0];
+    new_frame->n_vectors = 1;
+
+    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
+  }
+
+  return 1;
+}
+
+/*
+ * Node function for "tap-inject-rx": read one packet from every tap file
+ * descriptor queued in im->rx_file_descriptors, then release the queue.
+ *
+ * Processing stops at the first descriptor for which tap_rx does not return
+ * 1; in that case a warning is logged and 0 is returned. Otherwise the
+ * number of descriptors served is returned.
+ */
+static uword
+tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  uword n_rx = 0;
+  u32 idx;
+
+  for (idx = 0; idx < vec_len (im->rx_file_descriptors); ++idx)
+    {
+      if (tap_rx (vm, node, f, im->rx_file_descriptors[idx]) == 1)
+        {
+          ++n_rx;
+          continue;
+        }
+
+      clib_warning ("rx failed");
+      n_rx = 0;
+      break;
+    }
+
+  vec_free (im->rx_file_descriptors);
+
+  return n_rx;
+}
+
+/*
+ * Registration of the input graph node "tap-inject-rx", dispatched to
+ * tap_inject_rx. The node is registered in interrupt state;
+ * tap_inject_tap_read marks its interrupt pending when a tap is readable.
+ */
+VLIB_REGISTER_NODE (tap_inject_rx_node) = {
+ .function = tap_inject_rx,
+ .name = "tap-inject-rx",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Init function: reserve room for NUM_BUFFERS_TO_ALLOC cached rx buffer
+ * indices (leaving the list empty) and record the indices of the three
+ * tap-inject graph nodes in the main state. Always returns 0.
+ */
+static clib_error_t *
+tap_inject_init (vlib_main_t * vm)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  /* Reserve capacity only; the list starts out with length zero. */
+  vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
+  vec_reset_length (im->rx_buffers);
+
+  im->neighbor_node_index = tap_inject_neighbor_node.index;
+  im->tx_node_index = tap_inject_tx_node.index;
+  im->rx_node_index = tap_inject_rx_node.index;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_inject_init);
diff --git a/router/router/tap_inject_tap.c b/router/router/tap_inject_tap.c
new file mode 100644
index 0000000..9650323
--- /dev/null
+++ b/router/router/tap_inject_tap.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/if_tun.h>
+#include <netinet/in.h>
+#include <vnet/unix/tuntap.h>
+
+#include <vlib/unix/unix.h>
+
+
+/*
+ * Read-ready callback installed on each tap file descriptor: queue the
+ * descriptor in im->rx_file_descriptors and mark the rx node's interrupt
+ * pending. Always returns 0.
+ */
+static clib_error_t *
+tap_inject_tap_read (unix_file_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 ready_fd = f->file_descriptor;
+
+  vec_add1 (im->rx_file_descriptors, ready_fd);
+
+  vlib_node_set_interrupt_pending (vm, im->rx_node_index);
+
+  return 0;
+}
+
+/* Host-side tap names are this prefix followed by the hardware instance. */
+#define TAP_INJECT_TAP_BASE_NAME "vpp"
+
+/*
+ * Create a host tap device for hardware interface 'hw' and register it.
+ *
+ * The tap is named TAP_INJECT_TAP_BASE_NAME<hw_instance>, switched to
+ * non-blocking mode, given the hardware address of 'hw' (when present), and
+ * added to the unix file poller with tap_inject_tap_read as read callback.
+ * Finally the (sw_if_index, tap fd, tap if index) mapping is recorded via
+ * tap_inject_insert_tap.
+ *
+ * Returns 0 on success. On failure every descriptor opened here is closed
+ * and an error describing the failing step is returned.
+ *
+ * NOTE(review): 'sw' is looked up with hw->hw_if_index, so the ASSERT below
+ * compares that index with itself - confirm whether hw->sw_if_index was
+ * intended.
+ */
+clib_error_t *
+tap_inject_tap_connect (vnet_hw_interface_t * hw)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vnet_sw_interface_t * sw = vnet_get_sw_interface (vnet_main, hw->hw_if_index);
+  static const int one = 1;
+  clib_error_t * error = 0;
+  int fd = -1;
+  struct ifreq ifr;
+  unix_file_t template;
+  u32 tap_fd;
+  u8 * name;
+
+  memset (&ifr, 0, sizeof (ifr));
+  memset (&template, 0, sizeof (template));
+
+  ASSERT (hw->hw_if_index == sw->sw_if_index);
+
+  /* Create the tap. */
+  tap_fd = open ("/dev/net/tun", O_RDWR);
+
+  if ((int)tap_fd < 0)
+    return clib_error_return (0, "failed to open tun device");
+
+  name = format (0, TAP_INJECT_TAP_BASE_NAME "%u%c", hw->hw_instance, 0);
+
+  /* ifr was zeroed above, so copying size - 1 bytes keeps it terminated. */
+  strncpy (ifr.ifr_name, (char *) name, sizeof (ifr.ifr_name) - 1);
+
+  /* The name now lives in ifr; release the temporary vector. */
+  vec_free (name);
+
+  ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+  if (ioctl (tap_fd, TUNSETIFF, (void *)&ifr) < 0)
+    {
+      error = clib_error_return (0, "failed to create tap");
+      goto fail;
+    }
+
+  if (ioctl (tap_fd, FIONBIO, &one) < 0)
+    {
+      error = clib_error_return (0, "failed to set tap to non-blocking io");
+      goto fail;
+    }
+
+  /* Open a socket to configure the device. */
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    {
+      error = clib_error_return (0, "failed to configure tap");
+      goto fail;
+    }
+
+  if (hw->hw_address)
+    clib_memcpy (ifr.ifr_hwaddr.sa_data, hw->hw_address, ETHER_ADDR_LEN);
+
+  ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+  /* Set the hardware address. */
+  if (ioctl (fd, SIOCSIFHWADDR, &ifr) < 0)
+    {
+      error = clib_error_return (0, "failed to set tap hardware address");
+      goto fail;
+    }
+
+  /* Get the tap if index. */
+  if (ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
+    {
+      error = clib_error_return (0, "failed to procure tap if index");
+      goto fail;
+    }
+
+  close (fd);
+
+  /* Get notified when the tap needs to be read. */
+  template.read_function = tap_inject_tap_read;
+  template.file_descriptor = tap_fd;
+
+  unix_file_add (&unix_main, &template);
+
+  tap_inject_insert_tap (sw->sw_if_index, tap_fd, ifr.ifr_ifindex);
+
+  return 0;
+
+fail:
+  /* Release whatever was opened before the failing step. */
+  if (fd >= 0)
+    close (fd);
+  close (tap_fd);
+
+  return error;
+}
+
+/*
+ * Tear down the tap paired with 'sw_if_index': drop the mapping and close
+ * the tap file descriptor.
+ *
+ * Returns 0 on success, or an error when no tap is mapped to the interface.
+ *
+ * NOTE(review): tap_inject_tap_connect registers the descriptor with
+ * unix_file_add, and nothing here removes that registration - confirm
+ * whether it is cleaned up elsewhere.
+ */
+clib_error_t *
+tap_inject_tap_disconnect (u32 sw_if_index)
+{
+  u32 tap_fd = tap_inject_lookup_tap_fd (sw_if_index);
+
+  if (tap_fd != ~0)
+    {
+      tap_inject_delete_tap (sw_if_index);
+
+      close (tap_fd);
+      return 0;
+    }
+
+  return clib_error_return (0, "failed to disconnect tap");
+}
+
+
+/*
+ * Format function: append the host interface name for a tap if index.
+ *
+ * s    - vector being formatted into.
+ * args - supplies one u32: the host interface index to resolve.
+ *
+ * Returns 's' with the name appended. When the name cannot be resolved
+ * (socket or SIOCGIFNAME failure) 's' is returned unchanged, so the text
+ * accumulated so far is neither lost nor leaked.
+ */
+u8 *
+format_tap_inject_tap_name (u8 * s, va_list * args)
+{
+  struct ifreq ifr;
+  int fd;
+  int rv;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  /*
+   * Consume the argument before anything can fail, so that the remaining
+   * arguments of the enclosing format call stay aligned.
+   */
+  ifr.ifr_ifindex = va_arg (*args, u32);
+
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    return s;
+
+  rv = ioctl (fd, SIOCGIFNAME, &ifr);
+
+  close (fd);
+
+  if (rv < 0)
+    return s;
+
+  return format (s, "%s", ifr.ifr_name);
+}