From dfae7756baf895957a43944f63bfe0c850b16467 Mon Sep 17 00:00:00 2001 From: Jeff Shaw Date: Wed, 21 Sep 2016 19:12:46 -0400 Subject: [router] IPv6 support and refactoring. This change adds support for IPv6 while refactoring most of the original plugin code in the following ways. - Adhere to vpp style guidelines. - Split the netlink, node, and tap processing into separate files named with a "tap_inject" prefix which more accurately represents the functionality. - Implement our own tap management and rx/tx. This is to reduce the overhead of passing packets in and out of vnet tap devices, in favor of directly reading/writing from the tap. - Change how nodes work. Now we have neighbor, rx, and tx nodes. The neighbor node sends ARP replies and ICMP6 neighbor advertisements to the arp-input and icmp6-neighbor-solicitation nodes, respectively, before also injecting the packet to the host, making it possible for both vpp and the host network stack to resolve the next hop. The tx node injects packets into the host by writing to the tap. The rx node reads packets from the tap and sends them on its associated data plane interface. - Simplify the CLI. Instead of creating taps specifically for a given interface we create a tap for all of the Ethernet interfaces with the "enable tap-inject" CLI command. The interfaces are named with a "vpp" prefix, i.e. "vpp0". Also add a "disable tap-inject" option. - Provide ability to enable at configuration time with the tap-inject { enable } stanza. 
Change-Id: I6b56da606e2da1d793ce6aca222fe4eb5a4e070d Signed-off-by: Jeff Shaw --- router/Makefile.am | 5 +- router/router/router.c | 691 ------------------------------------- router/router/tap_inject.c | 375 ++++++++++++++++++++ router/router/tap_inject.h | 109 ++++++ router/router/tap_inject_netlink.c | 184 ++++++++++ router/router/tap_inject_node.c | 331 ++++++++++++++++++ router/router/tap_inject_tap.c | 170 +++++++++ 7 files changed, 1173 insertions(+), 692 deletions(-) delete mode 100644 router/router/router.c create mode 100644 router/router/tap_inject.c create mode 100644 router/router/tap_inject.h create mode 100644 router/router/tap_inject_netlink.c create mode 100644 router/router/tap_inject_node.c create mode 100644 router/router/tap_inject_tap.c (limited to 'router') diff --git a/router/Makefile.am b/router/Makefile.am index 2e9b38f..b9de6a1 100644 --- a/router/Makefile.am +++ b/router/Makefile.am @@ -3,7 +3,10 @@ AUTOMAKE_OPTIONS = foreign subdir-objects AM_CFLAGS = -Wall -I@TOOLKIT_INCLUDE@ lib_LTLIBRARIES = router.la -router_la_SOURCES = router/router.c +router_la_SOURCES = router/tap_inject.c \ + router/tap_inject_netlink.c \ + router/tap_inject_node.c \ + router/tap_inject_tap.c router_la_LDFLAGS = -module router_la_LIBADD = -lrtnl diff --git a/router/router/router.c b/router/router/router.c deleted file mode 100644 index 741f34c..0000000 --- a/router/router/router.c +++ /dev/null @@ -1,691 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -enum { - NEXT_UNTAPPED = 0, - NEXT_INJECT, -}; - -enum { - ERROR_INJECT_ARP, - ERROR_INJECT_ICMP, - ERROR_INJECT_CLASSIFIED, -}; - -static char *error_strings[] = { - [ERROR_INJECT_ARP] = "Inject ARP", - [ERROR_INJECT_ICMP] = "Inject ICMP", - [ERROR_INJECT_CLASSIFIED] = "Inject Classified", -}; - -struct tap_to_iface { - u32 tap; - u32 iface; -}; - -struct router_main { - vnet_main_t *vnet_main; - u32 *iface_to_tap; - u32 *iface_to_protos; - struct tap_to_iface *tap_to_iface; - u32 ns_index; -}; - -static struct router_main rm; - -enum { - PROTO_ARP = 0, - PROTO_ICMP4, - PROTO_IGMP4, - PROTO_OSPF2, - PROTO_TCP, - PROTO_UDP, - PROTO_N_TOTAL, -}; - -enum { - PROTO_BIT_ARP = 1 << PROTO_ARP, - PROTO_BIT_ICMP4 = 1 << PROTO_ICMP4, - PROTO_BIT_IGMP4 = 1 << PROTO_IGMP4, - PROTO_BIT_OSPF2 = 1 << PROTO_OSPF2, - PROTO_BIT_TCP = 1 << PROTO_TCP, - PROTO_BIT_UDP = 1 << PROTO_UDP, -}; - -static char *proto_strings[PROTO_N_TOTAL] = { - [PROTO_ARP] = "arp", - [PROTO_ICMP4] = "icmp4", - [PROTO_IGMP4] = "igmp4", - [PROTO_OSPF2] = "ospf2", - [PROTO_TCP] = "tcp", - [PROTO_UDP] = "udp", -}; - -static inline u32 parse_protos(char *proto_string) -{ - u32 protos = 0; - char *tok, **proto; - - for (tok = strtok(proto_string, ","); tok; tok = strtok(NULL, ",")) - for (proto = proto_strings; proto && *proto; ++proto) - if (!strncmp(tok, *proto, 16)) - protos |= 1 << (proto - proto_strings); - return protos; -} - -static uword unformat_protos(unformat_input_t *input, va_list *args) -{ - u32 *protos = va_arg(*args, u32 *); - u8 *proto_string; - - if (unformat(input, "%s", &proto_string)) - *protos = parse_protos((char *)proto_string); - return 1; -} - -vlib_node_registration_t tap_inject_arp_node; -vlib_node_registration_t tap_inject_icmp_node; 
-vlib_node_registration_t tap_inject_classified_node; - -static inline void -update_arp_entry(vlib_buffer_t *b0, ethernet_arp_header_t *arp, u32 vlib_rx) -{ - ethernet_header_t *eth; - ip4_address_t *if_addr; - ip_interface_address_t *ifa; - - if (arp->l2_type != ntohs(ETHERNET_ARP_HARDWARE_TYPE_ethernet) || - arp->l3_type != ntohs(ETHERNET_TYPE_IP4)) - return; - - /* Check that IP address is local and matches incoming interface. */ - if_addr = ip4_interface_address_matching_destination(&ip4_main, - &arp->ip4_over_ethernet[1].ip4, - vlib_rx, &ifa); - if (!if_addr) - return; - - /* Source must also be local to subnet of matching interface address. */ - if (!ip4_destination_matches_interface(&ip4_main, - &arp->ip4_over_ethernet[0].ip4, ifa)) - return; - - /* Reject replies with our local interface address. */ - if (if_addr->as_u32 == arp->ip4_over_ethernet[0].ip4.as_u32) - return; - - if (if_addr->as_u32 != arp->ip4_over_ethernet[1].ip4.as_u32) - return; - - eth = ethernet_buffer_get_header(b0); - - /* Trash ARP packets whose ARP-level source addresses do not - * match their L2-frame-level source addresses */ - if (memcmp(eth->src_address, arp->ip4_over_ethernet[0].ethernet, - sizeof(eth->src_address))) - return; - - if (arp->ip4_over_ethernet[0].ip4.as_u32 == 0 || - (arp->ip4_over_ethernet[0].ip4.as_u32 == - arp->ip4_over_ethernet[1].ip4.as_u32)) - return; - - /* Learn or update sender's mapping only for requests or unicasts - * that don't match local interface address. 
*/ - if (ethernet_address_cast(eth->dst_address) != ETHERNET_ADDRESS_UNICAST) - return; - - vnet_arp_set_ip4_over_ethernet(rm.vnet_main, vlib_rx, ~0, - &arp->ip4_over_ethernet[0], 0); -} - -static uword -tap_inject_func(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f, - int mode) -{ - u32 n_left_from = f->n_vectors; - u32 *from = vlib_frame_vector_args(f); - u32 next_index = node->cached_next_index; - u32 *to_next; - u32 counter, count = 0; - - while (n_left_from) { - vlib_buffer_t *b0; - u32 next0, bi0, n_left; - u32 vlib_rx, vlib_tx; - u32 protos, proto_bit = 0; - - vlib_get_next_frame(m, node, next_index, to_next, n_left); - - *(to_next++) = bi0 = *(from++); - --n_left_from; - --n_left; - - b0 = vlib_get_buffer(m, bi0); - - vlib_rx = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - vlib_tx = rm.iface_to_tap[vlib_rx]; - protos = rm.iface_to_protos[vlib_rx]; - - next0 = NEXT_UNTAPPED; - - if (vlib_tx == 0 || vlib_tx == ~0 || protos == 0) - goto untapped; - - if (mode == ERROR_INJECT_CLASSIFIED) { - ip4_header_t *iphdr; - - iphdr = vlib_buffer_get_current(b0); - if (iphdr->protocol == IP_PROTOCOL_TCP) - proto_bit = PROTO_BIT_TCP; - else if (iphdr->protocol == IP_PROTOCOL_UDP) - proto_bit = PROTO_BIT_UDP; - else if (iphdr->protocol == IP_PROTOCOL_OSPF) - proto_bit = PROTO_BIT_OSPF2; - else if (iphdr->protocol == IP_PROTOCOL_IGMP) - proto_bit = PROTO_BIT_IGMP4; - } else if (mode == ERROR_INJECT_ARP) { - proto_bit = PROTO_BIT_ARP; - } else if (mode == ERROR_INJECT_ICMP) { - proto_bit = PROTO_BIT_ICMP4; - } - - if (!(protos & proto_bit)) - goto untapped; - - next0 = NEXT_INJECT; - - vnet_buffer(b0)->sw_if_index[VLIB_TX] = vlib_tx; - ++count; - - if (mode == ERROR_INJECT_ARP) { - ethernet_arp_header_t *arphdr; - - arphdr = vlib_buffer_get_current(b0); - if (arphdr->opcode == ntohs(ETHERNET_ARP_OPCODE_reply)) - update_arp_entry(b0, arphdr, vlib_rx); - } - - /* FIXME: What about VLAN? 
*/ - b0->current_data -= sizeof(ethernet_header_t); - b0->current_length += sizeof(ethernet_header_t); - -untapped: - vlib_validate_buffer_enqueue_x1(m, node, next_index, to_next, - n_left, bi0, next0); - vlib_put_next_frame(m, node, next_index, n_left); - } - - switch (mode) { - case ERROR_INJECT_ARP: - counter = ERROR_INJECT_ARP; - break; - case ERROR_INJECT_ICMP: - counter = ERROR_INJECT_ICMP; - break; - default: - counter = ERROR_INJECT_CLASSIFIED; - } - - vlib_node_increment_counter(m, node->node_index, counter, count); - return f->n_vectors; -} - -static uword -tap_inject_arp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_ARP); -} - -VLIB_REGISTER_NODE(tap_inject_arp_node) = { - .function = tap_inject_arp, - .name = "tap-inject-arp", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "arp-input", - [NEXT_INJECT] = "interface-output", - }, -}; - -static uword -tap_inject_icmp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_ICMP); -} - -VLIB_REGISTER_NODE(tap_inject_icmp_node) = { - .function = tap_inject_icmp, - .name = "tap-inject-icmp", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "ip4-icmp-input", - [NEXT_INJECT] = "interface-output", - }, -}; - -static uword -tap_inject_classified(vlib_main_t *m, vlib_node_runtime_t *node, - vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_CLASSIFIED); -} - -VLIB_REGISTER_NODE(tap_inject_classified_node) = { - .function = tap_inject_classified, - .name = "tap-inject-classified", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = 
ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "error-drop", - [NEXT_INJECT] = "interface-output", - }, -}; - -static int -set_tap_hwaddr(vlib_main_t *m, char *name, u8 *hwaddr) -{ - int fd, rc; - struct ifreq ifr; - - fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd < 0) - return -1; - - memset(&ifr, 0, sizeof(ifr)); - strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1); - memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, ETHER_ADDR_LEN); - ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; - rc = ioctl(fd, SIOCSIFHWADDR, &ifr) < 0 ? -1 : 0; - close(fd); - return rc; -} - -static int -set_tap_link_state(vlib_main_t *m, char *name, u16 flags) -{ - int fd, rc; - struct ifreq ifr; - - fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd < 0) - return -1; - - memset(&ifr, 0, sizeof(ifr)); - strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1); - - rc = ioctl(fd, SIOCGIFFLAGS, &ifr); - if (rc < 0) - goto out; - - if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); - else - ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); - - rc = ioctl(fd, SIOCSIFFLAGS, &ifr) < 0 ? 
-1 : 0; -out: - close(fd); - return rc; -} - -static clib_error_t * -do_tap_connect(vlib_main_t *m, char *name, u32 iface, u32 *tap) -{ - vnet_hw_interface_t *hw = vnet_get_hw_interface(rm.vnet_main, iface); - vnet_sw_interface_t *sw = vnet_get_sw_interface(rm.vnet_main, iface); - u64 hw_address = 0; - - *tap = ~0; - if (!hw) - return clib_error_return(0, "invalid interface"); - else if (hw->hw_address) - memcpy(&hw_address, hw->hw_address, 6); - - if (vnet_tap_connect(m, (u8 *)name, (u8 *)&hw_address, tap)) - return clib_error_return(0, "failed to connect tap"); - - if (set_tap_hwaddr(m, name, (u8 *)&hw_address)) - return clib_error_return(0, "failed to set tap hw address"); - - if (set_tap_link_state(m, name, sw->flags)) - return clib_error_return(0, "failed to set tap link state"); - - if (set_int_l2_mode(m, rm.vnet_main, MODE_L2_XC, *tap, 0, 0, 0, iface)) - return clib_error_return(0, "failed to xconnect to interface"); - - return vnet_sw_interface_set_flags(rm.vnet_main, *tap, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); -} - -static void add_del_addr(ns_addr_t *a, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - - vec_foreach(map, rm.tap_to_iface) { - if (a->ifaddr.ifa_index == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0) - return; - - ip4_add_del_interface_address(vlib_get_main(), - sw_if_index, (ip4_address_t *)a->local, - a->ifaddr.ifa_prefixlen, is_del); -} - -static void add_del_route(ns_route_t *r, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - - vec_foreach(map, rm.tap_to_iface) { - if (r->oif == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0 || r->table != 254) - return; - - ip4_add_del_route_next_hop(&ip4_main, - is_del ? 
IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD, - (ip4_address_t *)r->dst, r->rtm.rtm_dst_len, - (ip4_address_t *)r->gateway, sw_if_index, 0, ~0, 0); -} - -struct set_flags_args { - u32 sw_if_index; - u8 flags; -}; - -static void set_interface_flags_callback(struct set_flags_args *a) -{ - vnet_sw_interface_set_flags(rm.vnet_main, a->sw_if_index, - a->flags); -} - -static void add_del_link(ns_link_t *l, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - u8 flags = 0; - struct set_flags_args args; - vnet_sw_interface_t *sw = NULL; - - vec_foreach(map, rm.tap_to_iface) { - if (l->ifi.ifi_index == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0) - return; - - sw = vnet_get_sw_interface(rm.vnet_main, sw_if_index); - flags = sw->flags; - - if (l->ifi.ifi_flags & IFF_UP) - flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP; - else - flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP; - - args.sw_if_index = sw_if_index; - args.flags = flags; - - vl_api_rpc_call_main_thread(set_interface_flags_callback, - (u8 *) &args, sizeof(args)); -} - -static void -netns_notify_cb(void *obj, netns_type_t type, u32 flags, uword opaque) -{ - if (type == NETNS_TYPE_ADDR) - add_del_addr((ns_addr_t *)obj, flags & NETNS_F_DEL); - else if (type == NETNS_TYPE_ROUTE) - add_del_route((ns_route_t *)obj, flags & NETNS_F_DEL); - else if (type == NETNS_TYPE_LINK) - add_del_link((ns_link_t *)obj, flags & NETNS_F_DEL); -} - -static void insert_tap_to_iface(u32 tap, u32 iface) -{ - struct tap_to_iface map = { - .tap = tap, - .iface = iface, - }; - - vec_add1(rm.tap_to_iface, map); -} - - -static u32 ip4_next_index = ~0; - -static u32 -ip4_lookup_next_index(void) -{ - if (ip4_next_index == ~0) { - ip4_next_index = vlib_node_add_next(vlib_get_main(), - ip4_lookup_node.index, - tap_inject_classified_node.index); - } - - return ip4_next_index; -} - -static u32 ip4_multicast_arc_added; - -static void -add_ip4_multicast_arc(void) -{ - ip4_add_del_route_args_t a; - 
ip_adjacency_t add_adj; - - if (ip4_multicast_arc_added) - return; - - memset(&a, 0, sizeof(a)); - memset(&add_adj, 0, sizeof(add_adj)); - - a.add_adj = &add_adj; - a.n_add_adj = 1; - - a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD; - a.table_index_or_table_id = 0; - a.dst_address.as_u32 = 0x000000E0; /* 224.0.0.0 */ - a.dst_address_length = 24; - a.adj_index = ~0; - - add_adj.explicit_fib_index = ~0; - add_adj.rewrite_header.node_index = ip4_rewrite_node.index; - add_adj.lookup_next_index = ip4_lookup_next_index(); - add_adj.if_address_index = ~0; - - ip4_add_del_route(&ip4_main, &a); - ip4_multicast_arc_added = 1; -} - -static clib_error_t * -tap_inject(vlib_main_t *m, unformat_input_t *input, vlib_cli_command_t *cmd) -{ - char *name = NULL; - u32 iface = ~0, tap = ~0, protos = 0; - clib_error_t *err; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) { - if (unformat(input, "from %U", unformat_vnet_sw_interface, - rm.vnet_main, &iface)) - ; - else if (unformat(input, "as %s", &name)) - ; - else if (unformat(input, "%U", unformat_protos, &protos)) - ; - else - break; - } - - if (!protos) - return clib_error_return(0, - "no protocols specified"); - else if (iface == ~0) - return clib_error_return(0, - "interface name is missing or invalid"); - else if (!name) - return clib_error_return(0, - "host interface name is missing or invalid"); - - if (protos & PROTO_BIT_OSPF2) { - /* Require arp, icmp4, and igmp4 for ospf2. */ - if (!(protos & PROTO_BIT_ARP) || - !(protos & PROTO_BIT_ICMP4) || - !(protos & PROTO_BIT_IGMP4)) - return clib_error_return(0, - "ospf2 requires arp, icmp4, and igmp4"); - } - - if (protos & PROTO_BIT_TCP) /* Require arp and icmp4 for tcp. */ - if (!(protos & PROTO_BIT_ARP) || !(protos & PROTO_BIT_ICMP4)) - return clib_error_return(0, - "tcp requires arp and icmp4"); - - if (protos & PROTO_BIT_UDP) { - /* Require arp, icmp4, and igmp4 for udp. 
*/ - if (!(protos & PROTO_BIT_ARP) || - !(protos & PROTO_BIT_ICMP4) || - !(protos & PROTO_BIT_IGMP4)) - return clib_error_return(0, - "udp requires arp, icmp4, and igmp4"); - } - - err = do_tap_connect(m, name, iface, &tap); - if (err) { - if (tap != ~0) - vnet_tap_delete(m, tap); - return err; - } - - if ((protos & PROTO_BIT_ARP) || (protos & PROTO_BIT_ICMP4)) { - if (rm.ns_index == ~0) { - char nsname = 0; - netns_sub_t sub = { - .notify = netns_notify_cb, - .opaque = 0, - }; - - rm.ns_index = netns_open(&nsname, &sub); - if (rm.ns_index == ~0) { - vnet_tap_delete(m, tap); - clib_error_return(0, - "failed to open namespace"); - } - } - } - - if (protos & PROTO_BIT_IGMP4) - add_ip4_multicast_arc(); - - if (protos & PROTO_BIT_ARP) - ethernet_register_input_type(m, ETHERNET_TYPE_ARP, - tap_inject_arp_node.index); - - if (protos & PROTO_BIT_ICMP4) - ip4_register_protocol(IP_PROTOCOL_ICMP, - tap_inject_icmp_node.index); - - if (protos & PROTO_BIT_OSPF2) - ip4_register_protocol(IP_PROTOCOL_OSPF, - tap_inject_classified_node.index); - - if (protos & PROTO_BIT_TCP) - ip4_register_protocol(IP_PROTOCOL_TCP, - tap_inject_classified_node.index); - - if (protos & PROTO_BIT_UDP) - ip4_register_protocol(IP_PROTOCOL_UDP, - tap_inject_classified_node.index); - - /* Find sw_if_index of tap associated with data plane interface. */ - rm.iface_to_tap[iface] = tap; - rm.iface_to_protos[iface] = protos; - - /* Find data plane interface associated with host tap ifindex. 
*/ - insert_tap_to_iface(if_nametoindex(name), iface); - - return 0; -} - -VLIB_CLI_COMMAND(tap_inject_command, static) = { - .path = "tap inject", - .short_help = "tap inject from as ", - .function = tap_inject, -}; - -static clib_error_t * -interface_add_del(struct vnet_main_t *m, u32 hw_if_index, u32 add) -{ - vnet_hw_interface_t *hw = vnet_get_hw_interface(m, hw_if_index); - vnet_sw_interface_t *sw = vnet_get_sw_interface(m, hw->sw_if_index); - ASSERT(hw->sw_if_index == sw->sw_if_index); - - vec_validate(rm.iface_to_tap, sw->sw_if_index); - vec_validate(rm.iface_to_protos, sw->sw_if_index); - rm.iface_to_tap[sw->sw_if_index] = ~0; - rm.iface_to_protos[sw->sw_if_index] = 0; - return 0; -} -VNET_HW_INTERFACE_ADD_DEL_FUNCTION(interface_add_del); - -clib_error_t * -vlib_plugin_register(vlib_main_t *m, vnet_plugin_handoff_t *h, int f) -{ - rm.vnet_main = h->vnet_main; - rm.ns_index = ~0; - return 0; -} - -static clib_error_t *router_init(vlib_main_t *m) -{ - return 0; -} -VLIB_INIT_FUNCTION(router_init); diff --git a/router/router/tap_inject.c b/router/router/tap_inject.c new file mode 100644 index 0000000..8d6f5af --- /dev/null +++ b/router/router/tap_inject.c @@ -0,0 +1,375 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "tap_inject.h"
+
+/* Single global instance of the plugin state. */
+static tap_inject_main_t tap_inject_main;
+
+/* Return a pointer to the plugin's global state. */
+tap_inject_main_t *
+tap_inject_get_main (void)
+{
+  return &tap_inject_main;
+}
+
+/*
+ * Record the association between a data plane interface and its tap.
+ *
+ * sw_if_index  - data plane software interface index.
+ * tap_fd       - file descriptor of the tap.
+ * tap_if_index - host interface index of the tap.
+ *
+ * The three vectors are grown on demand (new slots are filled with ~0) and
+ * the tap_if_index -> sw_if_index hash is updated.
+ */
+void
+tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0);
+  vec_validate_init_empty (im->sw_if_index_to_tap_if_index, sw_if_index, ~0);
+
+  vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0);
+
+  im->sw_if_index_to_tap_fd[sw_if_index] = tap_fd;
+  im->sw_if_index_to_tap_if_index[sw_if_index] = tap_if_index;
+
+  im->tap_fd_to_sw_if_index[tap_fd] = sw_if_index;
+
+  hash_set (im->tap_if_index_to_sw_if_index, tap_if_index, sw_if_index);
+}
+
+/*
+ * Forget the tap associated with sw_if_index.
+ *
+ * All table entries for the interface are reset to ~0 and the hash entry is
+ * removed. The call is a no-op when sw_if_index was never inserted; every
+ * vector access is bounds checked so that an unknown index, or a stored
+ * tap_fd of ~0, cannot index past the end of a vector.
+ */
+void
+tap_inject_delete_tap (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 tap_fd;
+  u32 tap_if_index;
+
+  /*
+   * The lookup helpers may have grown only one of the two sw_if_index
+   * vectors, so both lengths must be checked.
+   */
+  if (sw_if_index >= vec_len (im->sw_if_index_to_tap_fd) ||
+      sw_if_index >= vec_len (im->sw_if_index_to_tap_if_index))
+    return;
+
+  tap_fd = im->sw_if_index_to_tap_fd[sw_if_index];
+  tap_if_index = im->sw_if_index_to_tap_if_index[sw_if_index];
+
+  im->sw_if_index_to_tap_if_index[sw_if_index] = ~0;
+  im->sw_if_index_to_tap_fd[sw_if_index] = ~0;
+
+  /* tap_fd is ~0 when no tap is attached; that fails this range check. */
+  if (tap_fd < vec_len (im->tap_fd_to_sw_if_index))
+    im->tap_fd_to_sw_if_index[tap_fd] = ~0;
+
+  if (tap_if_index != ~0)
+    hash_unset (im->tap_if_index_to_sw_if_index, tap_if_index);
+}
+
+/*
+ * Return the tap file descriptor for sw_if_index, or ~0 if none is recorded.
+ * The vector is grown as a side effect so the index is always valid.
+ */
+u32
+tap_inject_lookup_tap_fd (u32 sw_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0);
+  return im->sw_if_index_to_tap_fd[sw_if_index];
+}
+
+/*
+ * Return the software interface index for tap_fd, or ~0 if none is recorded.
+ * The vector is grown as a side effect so the index is always valid.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0);
+  return im->tap_fd_to_sw_if_index[tap_fd];
+}
+
+/*
+ * Return the software interface index for the host interface index
+ * tap_if_index, or ~0 if the tap is unknown.
+ */
+u32
+tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  uword * sw_if_index;
+
+  sw_if_index = hash_get (im->tap_if_index_to_sw_if_index, tap_if_index);
+
+  /*
+   * Hash values are stored as uword. Read the whole value and narrow it,
+   * rather than reinterpreting the pointer as u32 *, which would fetch the
+   * wrong half of the word on big-endian 64-bit hosts.
+   */
+  return sw_if_index ? (u32) sw_if_index[0] : ~0;
+}
+
+
+/* Plugin registration hook; this plugin has nothing to do here. */
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, int f)
+{
+  return 0;
+}
+
+
+/*
+ * Clear the "enabled" flag. The protocol registrations made by
+ * tap_inject_enable () are not undone, hence the warning.
+ */
+static void
+tap_inject_disable (void)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  im->flags &= ~TAP_INJECT_F_ENABLED;
+
+  clib_warning ("tap-inject is not actually disabled.");
+}
+
+/*
+ * Enable tap-inject: start netlink processing and, unless the netlink-only
+ * option is set, steer ARP, ICMP, ICMP6, OSPF, TCP and UDP to the tap-inject
+ * nodes and install an IPv4 multicast route towards the tx node.
+ *
+ * Does nothing when tap-inject is already enabled. Always returns 0.
+ */
+static clib_error_t *
+tap_inject_enable (void)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (tap_inject_is_enabled ())
+    return 0;
+
+  tap_inject_enable_netlink ();
+
+  /* Only enable netlink? */
+  if (im->flags & TAP_INJECT_F_CONFIG_NETLINK)
+    {
+      im->flags |= TAP_INJECT_F_ENABLED;
+      return 0;
+    }
+
+  /* Register ARP and ICMP6 as neighbor nodes. */
+  ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, im->neighbor_node_index);
+  ip6_register_protocol (IP_PROTOCOL_ICMP6, im->neighbor_node_index);
+
+  /* Register remaining protocols. */
+  ip4_register_protocol (IP_PROTOCOL_ICMP, im->tx_node_index);
+
+  ip4_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+  ip4_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+  ip4_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+  ip6_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index);
+  ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index);
+  ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index);
+
+  /* Add IPv4 multicast route. */
+  {
+    ip4_add_del_route_args_t a;
+    ip_adjacency_t add_adj;
+    u32 next_node_index;
+
+    memset (&a, 0, sizeof (a));
+    memset (&add_adj, 0, sizeof (add_adj));
+
+    a.add_adj = &add_adj;
+    a.n_add_adj = 1;
+
+    a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD;
+    a.table_index_or_table_id = 0;
+    /*
+     * 224.0.0.0, written in host order and converted so that the address
+     * is laid out correctly regardless of host endianness.
+     */
+    a.dst_address.as_u32 = clib_host_to_net_u32 (0xE0000000);
+    a.dst_address_length = 24;
+    a.adj_index = ~0;
+
+    next_node_index = vlib_node_add_next (vm, ip4_lookup_node.index,
+                                          im->tx_node_index);
+
+    add_adj.explicit_fib_index = ~0;
+    add_adj.rewrite_header.node_index = ip4_rewrite_node.index;
+    add_adj.lookup_next_index = next_node_index;
+    add_adj.if_address_index = ~0;
+
+    ip4_add_del_route (&ip4_main, &a);
+  }
+
+  im->flags |= TAP_INJECT_F_ENABLED;
+
+  return 0;
+}
+
+/*
+ * Process the pending interfaces_to_enable and interfaces_to_disable lists.
+ *
+ * Runs as an interrupt-driven input node and is also called directly, with
+ * node and f set to 0, by tap_inject_enable_disable_all_interfaces (), so
+ * neither argument may be dereferenced here.
+ *
+ * A tap is connected for each pending Ethernet interface; processing of the
+ * enable list stops at the first failure. Both lists are freed afterwards.
+ *
+ * Returns 0 on success and (uword) -1 when a tap could not be connected.
+ */
+static uword
+tap_inject_iface_isr (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_hw_interface_t * hw;
+  u32 * hw_if_index;
+  clib_error_t * err = 0;
+  int failed;
+
+  vec_foreach (hw_if_index, im->interfaces_to_enable)
+    {
+      hw = vnet_get_hw_interface (vnet_get_main (), *hw_if_index);
+
+      if (hw->hw_class_index == ethernet_hw_interface_class.index)
+        {
+          err = tap_inject_tap_connect (hw);
+          if (err)
+            break;
+        }
+    }
+
+  /*
+   * NOTE(review): the prototype names this parameter sw_if_index but a
+   * hardware interface index is passed - confirm the two are interchangeable
+   * for the interfaces handled here.
+   */
+  vec_foreach (hw_if_index, im->interfaces_to_disable)
+    {
+      /* The result is not acted upon, but the error object must be freed. */
+      clib_error_t * disconnect_err = tap_inject_tap_disconnect (*hw_if_index);
+
+      clib_error_free (disconnect_err);
+    }
+
+  vec_free (im->interfaces_to_enable);
+  vec_free (im->interfaces_to_disable);
+
+  /* Remember the outcome first: reporting releases the error object. */
+  failed = (err != 0);
+  if (failed)
+    clib_error_report (err);
+
+  return failed ? -1 : 0;
+}
+
+VLIB_REGISTER_NODE (tap_inject_iface_isr_node, static) = {
+  .function = tap_inject_iface_isr,
+  .name = "tap-inject-iface-isr",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Hardware interface add/del callback.
+ *
+ * Ignored unless tap-inject is enabled in the configuration. Otherwise the
+ * interface is queued on the enable or disable list and the ISR node is
+ * interrupted to do the actual work.
+ */
+static clib_error_t *
+tap_inject_interface_add_del (struct vnet_main_t * vnet_main, u32 hw_if_index,
+                              u32 add)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (!tap_inject_is_config_enabled ())
+    return 0;
+
+  tap_inject_enable ();
+
+  if (add)
+    vec_add1 (im->interfaces_to_enable, hw_if_index);
+  else
+    vec_add1 (im->interfaces_to_disable, hw_if_index);
+
+  vlib_node_set_interrupt_pending (vm, tap_inject_iface_isr_node.index);
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (tap_inject_interface_add_del);
+
+
+/*
+ * Enable (enable != 0) or disable tap-inject on every hardware interface.
+ *
+ * All hardware interface indices are queued on the matching pending list and
+ * the ISR is run synchronously. Returns an error if the ISR reports failure.
+ */
+static clib_error_t *
+tap_inject_enable_disable_all_interfaces (int enable)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+  vnet_hw_interface_t * interfaces;
+  vnet_hw_interface_t * hw;
+  u32 ** indices;
+
+  if (enable)
+    tap_inject_enable ();
+  else
+    tap_inject_disable ();
+
+  /* Collect all the interface indices. */
+  interfaces = vnet_main->interface_main.hw_interfaces;
+  indices = enable ? &im->interfaces_to_enable : &im->interfaces_to_disable;
+  pool_foreach (hw, interfaces, vec_add1 (*indices, hw - interfaces));
+
+  if (tap_inject_iface_isr (vlib_get_main (), 0, 0))
+    return clib_error_return (0, "tap-inject interface add del isr failed");
+
+  return 0;
+}
+
+/*
+ * Handler shared by "enable tap-inject" and "disable tap-inject"; the
+ * command's function_arg selects the action (non-zero enables).
+ *
+ * Enabling is refused when tap-inject is disabled in the configuration. If
+ * enabling fails part way, the interfaces are disabled again before the
+ * error is returned.
+ */
+static clib_error_t *
+tap_inject_cli (vlib_main_t * vm, unformat_input_t * input,
+                 vlib_cli_command_t * cmd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  if (cmd->function_arg)
+    {
+      clib_error_t * err;
+
+      if (tap_inject_is_config_disabled ())
+        return clib_error_return (0,
+            "tap-inject is disabled in config, thus cannot be enabled.");
+
+      /* Enable */
+      err = tap_inject_enable_disable_all_interfaces (1);
+      if (err)
+        {
+          tap_inject_enable_disable_all_interfaces (0);
+          return err;
+        }
+
+      im->flags |= TAP_INJECT_F_CONFIG_ENABLE;
+    }
+  else
+    {
+      /* Disable */
+      tap_inject_enable_disable_all_interfaces (0);
+      im->flags &= ~TAP_INJECT_F_CONFIG_ENABLE;
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (tap_inject_enable_cmd, static) = {
+  .path = "enable tap-inject",
+  .short_help = "enable tap-inject",
+  .function = tap_inject_cli,
+  .function_arg = 1,
+};
+
+VLIB_CLI_COMMAND (tap_inject_disable_cmd, static) = {
+  .path = "disable tap-inject",
+  .short_help = "disable tap-inject",
+  .function = tap_inject_cli,
+  .function_arg = 0,
+};
+
+
+/*
+ * "show tap-inject": print one "<data plane interface> -> <tap name>" line
+ * per known tap, or a short notice when tap-inject is disabled in the
+ * configuration or not currently enabled.
+ */
+static clib_error_t *
+show_tap_inject (vlib_main_t * vm, unformat_input_t * input,
+                 vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 k, v;
+
+  if (tap_inject_is_config_disabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is disabled in config.\n");
+      return 0;
+    }
+
+  if (!tap_inject_is_enabled ())
+    {
+      vlib_cli_output (vm, "tap-inject is not enabled.\n");
+      return 0;
+    }
+
+  /* k is the tap's host interface index, v the software interface index. */
+  hash_foreach (k, v, im->tap_if_index_to_sw_if_index, {
+    vlib_cli_output (vm, "%U -> %U",
+            format_vnet_sw_interface_name, vnet_main,
+            vnet_get_sw_interface (vnet_main, v),
+            format_tap_inject_tap_name, k);
+  });
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_tap_inject_cmd, static) = {
+  .path = "show tap-inject",
+  .short_help = "show tap-inject",
+  .function = show_tap_inject,
+};
+
+
+/*
+ * Parse the "tap-inject" configuration stanza.
+ *
+ * Accepted keywords: "enable", "disable" and "netlink-only". Any other input
+ * is a syntax error, as is specifying both "enable" and "disable".
+ */
+static clib_error_t *
+tap_inject_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable"))
+        im->flags |= TAP_INJECT_F_CONFIG_ENABLE;
+
+      else if (unformat (input, "disable"))
+        im->flags |= TAP_INJECT_F_CONFIG_DISABLE;
+
+      else if (unformat (input, "netlink-only"))
+        im->flags |= TAP_INJECT_F_CONFIG_NETLINK;
+
+      else
+        return clib_error_return (0, "syntax error `%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (tap_inject_is_config_enabled () && tap_inject_is_config_disabled ())
+    return clib_error_return (0,
+              "tap-inject cannot be both enabled and disabled.");
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tap_inject_config, "tap-inject");
diff --git a/router/router/tap_inject.h b/router/router/tap_inject.h
new file mode 100644
index 0000000..001ab52
--- /dev/null
+++ b/router/router/tap_inject.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TAP_INJECT_H
+#define _TAP_INJECT_H
+
+#include
+#include
+
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN 6
+#endif
+
+/* Global state of the tap-inject plugin. */
+typedef struct {
+  /*
+   * tap-inject can be enabled or disabled in config file or during runtime.
+   * When disabled in config, it is not possible to enable during runtime.
+   *
+   * When the netlink-only option is used, netlink configuration is monitored
+   * and mirrored to the data plane but no traffic is passed between the host
+   * and the data plane.
+   */
+#define TAP_INJECT_F_CONFIG_ENABLE  (1U << 0)
+#define TAP_INJECT_F_CONFIG_DISABLE (1U << 1)
+#define TAP_INJECT_F_CONFIG_NETLINK (1U << 2)
+#define TAP_INJECT_F_ENABLED        (1U << 3)
+
+  /* Bitwise OR of the TAP_INJECT_F_* values above. */
+  u32 flags;
+
+  /* Vectors indexed by the named key; unused slots hold ~0. */
+  u32 * sw_if_index_to_tap_fd;
+  u32 * sw_if_index_to_tap_if_index;
+  u32 * tap_fd_to_sw_if_index;
+
+  /*
+   * Hash keyed by the tap's host interface index. It is accessed only with
+   * the hash_* macros, so it carries the hash handle type rather than u32 *.
+   */
+  uword * tap_if_index_to_sw_if_index;
+
+  /* Hardware interface indices waiting to be connected / disconnected. */
+  u32 * interfaces_to_enable;
+  u32 * interfaces_to_disable;
+
+  /* presumably the tap descriptors polled by the rx node - TODO confirm */
+  u32 * rx_file_descriptors;
+
+  /* Graph node indices used when registering protocols. */
+  u32 rx_node_index;
+  u32 tx_node_index;
+  u32 neighbor_node_index;
+
+  /* presumably buffer indices reserved for the rx node - TODO confirm */
+  u32 * rx_buffers;
+
+} tap_inject_main_t;
+
+
+/* Return a pointer to the plugin's global state. */
+tap_inject_main_t * tap_inject_get_main (void);
+
+/* Add or remove the mapping between a data plane interface and its tap. */
+void tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index);
+void tap_inject_delete_tap (u32 sw_if_index);
+
+/* Mapping lookups; each returns ~0 when no mapping is recorded. */
+u32 tap_inject_lookup_tap_fd (u32 sw_if_index);
+u32 tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd);
+u32 tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index);
+
+/* Non-zero when TAP_INJECT_F_ENABLED is set. */
+static inline int
+tap_inject_is_enabled (void)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  return !!(im->flags & TAP_INJECT_F_ENABLED);
+}
+
+/* Non-zero when TAP_INJECT_F_CONFIG_ENABLE is set. */
+static inline int
+tap_inject_is_config_enabled (void)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  return !!(im->flags & TAP_INJECT_F_CONFIG_ENABLE);
+}
+
+/* Non-zero when TAP_INJECT_F_CONFIG_DISABLE is set. */
+static inline int
+tap_inject_is_config_disabled (void)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  return !!(im->flags & TAP_INJECT_F_CONFIG_DISABLE);
+}
+
+
+/* Netlink */
+
+void tap_inject_enable_netlink (void);
+
+
+/* Tap */
+
+/* Both return 0 on success or an error object owned by the caller. */
+clib_error_t * tap_inject_tap_connect (vnet_hw_interface_t * hw);
+clib_error_t * tap_inject_tap_disconnect (u32 sw_if_index);
+
+/* format () callback; the visible caller passes the tap's host if index. */
+u8 * format_tap_inject_tap_name (u8 * s, va_list * args);
+
+#endif /* _TAP_INJECT_H */
diff --git a/router/router/tap_inject_netlink.c b/router/router/tap_inject_netlink.c
new file mode 100644
index 0000000..a30e262
--- /dev/null
+++ b/router/router/tap_inject_netlink.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include
+#include
+#include
+
+
+/*
+ * Mirror a host address add/delete onto the data plane interface paired
+ * with the tap the address belongs to. Notifications for taps we do not
+ * know about, and address families other than AF_INET / AF_INET6, are
+ * ignored.
+ */
+static void
+add_del_addr (ns_addr_t * a, int is_del)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                        a->ifaddr.ifa_index);
+
+  if (sw_if_index == ~0)
+    return;
+
+  if (a->ifaddr.ifa_family == AF_INET)
+    {
+      ip4_add_del_interface_address (vm, sw_if_index,
+              (ip4_address_t *) a->local, a->ifaddr.ifa_prefixlen, is_del);
+    }
+  else if (a->ifaddr.ifa_family == AF_INET6)
+    {
+      ip6_add_del_interface_address (vm, sw_if_index,
+              (ip6_address_t *) a->addr, a->ifaddr.ifa_prefixlen, is_del);
+    }
+}
+
+
+/* Argument block handed to set_flags_cb through the main-thread RPC. */
+struct set_flags_args {
+  u32 index;
+  u8 flags;
+};
+
+/* RPC callback: apply the requested flags to the software interface. */
+static void
+set_flags_cb (struct set_flags_args * a)
+{
+  vnet_sw_interface_set_flags (vnet_get_main (), a->index, a->flags);
+}
+
+/*
+ * Mirror the host link's IFF_UP state onto the admin-up flag of the paired
+ * data plane interface. The flag change itself is executed via
+ * vl_api_rpc_call_main_thread rather than directly from this callback.
+ *
+ * NOTE(review): is_del is not consulted here; a link deletion is handled
+ * exactly like any other link update.
+ */
+static void
+add_del_link (ns_link_t * l, int is_del)
+{
+  struct set_flags_args args = { ~0, 0 };
+  vnet_sw_interface_t * sw;
+  u8 flags = 0;
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                        l->ifi.ifi_index);
+
+  if (sw_if_index == ~0)
+    return;
+
+  sw = vnet_get_sw_interface (vnet_get_main (), sw_if_index);
+
+  /* Start from the current flags so only ADMIN_UP is touched. */
+  flags = sw->flags;
+
+  if (l->ifi.ifi_flags & IFF_UP)
+    flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+  else
+    flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  args.index = sw_if_index;
+  args.flags = flags;
+
+  vl_api_rpc_call_main_thread (set_flags_cb, (u8 *)&args, sizeof (args));
+}
+
+
+/*
+ * Mirror a host neighbor (ARP / IPv6 ND) entry onto the data plane.
+ *
+ * An entry is installed only for a non-delete notification whose state has
+ * NUD_REACHABLE set. A delete notification always removes the entry,
+ * whatever state it was last seen in; previously is_del was ignored, so
+ * deleting a still-REACHABLE neighbor re-installed it. For non-delete
+ * notifications the behavior is unchanged: IPv4 entries are removed on
+ * NUD_FAILED, IPv6 entries on any state without NUD_REACHABLE.
+ */
+static void
+add_del_neigh (ns_neigh_t * n, int is_del)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index;
+  int is_reachable;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                        n->nd.ndm_ifindex);
+
+  if (sw_if_index == ~0)
+    return;
+
+  /* A deletion must never (re-)install the entry. */
+  is_reachable = !is_del && (n->nd.ndm_state & NUD_REACHABLE);
+
+  if (n->nd.ndm_family == AF_INET)
+    {
+      ethernet_arp_ip4_over_ethernet_address_t a;
+
+      memset (&a, 0, sizeof (a));
+
+      clib_memcpy (&a.ethernet, n->lladdr, ETHER_ADDR_LEN);
+      clib_memcpy (&a.ip4, n->dst, sizeof (a.ip4));
+
+      if (is_reachable)
+        vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a, 0);
+      else if (is_del || (n->nd.ndm_state & NUD_FAILED))
+        vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a);
+    }
+  else if (n->nd.ndm_family == AF_INET6)
+    {
+      if (is_reachable)
+        vnet_set_ip6_ethernet_neighbor (vm, sw_if_index,
+                (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, 0);
+      else
+        vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index,
+                (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN);
+    }
+}
+
+
+/* Only routes from this host routing table are mirrored. */
+#define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254
+
+/*
+ * Mirror a host route add/delete onto the data plane. Routes whose output
+ * interface is not one of our taps, or that are not in table
+ * TAP_INJECT_HOST_ROUTE_TABLE_MAIN, are ignored.
+ */
+static void
+add_del_route (ns_route_t * r, int is_del)
+{
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif);
+
+  if (sw_if_index == ~0 || r->table != TAP_INJECT_HOST_ROUTE_TABLE_MAIN)
+    return;
+
+  if (r->rtm.rtm_family == AF_INET)
+    {
+      ip4_add_del_route_next_hop (&ip4_main,
+              is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD,
+              (ip4_address_t *) r->dst, r->rtm.rtm_dst_len,
+              (ip4_address_t *) r->gateway, sw_if_index, 0, ~0, 0);
+    }
+  else if (r->rtm.rtm_family == AF_INET6)
+    {
+      ip6_add_del_route_next_hop (&ip6_main,
+              is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD,
+              (ip6_address_t *) r->dst, r->rtm.rtm_dst_len,
+              (ip6_address_t *) r->gateway, sw_if_index, 0, ~0, 0);
+    }
+}
+
+
+/*
+ * Netlink notification entry point: dispatch on the object type and pass
+ * along whether NETNS_F_DEL is set. Other object types are ignored.
+ */
+static void
+netns_notify_cb (void * obj, netns_type_t type, u32 flags, uword opaque)
+{
+  if (type == NETNS_TYPE_ADDR)
+    add_del_addr ((ns_addr_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_LINK)
+    add_del_link ((ns_link_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_NEIGH)
+    add_del_neigh ((ns_neigh_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_ROUTE)
+    add_del_route ((ns_route_t *)obj, flags & NETNS_F_DEL);
+}
+
+/*
+ * Subscribe netns_notify_cb to netlink notifications. The namespace name
+ * passed to netns_open is the empty string.
+ */
+void
+tap_inject_enable_netlink (void)
+{
+  char nsname = 0;
+  netns_sub_t sub = {
+    .notify = netns_notify_cb,
+    .opaque = 0,
+  };
+
+  netns_open (&nsname, &sub);
+}
diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c
new file mode 100644
index 0000000..fe108dc
--- /dev/null
+++ b/router/router/tap_inject_node.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include
+#include
+
+vlib_node_registration_t tap_inject_rx_node;
+vlib_node_registration_t tap_inject_tx_node;
+vlib_node_registration_t tap_inject_neighbor_node;
+
+/* Next-node slots of tap-inject-neighbor (see its registration below). */
+enum {
+  NEXT_NEIGHBOR_ARP,
+  NEXT_NEIGHBOR_ICMP6,
+};
+
+
+/*
+ * Write the current data of buffer b to the tap fd. Only the first buffer
+ * of a chain is written; a short write or a chained buffer is reported as
+ * "buffer truncated".
+ */
+static inline void
+tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
+{
+  struct iovec iov;
+  ssize_t n_bytes;
+
+  iov.iov_base = vlib_buffer_get_current (b);
+  iov.iov_len = b->current_length;
+
+  n_bytes = writev (fd, &iov, 1);
+
+  if (n_bytes < 0)
+    clib_warning ("writev failed");
+  else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    clib_warning ("buffer truncated");
+}
+
+/*
+ * tap-inject-tx: inject every packet of the frame into the host through the
+ * tap paired with its RX interface. Packets from interfaces without a tap
+ * are skipped. All buffers of the frame are freed afterwards.
+ */
+static uword
+tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      b = vlib_get_buffer (vm, pkts[i]);
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        continue;
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+    }
+
+  vlib_buffer_free (vm, pkts, f->n_vectors);
+  return f->n_vectors;
+}
+
+VLIB_REGISTER_NODE (tap_inject_tx_node) = {
+  .function = tap_inject_tx,
+  .name = "tap-inject-tx",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+};
+
+
+/*
+ * tap-inject-neighbor: inject each packet into the host like tap-inject-tx
+ * and, in addition, hand ARP replies to arp-input and ICMP6 neighbor
+ * advertisements to icmp6-neighbor-solicitation so that both the host and
+ * the data plane can resolve the next hop. Every other packet is freed
+ * once it has been written to the tap.
+ */
+static uword
+tap_inject_neighbor (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+  u32 bi;
+  u32 next_index = node->cached_next_index;
+  u32 n_left;
+  u32 * to_next;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      /*
+       * The next node is decided per packet. It used to be initialised
+       * once per frame, so after the first ARP reply / neighbor
+       * advertisement every later packet was forwarded to that node too.
+       */
+      u32 next = ~0;
+
+      bi = pkts[i];
+      b = vlib_get_buffer (vm, bi);
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+
+      /* Send the buffer to a neighbor node too? */
+      {
+        ethernet_header_t * eth = vlib_buffer_get_current (b);
+        u16 ether_type = ntohs (eth->type);
+
+        if (ether_type == ETHERNET_TYPE_ARP)
+          {
+            ethernet_arp_header_t * arp = (void *)(eth + 1);
+
+            if (arp->opcode == htons (ETHERNET_ARP_OPCODE_reply))
+              next = NEXT_NEIGHBOR_ARP;
+          }
+        else if (ether_type == ETHERNET_TYPE_IP6)
+          {
+            ip6_header_t * ip = (void *)(eth + 1);
+            icmp46_header_t * icmp = (void *)(ip + 1);
+
+            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
+                icmp->type == ICMP6_neighbor_advertisement)
+              next = NEXT_NEIGHBOR_ICMP6;
+          }
+      }
+
+      if (next == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
+      vlib_buffer_advance (b, sizeof (ethernet_header_t));
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+      *(to_next++) = bi;
+      --n_left;
+
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                       n_left, bi, next);
+      vlib_put_next_frame (vm, node, next_index, n_left);
+    }
+
+  return f->n_vectors;
+}
+
+VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
+  .function = tap_inject_neighbor,
+  .name = "tap-inject-neighbor",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [NEXT_NEIGHBOR_ARP] = "arp-input",
+    [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
+  },
+};
+
+
+#define MTU 1500
+/* Number of vlib buffers needed to hold one MTU-sized packet. */
+#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
+#define NUM_BUFFERS_TO_ALLOC 32
+
+/*
+ * Read one packet from the tap fd into pre-allocated buffers and send it to
+ * the output node of the paired data plane interface.
+ *
+ * Returns 1 when a packet was forwarded, 0 when the fd has no paired
+ * interface, buffers could not be allocated, or readv failed.
+ */
+static inline uword
+tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 sw_if_index;
+  struct iovec iov[MTU_BUFFERS];
+  u32 bi[MTU_BUFFERS];
+  vlib_buffer_t * b;
+  ssize_t n_bytes;
+  ssize_t n_bytes_left;
+  u32 i, j;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
+  if (sw_if_index == ~0)
+    return 0;
+
+  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
+  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+    {
+      u32 len = vec_len (im->rx_buffers);
+      u32 n_alloc;
+
+      /*
+       * Make room for a whole batch behind the indices we still hold; the
+       * vector was sized for exactly NUM_BUFFERS_TO_ALLOC entries, so
+       * writing a batch at a non-zero offset would run past its end.
+       */
+      vec_validate (im->rx_buffers, len + NUM_BUFFERS_TO_ALLOC - 1);
+
+      n_alloc = vlib_buffer_alloc_from_free_list (vm,
+                    &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC,
+                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+      /* Only the indices actually allocated count as vector elements. */
+      _vec_len (im->rx_buffers) = len + n_alloc;
+
+      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+        {
+          clib_warning ("failed to allocate buffers");
+          return 0;
+        }
+    }
+
+  /* Fill buffers from the end of the list to make it easier to resize. */
+  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
+    {
+      bi[i] = im->rx_buffers[j];
+
+      b = vlib_get_buffer (vm, bi[i]);
+
+      iov[i].iov_base = b->data;
+      iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
+    }
+
+  n_bytes = readv (fd, iov, MTU_BUFFERS);
+  if (n_bytes < 0)
+    {
+      clib_warning ("readv failed");
+      return 0;
+    }
+
+  b = vlib_get_buffer (vm, bi[0]);
+
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+  n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
+
+  if (n_bytes_left > 0)
+    {
+      b->total_length_not_including_first_buffer = n_bytes_left;
+      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+    }
+
+  /* Overwritten below when the packet spills into further buffers. */
+  b->current_length = n_bytes;
+
+  /* If necessary, configure any remaining buffers in the chain. */
+  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
+    {
+      b = vlib_get_buffer (vm, bi[i - 1]);
+      b->current_length = VLIB_BUFFER_DATA_SIZE;
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+      b->next_buffer = bi[i];
+
+      b = vlib_get_buffer (vm, bi[i]);
+      b->current_length = n_bytes_left;
+    }
+
+  /* i is now the number of buffers consumed from the end of the list. */
+  _vec_len (im->rx_buffers) -= i;
+
+  vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
+
+  /* Get the packet to the output node. */
+  {
+    vnet_hw_interface_t * hw;
+    vlib_frame_t * new_frame;
+    u32 * to_next;
+
+    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
+
+    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+    to_next = vlib_frame_vector_args (new_frame);
+    to_next[0] = bi[0];
+    new_frame->n_vectors = 1;
+
+    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
+  }
+
+  return 1;
+}
+
+/*
+ * tap-inject-rx: read one packet from every tap fd queued in
+ * rx_file_descriptors. Processing stops at the first failure (the returned
+ * count is then 0); the queue of pending fds is freed in either case.
+ */
+static uword
+tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 * fd;
+  uword count = 0;
+
+  vec_foreach (fd, im->rx_file_descriptors)
+    {
+      if (tap_rx (vm, node, f, *fd) != 1)
+        {
+          clib_warning ("rx failed");
+          count = 0;
+          break;
+        }
+      ++count;
+    }
+
+  vec_free (im->rx_file_descriptors);
+
+  return count;
+}
+
+VLIB_REGISTER_NODE (tap_inject_rx_node) = {
+  .function = tap_inject_rx,
+  .name = "tap-inject-rx",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Record the indices of the three nodes in the plugin main struct and
+ * reserve room for one batch of rx buffer indices (length reset to 0).
+ */
+static clib_error_t *
+tap_inject_init (vlib_main_t * vm)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  im->rx_node_index = tap_inject_rx_node.index;
+  im->tx_node_index = tap_inject_tx_node.index;
+  im->neighbor_node_index = tap_inject_neighbor_node.index;
+
+  vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
+  vec_reset_length (im->rx_buffers);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_inject_init);
diff --git a/router/router/tap_inject_tap.c b/router/router/tap_inject_tap.c
new file mode 100644
index 0000000..9650323
--- /dev/null
+++ b/router/router/tap_inject_tap.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+
+/*
+ * Read callback registered for every tap fd: queue the fd for the
+ * tap-inject-rx node and raise that node's interrupt.
+ */
+static clib_error_t *
+tap_inject_tap_read (unix_file_t * f)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  vec_add1 (im->rx_file_descriptors, f->file_descriptor);
+
+  vlib_node_set_interrupt_pending (vm, im->rx_node_index);
+
+  return 0;
+}
+
+#define TAP_INJECT_TAP_BASE_NAME "vpp"
+
+/*
+ * Create the host tap for a hardware interface and register it.
+ *
+ * The tap is named TAP_INJECT_TAP_BASE_NAME followed by the hardware
+ * instance number, set to non-blocking io, and given the hardware address
+ * of hw (when it has one). Its fd is registered with the unix file poller
+ * and recorded with tap_inject_insert_tap.
+ *
+ * Returns 0 on success, otherwise an error; on failure every descriptor
+ * opened here is closed again.
+ */
+clib_error_t *
+tap_inject_tap_connect (vnet_hw_interface_t * hw)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vnet_sw_interface_t * sw = vnet_get_sw_interface (vnet_main, hw->hw_if_index);
+  static const int one = 1;
+  clib_error_t * err = 0;
+  int fd = -1;
+  int tap_fd;
+  struct ifreq ifr;
+  unix_file_t template;
+  u8 * name;
+
+  memset (&ifr, 0, sizeof (ifr));
+  memset (&template, 0, sizeof (template));
+
+  ASSERT (hw->hw_if_index == sw->sw_if_index);
+
+  /* Create the tap. */
+  tap_fd = open ("/dev/net/tun", O_RDWR);
+
+  if (tap_fd < 0)
+    return clib_error_return (0, "failed to open tun device");
+
+  name = format (0, TAP_INJECT_TAP_BASE_NAME "%u%c", hw->hw_instance, 0);
+
+  /* ifr was zeroed above, so the copied name stays NUL-terminated. */
+  strncpy (ifr.ifr_name, (char *) name, sizeof (ifr.ifr_name) - 1);
+
+  /* The name now lives in ifr; release the temporary vector. */
+  vec_free (name);
+
+  ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+  if (ioctl (tap_fd, TUNSETIFF, (void *)&ifr) < 0)
+    {
+      err = clib_error_return (0, "failed to create tap");
+      goto error;
+    }
+
+  if (ioctl (tap_fd, FIONBIO, &one) < 0)
+    {
+      err = clib_error_return (0, "failed to set tap to non-blocking io");
+      goto error;
+    }
+
+  /* Open a socket to configure the device. */
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    {
+      err = clib_error_return (0, "failed to configure tap");
+      goto error;
+    }
+
+  if (hw->hw_address)
+    clib_memcpy (ifr.ifr_hwaddr.sa_data, hw->hw_address, ETHER_ADDR_LEN);
+
+  ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+  /* Set the hardware address. */
+  if (ioctl (fd, SIOCSIFHWADDR, &ifr) < 0)
+    {
+      err = clib_error_return (0, "failed to set tap hardware address");
+      goto error;
+    }
+
+  /* Get the tap if index. */
+  if (ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
+    {
+      err = clib_error_return (0, "failed to procure tap if index");
+      goto error;
+    }
+
+  close (fd);
+
+  /* Get notified when the tap needs to be read. */
+  template.read_function = tap_inject_tap_read;
+  template.file_descriptor = tap_fd;
+
+  unix_file_add (&unix_main, &template);
+
+  tap_inject_insert_tap (sw->sw_if_index, tap_fd, ifr.ifr_ifindex);
+
+  return 0;
+
+error:
+  if (fd >= 0)
+    close (fd);
+  close (tap_fd);
+  return err;
+}
+
+/*
+ * Remove the tap paired with sw_if_index: drop it from the lookup tables
+ * and close its fd. Returns an error when the interface has no tap.
+ *
+ * NOTE(review): the unix file registration made in tap_inject_tap_connect
+ * is not removed here -- confirm whether tap_inject_delete_tap or another
+ * path takes care of it.
+ */
+clib_error_t *
+tap_inject_tap_disconnect (u32 sw_if_index)
+{
+  u32 tap_fd;
+
+  tap_fd = tap_inject_lookup_tap_fd (sw_if_index);
+  if (tap_fd == ~0)
+    return clib_error_return (0, "failed to disconnect tap");
+
+  tap_inject_delete_tap (sw_if_index);
+
+  close (tap_fd);
+  return 0;
+}
+
+
+/*
+ * Format function: append the host name of the interface whose if index is
+ * the next u32 in args. When the name cannot be resolved, s is returned
+ * unchanged (returning 0 here would discard what the caller has formatted
+ * so far).
+ */
+u8 *
+format_tap_inject_tap_name (u8 * s, va_list * args)
+{
+  int fd;
+  struct ifreq ifr;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  /* Consume the argument up front so the va_list stays in step on every
+   * return path. */
+  ifr.ifr_ifindex = va_arg (*args, u32);
+
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    return s;
+
+  if (ioctl (fd, SIOCGIFNAME, &ifr) < 0)
+    {
+      close (fd);
+      return s;
+    }
+
+  close (fd);
+
+  return format (s, "%s", ifr.ifr_name);
+}
-- cgit 1.2.3-korg