diff options
Diffstat (limited to 'router')
-rw-r--r-- | router/Makefile.am | 5 | ||||
-rw-r--r-- | router/router/router.c | 691 | ||||
-rw-r--r-- | router/router/tap_inject.c | 375 | ||||
-rw-r--r-- | router/router/tap_inject.h | 109 | ||||
-rw-r--r-- | router/router/tap_inject_netlink.c | 184 | ||||
-rw-r--r-- | router/router/tap_inject_node.c | 331 | ||||
-rw-r--r-- | router/router/tap_inject_tap.c | 170 |
7 files changed, 1173 insertions, 692 deletions
diff --git a/router/Makefile.am b/router/Makefile.am index 2e9b38f..b9de6a1 100644 --- a/router/Makefile.am +++ b/router/Makefile.am @@ -3,7 +3,10 @@ AUTOMAKE_OPTIONS = foreign subdir-objects AM_CFLAGS = -Wall -I@TOOLKIT_INCLUDE@ lib_LTLIBRARIES = router.la -router_la_SOURCES = router/router.c +router_la_SOURCES = router/tap_inject.c \ + router/tap_inject_netlink.c \ + router/tap_inject_node.c \ + router/tap_inject_tap.c router_la_LDFLAGS = -module router_la_LIBADD = -lrtnl diff --git a/router/router/router.c b/router/router/router.c deleted file mode 100644 index 741f34c..0000000 --- a/router/router/router.c +++ /dev/null @@ -1,691 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include <net/ethernet.h> -#include <net/if_arp.h> -#include <netinet/in.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <vnet/vnet.h> -#include <vnet/plugin/plugin.h> -#include <vnet/ip/ip.h> -#include <vnet/unix/tuntap.h> -#include <librtnl/mapper.h> -#include <vnet/ethernet/arp_packet.h> -#include <vlibmemory/api.h> - -enum { - NEXT_UNTAPPED = 0, - NEXT_INJECT, -}; - -enum { - ERROR_INJECT_ARP, - ERROR_INJECT_ICMP, - ERROR_INJECT_CLASSIFIED, -}; - -static char *error_strings[] = { - [ERROR_INJECT_ARP] = "Inject ARP", - [ERROR_INJECT_ICMP] = "Inject ICMP", - [ERROR_INJECT_CLASSIFIED] = "Inject Classified", -}; - -struct tap_to_iface { - u32 tap; - u32 iface; -}; - -struct router_main { - vnet_main_t *vnet_main; - u32 *iface_to_tap; - u32 *iface_to_protos; - struct tap_to_iface *tap_to_iface; - u32 ns_index; -}; - -static struct router_main rm; - -enum { - PROTO_ARP = 0, - PROTO_ICMP4, - PROTO_IGMP4, - PROTO_OSPF2, - PROTO_TCP, - PROTO_UDP, - PROTO_N_TOTAL, -}; - -enum { - PROTO_BIT_ARP = 1 << PROTO_ARP, - PROTO_BIT_ICMP4 = 1 << PROTO_ICMP4, - PROTO_BIT_IGMP4 = 1 << PROTO_IGMP4, - PROTO_BIT_OSPF2 = 1 << PROTO_OSPF2, - PROTO_BIT_TCP = 1 << PROTO_TCP, - PROTO_BIT_UDP = 1 << PROTO_UDP, -}; - -static char *proto_strings[PROTO_N_TOTAL] = { - [PROTO_ARP] = "arp", - [PROTO_ICMP4] = "icmp4", - [PROTO_IGMP4] = "igmp4", - [PROTO_OSPF2] = "ospf2", - [PROTO_TCP] = "tcp", - [PROTO_UDP] = "udp", -}; - -static inline u32 parse_protos(char *proto_string) -{ - u32 protos = 0; - char *tok, **proto; - - for (tok = strtok(proto_string, ","); tok; tok = strtok(NULL, ",")) - for (proto = proto_strings; proto && *proto; ++proto) - if (!strncmp(tok, *proto, 16)) - protos |= 1 << (proto - proto_strings); - return protos; -} - -static uword unformat_protos(unformat_input_t *input, va_list *args) -{ - u32 *protos = va_arg(*args, u32 *); - u8 *proto_string; - - if (unformat(input, "%s", &proto_string)) - *protos = parse_protos((char *)proto_string); - return 1; -} - 
-vlib_node_registration_t tap_inject_arp_node; -vlib_node_registration_t tap_inject_icmp_node; -vlib_node_registration_t tap_inject_classified_node; - -static inline void -update_arp_entry(vlib_buffer_t *b0, ethernet_arp_header_t *arp, u32 vlib_rx) -{ - ethernet_header_t *eth; - ip4_address_t *if_addr; - ip_interface_address_t *ifa; - - if (arp->l2_type != ntohs(ETHERNET_ARP_HARDWARE_TYPE_ethernet) || - arp->l3_type != ntohs(ETHERNET_TYPE_IP4)) - return; - - /* Check that IP address is local and matches incoming interface. */ - if_addr = ip4_interface_address_matching_destination(&ip4_main, - &arp->ip4_over_ethernet[1].ip4, - vlib_rx, &ifa); - if (!if_addr) - return; - - /* Source must also be local to subnet of matching interface address. */ - if (!ip4_destination_matches_interface(&ip4_main, - &arp->ip4_over_ethernet[0].ip4, ifa)) - return; - - /* Reject replies with our local interface address. */ - if (if_addr->as_u32 == arp->ip4_over_ethernet[0].ip4.as_u32) - return; - - if (if_addr->as_u32 != arp->ip4_over_ethernet[1].ip4.as_u32) - return; - - eth = ethernet_buffer_get_header(b0); - - /* Trash ARP packets whose ARP-level source addresses do not - * match their L2-frame-level source addresses */ - if (memcmp(eth->src_address, arp->ip4_over_ethernet[0].ethernet, - sizeof(eth->src_address))) - return; - - if (arp->ip4_over_ethernet[0].ip4.as_u32 == 0 || - (arp->ip4_over_ethernet[0].ip4.as_u32 == - arp->ip4_over_ethernet[1].ip4.as_u32)) - return; - - /* Learn or update sender's mapping only for requests or unicasts - * that don't match local interface address. 
*/ - if (ethernet_address_cast(eth->dst_address) != ETHERNET_ADDRESS_UNICAST) - return; - - vnet_arp_set_ip4_over_ethernet(rm.vnet_main, vlib_rx, ~0, - &arp->ip4_over_ethernet[0], 0); -} - -static uword -tap_inject_func(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f, - int mode) -{ - u32 n_left_from = f->n_vectors; - u32 *from = vlib_frame_vector_args(f); - u32 next_index = node->cached_next_index; - u32 *to_next; - u32 counter, count = 0; - - while (n_left_from) { - vlib_buffer_t *b0; - u32 next0, bi0, n_left; - u32 vlib_rx, vlib_tx; - u32 protos, proto_bit = 0; - - vlib_get_next_frame(m, node, next_index, to_next, n_left); - - *(to_next++) = bi0 = *(from++); - --n_left_from; - --n_left; - - b0 = vlib_get_buffer(m, bi0); - - vlib_rx = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - vlib_tx = rm.iface_to_tap[vlib_rx]; - protos = rm.iface_to_protos[vlib_rx]; - - next0 = NEXT_UNTAPPED; - - if (vlib_tx == 0 || vlib_tx == ~0 || protos == 0) - goto untapped; - - if (mode == ERROR_INJECT_CLASSIFIED) { - ip4_header_t *iphdr; - - iphdr = vlib_buffer_get_current(b0); - if (iphdr->protocol == IP_PROTOCOL_TCP) - proto_bit = PROTO_BIT_TCP; - else if (iphdr->protocol == IP_PROTOCOL_UDP) - proto_bit = PROTO_BIT_UDP; - else if (iphdr->protocol == IP_PROTOCOL_OSPF) - proto_bit = PROTO_BIT_OSPF2; - else if (iphdr->protocol == IP_PROTOCOL_IGMP) - proto_bit = PROTO_BIT_IGMP4; - } else if (mode == ERROR_INJECT_ARP) { - proto_bit = PROTO_BIT_ARP; - } else if (mode == ERROR_INJECT_ICMP) { - proto_bit = PROTO_BIT_ICMP4; - } - - if (!(protos & proto_bit)) - goto untapped; - - next0 = NEXT_INJECT; - - vnet_buffer(b0)->sw_if_index[VLIB_TX] = vlib_tx; - ++count; - - if (mode == ERROR_INJECT_ARP) { - ethernet_arp_header_t *arphdr; - - arphdr = vlib_buffer_get_current(b0); - if (arphdr->opcode == ntohs(ETHERNET_ARP_OPCODE_reply)) - update_arp_entry(b0, arphdr, vlib_rx); - } - - /* FIXME: What about VLAN? 
*/ - b0->current_data -= sizeof(ethernet_header_t); - b0->current_length += sizeof(ethernet_header_t); - -untapped: - vlib_validate_buffer_enqueue_x1(m, node, next_index, to_next, - n_left, bi0, next0); - vlib_put_next_frame(m, node, next_index, n_left); - } - - switch (mode) { - case ERROR_INJECT_ARP: - counter = ERROR_INJECT_ARP; - break; - case ERROR_INJECT_ICMP: - counter = ERROR_INJECT_ICMP; - break; - default: - counter = ERROR_INJECT_CLASSIFIED; - } - - vlib_node_increment_counter(m, node->node_index, counter, count); - return f->n_vectors; -} - -static uword -tap_inject_arp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_ARP); -} - -VLIB_REGISTER_NODE(tap_inject_arp_node) = { - .function = tap_inject_arp, - .name = "tap-inject-arp", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "arp-input", - [NEXT_INJECT] = "interface-output", - }, -}; - -static uword -tap_inject_icmp(vlib_main_t *m, vlib_node_runtime_t *node, vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_ICMP); -} - -VLIB_REGISTER_NODE(tap_inject_icmp_node) = { - .function = tap_inject_icmp, - .name = "tap-inject-icmp", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "ip4-icmp-input", - [NEXT_INJECT] = "interface-output", - }, -}; - -static uword -tap_inject_classified(vlib_main_t *m, vlib_node_runtime_t *node, - vlib_frame_t *f) -{ - return tap_inject_func(m, node, f, ERROR_INJECT_CLASSIFIED); -} - -VLIB_REGISTER_NODE(tap_inject_classified_node) = { - .function = tap_inject_classified, - .name = "tap-inject-classified", - .vector_size = sizeof(u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = 
ARRAY_LEN(error_strings), - .error_strings = error_strings, - .n_next_nodes = 2, - .next_nodes = { - [NEXT_UNTAPPED] = "error-drop", - [NEXT_INJECT] = "interface-output", - }, -}; - -static int -set_tap_hwaddr(vlib_main_t *m, char *name, u8 *hwaddr) -{ - int fd, rc; - struct ifreq ifr; - - fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd < 0) - return -1; - - memset(&ifr, 0, sizeof(ifr)); - strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1); - memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, ETHER_ADDR_LEN); - ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; - rc = ioctl(fd, SIOCSIFHWADDR, &ifr) < 0 ? -1 : 0; - close(fd); - return rc; -} - -static int -set_tap_link_state(vlib_main_t *m, char *name, u16 flags) -{ - int fd, rc; - struct ifreq ifr; - - fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd < 0) - return -1; - - memset(&ifr, 0, sizeof(ifr)); - strncpy(ifr.ifr_name, (char *)name, sizeof(ifr.ifr_name) - 1); - - rc = ioctl(fd, SIOCGIFFLAGS, &ifr); - if (rc < 0) - goto out; - - if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); - else - ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); - - rc = ioctl(fd, SIOCSIFFLAGS, &ifr) < 0 ? 
-1 : 0; -out: - close(fd); - return rc; -} - -static clib_error_t * -do_tap_connect(vlib_main_t *m, char *name, u32 iface, u32 *tap) -{ - vnet_hw_interface_t *hw = vnet_get_hw_interface(rm.vnet_main, iface); - vnet_sw_interface_t *sw = vnet_get_sw_interface(rm.vnet_main, iface); - u64 hw_address = 0; - - *tap = ~0; - if (!hw) - return clib_error_return(0, "invalid interface"); - else if (hw->hw_address) - memcpy(&hw_address, hw->hw_address, 6); - - if (vnet_tap_connect(m, (u8 *)name, (u8 *)&hw_address, tap)) - return clib_error_return(0, "failed to connect tap"); - - if (set_tap_hwaddr(m, name, (u8 *)&hw_address)) - return clib_error_return(0, "failed to set tap hw address"); - - if (set_tap_link_state(m, name, sw->flags)) - return clib_error_return(0, "failed to set tap link state"); - - if (set_int_l2_mode(m, rm.vnet_main, MODE_L2_XC, *tap, 0, 0, 0, iface)) - return clib_error_return(0, "failed to xconnect to interface"); - - return vnet_sw_interface_set_flags(rm.vnet_main, *tap, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); -} - -static void add_del_addr(ns_addr_t *a, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - - vec_foreach(map, rm.tap_to_iface) { - if (a->ifaddr.ifa_index == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0) - return; - - ip4_add_del_interface_address(vlib_get_main(), - sw_if_index, (ip4_address_t *)a->local, - a->ifaddr.ifa_prefixlen, is_del); -} - -static void add_del_route(ns_route_t *r, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - - vec_foreach(map, rm.tap_to_iface) { - if (r->oif == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0 || r->table != 254) - return; - - ip4_add_del_route_next_hop(&ip4_main, - is_del ? 
IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD, - (ip4_address_t *)r->dst, r->rtm.rtm_dst_len, - (ip4_address_t *)r->gateway, sw_if_index, 0, ~0, 0); -} - -struct set_flags_args { - u32 sw_if_index; - u8 flags; -}; - -static void set_interface_flags_callback(struct set_flags_args *a) -{ - vnet_sw_interface_set_flags(rm.vnet_main, a->sw_if_index, - a->flags); -} - -static void add_del_link(ns_link_t *l, int is_del) -{ - struct tap_to_iface *map = NULL; - u32 sw_if_index = ~0; - u8 flags = 0; - struct set_flags_args args; - vnet_sw_interface_t *sw = NULL; - - vec_foreach(map, rm.tap_to_iface) { - if (l->ifi.ifi_index == map->tap) { - sw_if_index = map->iface; - break; - } - } - - if (sw_if_index == ~0) - return; - - sw = vnet_get_sw_interface(rm.vnet_main, sw_if_index); - flags = sw->flags; - - if (l->ifi.ifi_flags & IFF_UP) - flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP; - else - flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP; - - args.sw_if_index = sw_if_index; - args.flags = flags; - - vl_api_rpc_call_main_thread(set_interface_flags_callback, - (u8 *) &args, sizeof(args)); -} - -static void -netns_notify_cb(void *obj, netns_type_t type, u32 flags, uword opaque) -{ - if (type == NETNS_TYPE_ADDR) - add_del_addr((ns_addr_t *)obj, flags & NETNS_F_DEL); - else if (type == NETNS_TYPE_ROUTE) - add_del_route((ns_route_t *)obj, flags & NETNS_F_DEL); - else if (type == NETNS_TYPE_LINK) - add_del_link((ns_link_t *)obj, flags & NETNS_F_DEL); -} - -static void insert_tap_to_iface(u32 tap, u32 iface) -{ - struct tap_to_iface map = { - .tap = tap, - .iface = iface, - }; - - vec_add1(rm.tap_to_iface, map); -} - - -static u32 ip4_next_index = ~0; - -static u32 -ip4_lookup_next_index(void) -{ - if (ip4_next_index == ~0) { - ip4_next_index = vlib_node_add_next(vlib_get_main(), - ip4_lookup_node.index, - tap_inject_classified_node.index); - } - - return ip4_next_index; -} - -static u32 ip4_multicast_arc_added; - -static void -add_ip4_multicast_arc(void) -{ - ip4_add_del_route_args_t a; - 
ip_adjacency_t add_adj; - - if (ip4_multicast_arc_added) - return; - - memset(&a, 0, sizeof(a)); - memset(&add_adj, 0, sizeof(add_adj)); - - a.add_adj = &add_adj; - a.n_add_adj = 1; - - a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD; - a.table_index_or_table_id = 0; - a.dst_address.as_u32 = 0x000000E0; /* 224.0.0.0 */ - a.dst_address_length = 24; - a.adj_index = ~0; - - add_adj.explicit_fib_index = ~0; - add_adj.rewrite_header.node_index = ip4_rewrite_node.index; - add_adj.lookup_next_index = ip4_lookup_next_index(); - add_adj.if_address_index = ~0; - - ip4_add_del_route(&ip4_main, &a); - ip4_multicast_arc_added = 1; -} - -static clib_error_t * -tap_inject(vlib_main_t *m, unformat_input_t *input, vlib_cli_command_t *cmd) -{ - char *name = NULL; - u32 iface = ~0, tap = ~0, protos = 0; - clib_error_t *err; - - while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) { - if (unformat(input, "from %U", unformat_vnet_sw_interface, - rm.vnet_main, &iface)) - ; - else if (unformat(input, "as %s", &name)) - ; - else if (unformat(input, "%U", unformat_protos, &protos)) - ; - else - break; - } - - if (!protos) - return clib_error_return(0, - "no protocols specified"); - else if (iface == ~0) - return clib_error_return(0, - "interface name is missing or invalid"); - else if (!name) - return clib_error_return(0, - "host interface name is missing or invalid"); - - if (protos & PROTO_BIT_OSPF2) { - /* Require arp, icmp4, and igmp4 for ospf2. */ - if (!(protos & PROTO_BIT_ARP) || - !(protos & PROTO_BIT_ICMP4) || - !(protos & PROTO_BIT_IGMP4)) - return clib_error_return(0, - "ospf2 requires arp, icmp4, and igmp4"); - } - - if (protos & PROTO_BIT_TCP) /* Require arp and icmp4 for tcp. */ - if (!(protos & PROTO_BIT_ARP) || !(protos & PROTO_BIT_ICMP4)) - return clib_error_return(0, - "tcp requires arp and icmp4"); - - if (protos & PROTO_BIT_UDP) { - /* Require arp, icmp4, and igmp4 for udp. 
*/ - if (!(protos & PROTO_BIT_ARP) || - !(protos & PROTO_BIT_ICMP4) || - !(protos & PROTO_BIT_IGMP4)) - return clib_error_return(0, - "udp requires arp, icmp4, and igmp4"); - } - - err = do_tap_connect(m, name, iface, &tap); - if (err) { - if (tap != ~0) - vnet_tap_delete(m, tap); - return err; - } - - if ((protos & PROTO_BIT_ARP) || (protos & PROTO_BIT_ICMP4)) { - if (rm.ns_index == ~0) { - char nsname = 0; - netns_sub_t sub = { - .notify = netns_notify_cb, - .opaque = 0, - }; - - rm.ns_index = netns_open(&nsname, &sub); - if (rm.ns_index == ~0) { - vnet_tap_delete(m, tap); - clib_error_return(0, - "failed to open namespace"); - } - } - } - - if (protos & PROTO_BIT_IGMP4) - add_ip4_multicast_arc(); - - if (protos & PROTO_BIT_ARP) - ethernet_register_input_type(m, ETHERNET_TYPE_ARP, - tap_inject_arp_node.index); - - if (protos & PROTO_BIT_ICMP4) - ip4_register_protocol(IP_PROTOCOL_ICMP, - tap_inject_icmp_node.index); - - if (protos & PROTO_BIT_OSPF2) - ip4_register_protocol(IP_PROTOCOL_OSPF, - tap_inject_classified_node.index); - - if (protos & PROTO_BIT_TCP) - ip4_register_protocol(IP_PROTOCOL_TCP, - tap_inject_classified_node.index); - - if (protos & PROTO_BIT_UDP) - ip4_register_protocol(IP_PROTOCOL_UDP, - tap_inject_classified_node.index); - - /* Find sw_if_index of tap associated with data plane interface. */ - rm.iface_to_tap[iface] = tap; - rm.iface_to_protos[iface] = protos; - - /* Find data plane interface associated with host tap ifindex. 
*/ - insert_tap_to_iface(if_nametoindex(name), iface); - - return 0; -} - -VLIB_CLI_COMMAND(tap_inject_command, static) = { - .path = "tap inject", - .short_help = "tap inject <protocol[,protocol...]> from <intfc-name> as <host-intfc-name>", - .function = tap_inject, -}; - -static clib_error_t * -interface_add_del(struct vnet_main_t *m, u32 hw_if_index, u32 add) -{ - vnet_hw_interface_t *hw = vnet_get_hw_interface(m, hw_if_index); - vnet_sw_interface_t *sw = vnet_get_sw_interface(m, hw->sw_if_index); - ASSERT(hw->sw_if_index == sw->sw_if_index); - - vec_validate(rm.iface_to_tap, sw->sw_if_index); - vec_validate(rm.iface_to_protos, sw->sw_if_index); - rm.iface_to_tap[sw->sw_if_index] = ~0; - rm.iface_to_protos[sw->sw_if_index] = 0; - return 0; -} -VNET_HW_INTERFACE_ADD_DEL_FUNCTION(interface_add_del); - -clib_error_t * -vlib_plugin_register(vlib_main_t *m, vnet_plugin_handoff_t *h, int f) -{ - rm.vnet_main = h->vnet_main; - rm.ns_index = ~0; - return 0; -} - -static clib_error_t *router_init(vlib_main_t *m) -{ - return 0; -} -VLIB_INIT_FUNCTION(router_init); diff --git a/router/router/tap_inject.c b/router/router/tap_inject.c new file mode 100644 index 0000000..8d6f5af --- /dev/null +++ b/router/router/tap_inject.c @@ -0,0 +1,375 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tap_inject.h" + +static tap_inject_main_t tap_inject_main; + +tap_inject_main_t * +tap_inject_get_main (void) +{ + return &tap_inject_main; +} + +void +tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0); + vec_validate_init_empty (im->sw_if_index_to_tap_if_index, sw_if_index, ~0); + + vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0); + + im->sw_if_index_to_tap_fd[sw_if_index] = tap_fd; + im->sw_if_index_to_tap_if_index[sw_if_index] = tap_if_index; + + im->tap_fd_to_sw_if_index[tap_fd] = sw_if_index; + + hash_set (im->tap_if_index_to_sw_if_index, tap_if_index, sw_if_index); +} + +void +tap_inject_delete_tap (u32 sw_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 tap_fd = im->sw_if_index_to_tap_fd[sw_if_index]; + u32 tap_if_index = im->sw_if_index_to_tap_if_index[sw_if_index]; + + im->sw_if_index_to_tap_if_index[sw_if_index] = ~0; + im->sw_if_index_to_tap_fd[sw_if_index] = ~0; + im->tap_fd_to_sw_if_index[tap_fd] = ~0; + + hash_unset (im->tap_if_index_to_sw_if_index, tap_if_index); +} + +u32 +tap_inject_lookup_tap_fd (u32 sw_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->sw_if_index_to_tap_fd, sw_if_index, ~0); + return im->sw_if_index_to_tap_fd[sw_if_index]; +} + +u32 +tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + vec_validate_init_empty (im->tap_fd_to_sw_if_index, tap_fd, ~0); + return im->tap_fd_to_sw_if_index[tap_fd]; +} + +u32 +tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index) +{ + tap_inject_main_t * im = tap_inject_get_main (); + uword * sw_if_index; + + sw_if_index = hash_get (im->tap_if_index_to_sw_if_index, tap_if_index); + return sw_if_index ? 
*(u32 *)sw_if_index : ~0; +} + + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, int f) +{ + return 0; +} + + +static void +tap_inject_disable (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + im->flags &= ~TAP_INJECT_F_ENABLED; + + clib_warning ("tap-inject is not actually disabled."); +} + +static clib_error_t * +tap_inject_enable (void) +{ + vlib_main_t * vm = vlib_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + + if (tap_inject_is_enabled ()) + return 0; + + tap_inject_enable_netlink (); + + /* Only enable netlink? */ + if (im->flags & TAP_INJECT_F_CONFIG_NETLINK) + { + im->flags |= TAP_INJECT_F_ENABLED; + return 0; + } + + /* Register ARP and ICMP6 as neighbor nodes. */ + ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, im->neighbor_node_index); + ip6_register_protocol (IP_PROTOCOL_ICMP6, im->neighbor_node_index); + + /* Register remaining protocols. */ + ip4_register_protocol (IP_PROTOCOL_ICMP, im->tx_node_index); + + ip4_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index); + ip4_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index); + ip4_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index); + + ip6_register_protocol (IP_PROTOCOL_OSPF, im->tx_node_index); + ip6_register_protocol (IP_PROTOCOL_TCP, im->tx_node_index); + ip6_register_protocol (IP_PROTOCOL_UDP, im->tx_node_index); + + /* Add IPv4 multicast route. 
*/ + { + ip4_add_del_route_args_t a; + ip_adjacency_t add_adj; + u32 next_node_index; + + memset (&a, 0, sizeof (a)); + memset (&add_adj, 0, sizeof (add_adj)); + + a.add_adj = &add_adj; + a.n_add_adj = 1; + + a.flags = IP4_ROUTE_FLAG_TABLE_ID | IP4_ROUTE_FLAG_ADD; + a.table_index_or_table_id = 0; + a.dst_address.as_u32 = 0x000000E0; /* 224.0.0.0 */ + a.dst_address_length = 24; + a.adj_index = ~0; + + next_node_index = vlib_node_add_next (vm, ip4_lookup_node.index, + im->tx_node_index); + + add_adj.explicit_fib_index = ~0; + add_adj.rewrite_header.node_index = ip4_rewrite_node.index; + add_adj.lookup_next_index = next_node_index; + add_adj.if_address_index = ~0; + + ip4_add_del_route (&ip4_main, &a); + } + + im->flags |= TAP_INJECT_F_ENABLED; + + return 0; +} + +static uword +tap_inject_iface_isr (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + tap_inject_main_t * im = tap_inject_get_main (); + vnet_hw_interface_t * hw; + u32 * hw_if_index; + clib_error_t * err = 0; + + vec_foreach (hw_if_index, im->interfaces_to_enable) + { + hw = vnet_get_hw_interface (vnet_get_main (), *hw_if_index); + + if (hw->hw_class_index == ethernet_hw_interface_class.index) + { + err = tap_inject_tap_connect (hw); + if (err) + break; + } + } + + vec_foreach (hw_if_index, im->interfaces_to_disable) + tap_inject_tap_disconnect (*hw_if_index); + + vec_free (im->interfaces_to_enable); + vec_free (im->interfaces_to_disable); + + return err ? 
-1 : 0; +} + +VLIB_REGISTER_NODE (tap_inject_iface_isr_node, static) = { + .function = tap_inject_iface_isr, + .name = "tap-inject-iface-isr", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = sizeof (u32), +}; + + +static clib_error_t * +tap_inject_interface_add_del (struct vnet_main_t * vnet_main, u32 hw_if_index, + u32 add) +{ + vlib_main_t * vm = vlib_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + + if (!tap_inject_is_config_enabled ()) + return 0; + + tap_inject_enable (); + + if (add) + vec_add1 (im->interfaces_to_enable, hw_if_index); + else + vec_add1 (im->interfaces_to_disable, hw_if_index); + + vlib_node_set_interrupt_pending (vm, tap_inject_iface_isr_node.index); + + return 0; +} + +VNET_HW_INTERFACE_ADD_DEL_FUNCTION (tap_inject_interface_add_del); + + +static clib_error_t * +tap_inject_enable_disable_all_interfaces (int enable) +{ + vnet_main_t * vnet_main = vnet_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + vnet_hw_interface_t * interfaces; + vnet_hw_interface_t * hw; + u32 ** indices; + + if (enable) + tap_inject_enable (); + else + tap_inject_disable (); + + /* Collect all the interface indices. */ + interfaces = vnet_main->interface_main.hw_interfaces; + indices = enable ? 
&im->interfaces_to_enable : &im->interfaces_to_disable; + pool_foreach (hw, interfaces, vec_add1 (*indices, hw - interfaces)); + + if (tap_inject_iface_isr (vlib_get_main (), 0, 0)) + return clib_error_return (0, "tap-inject interface add del isr failed"); + + return 0; +} + +static clib_error_t * +tap_inject_cli (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + if (cmd->function_arg) + { + clib_error_t * err; + + if (tap_inject_is_config_disabled ()) + return clib_error_return (0, + "tap-inject is disabled in config, thus cannot be enabled."); + + /* Enable */ + err = tap_inject_enable_disable_all_interfaces (1); + if (err) + { + tap_inject_enable_disable_all_interfaces (0); + return err; + } + + im->flags |= TAP_INJECT_F_CONFIG_ENABLE; + } + else + { + /* Disable */ + tap_inject_enable_disable_all_interfaces (0); + im->flags &= ~TAP_INJECT_F_CONFIG_ENABLE; + } + + return 0; +} + +VLIB_CLI_COMMAND (tap_inject_enable_cmd, static) = { + .path = "enable tap-inject", + .short_help = "enable tap-inject", + .function = tap_inject_cli, + .function_arg = 1, +}; + +VLIB_CLI_COMMAND (tap_inject_disable_cmd, static) = { + .path = "disable tap-inject", + .short_help = "disable tap-inject", + .function = tap_inject_cli, + .function_arg = 0, +}; + + +static clib_error_t * +show_tap_inject (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnet_main = vnet_get_main (); + tap_inject_main_t * im = tap_inject_get_main (); + u32 k, v; + + if (tap_inject_is_config_disabled ()) + { + vlib_cli_output (vm, "tap-inject is disabled in config.\n"); + return 0; + } + + if (!tap_inject_is_enabled ()) + { + vlib_cli_output (vm, "tap-inject is not enabled.\n"); + return 0; + } + + hash_foreach (k, v, im->tap_if_index_to_sw_if_index, { + vlib_cli_output (vm, "%U -> %U", + format_vnet_sw_interface_name, vnet_main, + vnet_get_sw_interface (vnet_main, v), + 
format_tap_inject_tap_name, k); + }); + + return 0; +} + +VLIB_CLI_COMMAND (show_tap_inject_cmd, static) = { + .path = "show tap-inject", + .short_help = "show tap-inject", + .function = show_tap_inject, +}; + + +static clib_error_t * +tap_inject_config (vlib_main_t * vm, unformat_input_t * input) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "enable")) + im->flags |= TAP_INJECT_F_CONFIG_ENABLE; + + else if (unformat (input, "disable")) + im->flags |= TAP_INJECT_F_CONFIG_DISABLE; + + else if (unformat (input, "netlink-only")) + im->flags |= TAP_INJECT_F_CONFIG_NETLINK; + + else + return clib_error_return (0, "syntax error `%U'", + format_unformat_error, input); + } + + if (tap_inject_is_config_enabled () && tap_inject_is_config_disabled ()) + return clib_error_return (0, + "tap-inject cannot be both enabled and disabled."); + + return 0; +} + +VLIB_CONFIG_FUNCTION (tap_inject_config, "tap-inject"); diff --git a/router/router/tap_inject.h b/router/router/tap_inject.h new file mode 100644 index 0000000..001ab52 --- /dev/null +++ b/router/router/tap_inject.h @@ -0,0 +1,109 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _TAP_INJECT_H +#define _TAP_INJECT_H + +#include <vnet/plugin/plugin.h> +#include <vnet/ip/ip.h> + + +#ifndef ETHER_ADDR_LEN +#define ETHER_ADDR_LEN 6 +#endif + +typedef struct { + /* + * tap-inject can be enabled or disabled in config file or during runtime. + * When disabled in config, it is not possible to enable during runtime. + * + * When the netlink-only option is used, netlink configuration is monitored + * and mirrored to the data plane but no traffic is passed between the host + * and the data plane. + */ +#define TAP_INJECT_F_CONFIG_ENABLE (1U << 0) +#define TAP_INJECT_F_CONFIG_DISABLE (1U << 1) +#define TAP_INJECT_F_CONFIG_NETLINK (1U << 2) +#define TAP_INJECT_F_ENABLED (1U << 3) + + u32 flags; + + u32 * sw_if_index_to_tap_fd; + u32 * sw_if_index_to_tap_if_index; + u32 * tap_fd_to_sw_if_index; + u32 * tap_if_index_to_sw_if_index; + + u32 * interfaces_to_enable; + u32 * interfaces_to_disable; + + u32 * rx_file_descriptors; + + u32 rx_node_index; + u32 tx_node_index; + u32 neighbor_node_index; + + u32 * rx_buffers; + +} tap_inject_main_t; + + +tap_inject_main_t * tap_inject_get_main (void); + +void tap_inject_insert_tap (u32 sw_if_index, u32 tap_fd, u32 tap_if_index); +void tap_inject_delete_tap (u32 sw_if_index); + +u32 tap_inject_lookup_tap_fd (u32 sw_if_index); +u32 tap_inject_lookup_sw_if_index_from_tap_fd (u32 tap_fd); +u32 tap_inject_lookup_sw_if_index_from_tap_if_index (u32 tap_if_index); + +static inline int +tap_inject_is_enabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_ENABLED); +} + +static inline int +tap_inject_is_config_enabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_CONFIG_ENABLE); +} + +static inline int +tap_inject_is_config_disabled (void) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + return !!(im->flags & TAP_INJECT_F_CONFIG_DISABLE); +} + + +/* Netlink */ + +void tap_inject_enable_netlink 
(void); + + +/* Tap */ + +clib_error_t * tap_inject_tap_connect (vnet_hw_interface_t * hw); +clib_error_t * tap_inject_tap_disconnect (u32 sw_if_index); + +u8 * format_tap_inject_tap_name (u8 * s, va_list * args); + +#endif /* _TAP_INJECT_H */ diff --git a/router/router/tap_inject_netlink.c b/router/router/tap_inject_netlink.c new file mode 100644 index 0000000..a30e262 --- /dev/null +++ b/router/router/tap_inject_netlink.c @@ -0,0 +1,184 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "tap_inject.h"
+
+#include <librtnl/netns.h>
+#include <vlibmemory/api.h>
+#include <vnet/ethernet/arp_packet.h>
+
+
+/*
+ * Mirror a host interface address change into the data plane.
+ *
+ * a      - address notification; a->ifaddr.ifa_index selects the tap.
+ * is_del - non-zero to remove the address, zero to add it.
+ *
+ * Notifications for interfaces without a tap mapping are ignored.
+ */
+static void
+add_del_addr (ns_addr_t * a, int is_del)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                    a->ifaddr.ifa_index);
+
+  if (sw_if_index == ~0)
+    return;
+
+  if (a->ifaddr.ifa_family == AF_INET)
+    {
+      ip4_add_del_interface_address (vm, sw_if_index,
+          (ip4_address_t *) a->local, a->ifaddr.ifa_prefixlen, is_del);
+    }
+  else if (a->ifaddr.ifa_family == AF_INET6)
+    {
+      ip6_add_del_interface_address (vm, sw_if_index,
+          (ip6_address_t *) a->addr, a->ifaddr.ifa_prefixlen, is_del);
+    }
+}
+
+
+/* Argument block handed to set_flags_cb through the main-thread RPC. */
+struct set_flags_args {
+  u32 index;
+  u8 flags;
+};
+
+/* Applies the requested flags to the software interface a->index. */
+static void
+set_flags_cb (struct set_flags_args * a)
+{
+  vnet_sw_interface_set_flags (vnet_get_main (), a->index, a->flags);
+}
+
+/*
+ * Mirror the host link's IFF_UP state onto the data-plane interface's
+ * admin-up flag. The flag change itself is dispatched with
+ * vl_api_rpc_call_main_thread.
+ *
+ * l      - link notification; l->ifi.ifi_index selects the tap.
+ * is_del - not consulted; only the IFF_UP bit drives the result.
+ */
+static void
+add_del_link (ns_link_t * l, int is_del)
+{
+  struct set_flags_args args = { ~0, 0 };
+  vnet_sw_interface_t * sw;
+  u8 flags = 0;
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                    l->ifi.ifi_index);
+
+  if (sw_if_index == ~0)
+    return;
+
+  sw = vnet_get_sw_interface (vnet_get_main (), sw_if_index);
+
+  /* Start from the current flags so only the admin-up bit changes. */
+  flags = sw->flags;
+
+  if (l->ifi.ifi_flags & IFF_UP)
+    flags |= VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+  else
+    flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  args.index = sw_if_index;
+  args.flags = flags;
+
+  vl_api_rpc_call_main_thread (set_flags_cb, (u8 *)&args, sizeof (args));
+}
+
+
+/*
+ * Mirror a host neighbor (ARP / IPv6 ND) entry into the data plane.
+ *
+ * n      - neighbor notification; n->nd.ndm_ifindex selects the tap.
+ * is_del - non-zero when the host removed the entry.
+ *
+ * A delete notification always removes the data-plane entry, whatever
+ * neighbor state it carries; otherwise a deleted entry that was still
+ * NUD_REACHABLE would be installed again. For non-delete notifications:
+ *   IPv4: NUD_REACHABLE installs, NUD_FAILED removes, other states are
+ *         left alone.
+ *   IPv6: NUD_REACHABLE installs, every other state removes.
+ */
+static void
+add_del_neigh (ns_neigh_t * n, int is_del)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vlib_main_t * vm = vlib_get_main ();
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (
+                    n->nd.ndm_ifindex);
+
+  if (sw_if_index == ~0)
+    return;
+
+  if (n->nd.ndm_family == AF_INET)
+    {
+      ethernet_arp_ip4_over_ethernet_address_t a;
+
+      memset (&a, 0, sizeof (a));
+
+      clib_memcpy (&a.ethernet, n->lladdr, ETHER_ADDR_LEN);
+      clib_memcpy (&a.ip4, n->dst, sizeof (a.ip4));
+
+      if (!is_del && (n->nd.ndm_state & NUD_REACHABLE))
+        vnet_arp_set_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a, 0);
+      else if (is_del || (n->nd.ndm_state & NUD_FAILED))
+        vnet_arp_unset_ip4_over_ethernet (vnet_main, sw_if_index, ~0, &a);
+    }
+  else if (n->nd.ndm_family == AF_INET6)
+    {
+      if (!is_del && (n->nd.ndm_state & NUD_REACHABLE))
+        vnet_set_ip6_ethernet_neighbor (vm, sw_if_index,
+            (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN, 0);
+      else
+        vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index,
+            (ip6_address_t *) n->dst, n->lladdr, ETHER_ADDR_LEN);
+    }
+}
+
+
+/* Only routes from this host routing table are mirrored. */
+#define TAP_INJECT_HOST_ROUTE_TABLE_MAIN 254
+
+/*
+ * Mirror a host route into the data-plane FIB.
+ *
+ * r      - route notification; r->oif selects the tap.
+ * is_del - non-zero to delete the route, zero to add it.
+ *
+ * Routes whose output interface has no tap mapping, or which are not in
+ * table TAP_INJECT_HOST_ROUTE_TABLE_MAIN, are ignored.
+ */
+static void
+add_del_route (ns_route_t * r, int is_del)
+{
+  u32 sw_if_index;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_if_index (r->oif);
+
+  if (sw_if_index == ~0 || r->table != TAP_INJECT_HOST_ROUTE_TABLE_MAIN)
+    return;
+
+  if (r->rtm.rtm_family == AF_INET)
+    {
+      ip4_add_del_route_next_hop (&ip4_main,
+          is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD,
+          (ip4_address_t *) r->dst, r->rtm.rtm_dst_len,
+          (ip4_address_t *) r->gateway, sw_if_index, 0, ~0, 0);
+    }
+  else if (r->rtm.rtm_family == AF_INET6)
+    {
+      ip6_add_del_route_next_hop (&ip6_main,
+          is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD,
+          (ip6_address_t *) r->dst, r->rtm.rtm_dst_len,
+          (ip6_address_t *) r->gateway, sw_if_index, 0, ~0, 0);
+    }
+}
+
+
+/*
+ * Notification callback registered with netns_open: dispatches on the
+ * object type, passing the NETNS_F_DEL bit of 'flags' as is_del.
+ * Unrecognized types are ignored.
+ */
+static void
+netns_notify_cb (void * obj, netns_type_t type, u32 flags, uword opaque)
+{
+  if (type == NETNS_TYPE_ADDR)
+    add_del_addr ((ns_addr_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_LINK)
+    add_del_link ((ns_link_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_NEIGH)
+    add_del_neigh ((ns_neigh_t *)obj, flags & NETNS_F_DEL);
+
+  else if (type == NETNS_TYPE_ROUTE)
+    add_del_route ((ns_route_t *)obj, flags & NETNS_F_DEL);
+}
+
+/*
+ * Subscribe netns_notify_cb to netlink notifications. The namespace name
+ * passed to netns_open is the empty string.
+ *
+ * NOTE(review): the result of netns_open is not checked, so a failed
+ * subscription goes unnoticed; confirm its error convention and report it.
+ */
+void
+tap_inject_enable_netlink (void)
+{
+  char nsname = 0;
+  netns_sub_t sub = {
+    .notify = netns_notify_cb,
+    .opaque = 0,
+  };
+
+  netns_open (&nsname, &sub);
+}
diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c
new file mode 100644
index 0000000..fe108dc
--- /dev/null
+++ b/router/router/tap_inject_node.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <netinet/in.h>
+#include <vnet/ethernet/arp_packet.h>
+
+vlib_node_registration_t tap_inject_rx_node;
+vlib_node_registration_t tap_inject_tx_node;
+vlib_node_registration_t tap_inject_neighbor_node;
+
+/* Next-node slots of the tap-inject-neighbor node. */
+enum {
+  NEXT_NEIGHBOR_ARP,
+  NEXT_NEIGHBOR_ICMP6,
+};
+
+
+/*
+ * Write the current data of buffer 'b' to the tap 'fd' with one writev.
+ *
+ * Only the first buffer of a chain is written. A short write or a chained
+ * buffer is reported as "buffer truncated"; a failed write is reported as
+ * "writev failed". Neither is propagated to the caller.
+ */
+static inline void
+tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
+{
+  struct iovec iov;
+  ssize_t n_bytes;
+
+  iov.iov_base = vlib_buffer_get_current (b);
+  iov.iov_len = b->current_length;
+
+  n_bytes = writev (fd, &iov, 1);
+
+  if (n_bytes < 0)
+    clib_warning ("writev failed");
+  else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    clib_warning ("buffer truncated");
+}
+
+/*
+ * tap-inject-tx: copy every packet of the frame to the tap paired with its
+ * RX interface, then free all buffers of the frame. Packets whose RX
+ * interface has no tap are skipped (and still freed).
+ *
+ * Returns the number of packets in the frame.
+ */
+static uword
+tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      b = vlib_get_buffer (vm, pkts[i]);
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        continue;
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+    }
+
+  vlib_buffer_free (vm, pkts, f->n_vectors);
+  return f->n_vectors;
+}
+
+VLIB_REGISTER_NODE (tap_inject_tx_node) = {
+  .function = tap_inject_tx,
+  .name = "tap-inject-tx",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+};
+
+
+/*
+ * tap-inject-neighbor: copy every packet to the tap paired with its RX
+ * interface and, in addition, hand ARP replies and ICMPv6 neighbor
+ * advertisements on to the data plane's own neighbor processing. All other
+ * packets are freed after being copied to the tap.
+ *
+ * Returns the number of packets in the frame.
+ */
+static uword
+tap_inject_neighbor (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  vlib_buffer_t * b;
+  u32 * pkts;
+  u32 fd;
+  u32 i;
+  u32 bi;
+  u32 next_index = node->cached_next_index;
+  u32 n_left;
+  u32 * to_next;
+
+  pkts = vlib_frame_vector_args (f);
+
+  for (i = 0; i < f->n_vectors; ++i)
+    {
+      /*
+       * The next node is decided afresh for every packet. If the decision
+       * of an earlier packet were kept, every packet following one ARP
+       * reply in the same frame would be forwarded as well instead of
+       * being freed.
+       */
+      u32 next = ~0;
+
+      bi = pkts[i];
+      b = vlib_get_buffer (vm, bi);
+
+      fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+      if (fd == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* Re-wind the buffer to the start of the Ethernet header. */
+      vlib_buffer_advance (b, -b->current_data);
+
+      tap_inject_tap_send_buffer (fd, b);
+
+      /* Send the buffer to a neighbor node too? */
+      {
+        ethernet_header_t * eth = vlib_buffer_get_current (b);
+        u16 ether_type = ntohs (eth->type);
+
+        if (ether_type == ETHERNET_TYPE_ARP)
+          {
+            ethernet_arp_header_t * arp = (void *)(eth + 1);
+
+            if (arp->opcode == htons (ETHERNET_ARP_OPCODE_reply))
+              next = NEXT_NEIGHBOR_ARP;
+          }
+        else if (ether_type == ETHERNET_TYPE_IP6)
+          {
+            ip6_header_t * ip = (void *)(eth + 1);
+            icmp46_header_t * icmp = (void *)(ip + 1);
+
+            if (ip->protocol == IP_PROTOCOL_ICMP6 &&
+                icmp->type == ICMP6_neighbor_advertisement)
+              next = NEXT_NEIGHBOR_ICMP6;
+          }
+      }
+
+      if (next == ~0)
+        {
+          vlib_buffer_free (vm, &bi, 1);
+          continue;
+        }
+
+      /* ARP and ICMP6 expect to start processing after the Ethernet header. */
+      vlib_buffer_advance (b, sizeof (ethernet_header_t));
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+      *(to_next++) = bi;
+      --n_left;
+
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                       n_left, bi, next);
+      vlib_put_next_frame (vm, node, next_index, n_left);
+    }
+
+  return f->n_vectors;
+}
+
+/*
+ * NOTE(review): neighbor advertisements are steered to the node named
+ * "icmp6-neighbor-solicitation"; confirm that this node is the intended
+ * consumer of advertisements.
+ */
+VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
+  .function = tap_inject_neighbor,
+  .name = "tap-inject-neighbor",
+  .vector_size = sizeof (u32),
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [NEXT_NEIGHBOR_ARP] = "arp-input",
+    [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
+  },
+};
+
+
+/* Largest frame read from a tap, and the number of buffers needed for it. */
+#define MTU 1500
+#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
+/* Number of buffers requested per refill of the rx buffer reserve. */
+#define NUM_BUFFERS_TO_ALLOC 32
+
+/*
+ * Read one packet from the tap 'fd' and send it to the output node of the
+ * paired data-plane interface.
+ *
+ * Returns 1 when a packet was read and forwarded. Returns 0 when the fd has
+ * no interface mapping, when buffers could not be allocated, or when readv
+ * failed.
+ */
+static inline uword
+tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 sw_if_index;
+  struct iovec iov[MTU_BUFFERS];
+  u32 bi[MTU_BUFFERS];
+  vlib_buffer_t * b;
+  ssize_t n_bytes;
+  ssize_t n_bytes_left;
+  u32 i, j;
+
+  sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
+  if (sw_if_index == ~0)
+    return 0;
+
+  /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
+  if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+    {
+      u32 len = vec_len (im->rx_buffers);
+      u32 n_alloc;
+
+      /*
+       * The allocator writes up to NUM_BUFFERS_TO_ALLOC indices starting at
+       * slot 'len'. Grow the vector so that a refill with buffers still in
+       * reserve cannot write past its end, then restore the logical length.
+       */
+      vec_validate (im->rx_buffers, len + NUM_BUFFERS_TO_ALLOC - 1);
+      _vec_len (im->rx_buffers) = len;
+
+      n_alloc = vlib_buffer_alloc_from_free_list (vm,
+                    &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC,
+                    VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+      _vec_len (im->rx_buffers) += n_alloc;
+
+      if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+        {
+          clib_warning ("failed to allocate buffers");
+          return 0;
+        }
+    }
+
+  /* Fill buffers from the end of the list to make it easier to resize. */
+  for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
+    {
+      bi[i] = im->rx_buffers[j];
+
+      b = vlib_get_buffer (vm, bi[i]);
+
+      iov[i].iov_base = b->data;
+      iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
+    }
+
+  n_bytes = readv (fd, iov, MTU_BUFFERS);
+  if (n_bytes < 0)
+    {
+      clib_warning ("readv failed");
+      return 0;
+    }
+
+  b = vlib_get_buffer (vm, bi[0]);
+
+  vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+  /* Bytes that did not fit into the first buffer, if any. */
+  n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
+
+  if (n_bytes_left > 0)
+    {
+      b->total_length_not_including_first_buffer = n_bytes_left;
+      b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+    }
+
+  /* Corrected by the loop below when the packet spans several buffers. */
+  b->current_length = n_bytes;
+
+  /* If necessary, configure any remaining buffers in the chain. */
+  for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
+    {
+      b = vlib_get_buffer (vm, bi[i - 1]);
+      b->current_length = VLIB_BUFFER_DATA_SIZE;
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+      b->next_buffer = bi[i];
+
+      b = vlib_get_buffer (vm, bi[i]);
+      b->current_length = n_bytes_left;
+    }
+
+  /* 'i' buffers were consumed from the tail of the reserve. */
+  _vec_len (im->rx_buffers) -= i;
+
+  vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
+
+  /* Get the packet to the output node. */
+  {
+    vnet_hw_interface_t * hw;
+    vlib_frame_t * new_frame;
+    u32 * to_next;
+
+    hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
+
+    new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+    to_next = vlib_frame_vector_args (new_frame);
+    to_next[0] = bi[0];
+    new_frame->n_vectors = 1;
+
+    vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
+  }
+
+  return 1;
+}
+
+/*
+ * tap-inject-rx: read one packet from every tap fd queued in
+ * rx_file_descriptors, then release the queue.
+ *
+ * Returns the number of packets received, or 0 if any read failed (the
+ * remaining queued fds are dropped in that case).
+ */
+static uword
+tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+  u32 * fd;
+  uword count = 0;
+
+  vec_foreach (fd, im->rx_file_descriptors)
+    {
+      if (tap_rx (vm, node, f, *fd) != 1)
+        {
+          clib_warning ("rx failed");
+          count = 0;
+          break;
+        }
+      ++count;
+    }
+
+  vec_free (im->rx_file_descriptors);
+
+  return count;
+}
+
+VLIB_REGISTER_NODE (tap_inject_rx_node) = {
+  .function = tap_inject_rx,
+  .name = "tap-inject-rx",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = sizeof (u32),
+};
+
+
+/*
+ * Init hook: record the graph node indices in the main struct and reserve
+ * space for the rx buffer vector (its length starts at zero).
+ */
+static clib_error_t *
+tap_inject_init (vlib_main_t * vm)
+{
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  im->rx_node_index = tap_inject_rx_node.index;
+  im->tx_node_index = tap_inject_tx_node.index;
+  im->neighbor_node_index = tap_inject_neighbor_node.index;
+
+  vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
+  vec_reset_length (im->rx_buffers);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_inject_init);
diff --git a/router/router/tap_inject_tap.c b/router/router/tap_inject_tap.c
new file mode 100644
index 0000000..9650323
--- /dev/null
+++ b/router/router/tap_inject_tap.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/if_tun.h>
+#include <netinet/in.h>
+#include <vnet/unix/tuntap.h>
+
+#include <vlib/unix/unix.h>
+
+
+/*
+ * Read callback for a tap fd: queue the fd for the rx node and raise the
+ * rx node's interrupt. The packet itself is read later by the rx node.
+ */
+static clib_error_t *
+tap_inject_tap_read (unix_file_t * f)
+{
+  vlib_main_t * vm = vlib_get_main ();
+  tap_inject_main_t * im = tap_inject_get_main ();
+
+  vec_add1 (im->rx_file_descriptors, f->file_descriptor);
+
+  vlib_node_set_interrupt_pending (vm, im->rx_node_index);
+
+  return 0;
+}
+
+/* Host tap names are this prefix followed by the hardware instance number. */
+#define TAP_INJECT_TAP_BASE_NAME "vpp"
+
+/*
+ * Create the host tap paired with hardware interface 'hw'.
+ *
+ * Opens /dev/net/tun, creates a tap named TAP_INJECT_TAP_BASE_NAME
+ * followed by the hardware instance number, switches it to non-blocking
+ * I/O, gives it the interface's hardware address, registers the read
+ * callback and records the mapping with tap_inject_insert_tap.
+ *
+ * Returns 0 on success or a clib error describing the failed step; every
+ * descriptor opened here is closed again on failure.
+ */
+clib_error_t *
+tap_inject_tap_connect (vnet_hw_interface_t * hw)
+{
+  vnet_main_t * vnet_main = vnet_get_main ();
+  vnet_sw_interface_t * sw = vnet_get_sw_interface (vnet_main, hw->hw_if_index);
+  static const int one = 1;
+  int fd;
+  struct ifreq ifr;
+  unix_file_t template;
+  u32 tap_fd;
+  u8 * name;
+
+  memset (&ifr, 0, sizeof (ifr));
+  memset (&template, 0, sizeof (template));
+
+  /* The lookup above uses the hw index as a sw index; they must agree. */
+  ASSERT (hw->hw_if_index == sw->sw_if_index);
+
+  /* Create the tap. */
+  tap_fd = open ("/dev/net/tun", O_RDWR);
+
+  if ((int)tap_fd < 0)
+    return clib_error_return (0, "failed to open tun device");
+
+  name = format (0, TAP_INJECT_TAP_BASE_NAME "%u%c", hw->hw_instance, 0);
+
+  /* ifr was zeroed above, so the copied name stays NUL-terminated. */
+  strncpy (ifr.ifr_name, (char *) name, sizeof (ifr.ifr_name) - 1);
+
+  /* The name now lives in ifr; release the temporary vector. */
+  vec_free (name);
+
+  ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+  if (ioctl (tap_fd, TUNSETIFF, (void *)&ifr) < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to create tap");
+    }
+
+  if (ioctl (tap_fd, FIONBIO, &one) < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to set tap to non-blocking io");
+    }
+
+  /* Open a socket to configure the device. */
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    {
+      close (tap_fd);
+      return clib_error_return (0, "failed to configure tap");
+    }
+
+  if (hw->hw_address)
+    clib_memcpy (ifr.ifr_hwaddr.sa_data, hw->hw_address, ETHER_ADDR_LEN);
+
+  ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+  /* Set the hardware address. */
+  if (ioctl (fd, SIOCSIFHWADDR, &ifr) < 0)
+    {
+      close (tap_fd);
+      close (fd);
+      return clib_error_return (0, "failed to set tap hardware address");
+    }
+
+  /* Get the tap if index. */
+  if (ioctl (fd, SIOCGIFINDEX, &ifr) < 0)
+    {
+      close (tap_fd);
+      close (fd);
+      return clib_error_return (0, "failed to procure tap if index");
+    }
+
+  close (fd);
+
+  /* Get notified when the tap needs to be read. */
+  template.read_function = tap_inject_tap_read;
+  template.file_descriptor = tap_fd;
+
+  unix_file_add (&unix_main, &template);
+
+  tap_inject_insert_tap (sw->sw_if_index, tap_fd, ifr.ifr_ifindex);
+
+  return 0;
+}
+
+/*
+ * Remove the tap paired with 'sw_if_index': drop the mapping and close
+ * the tap fd.
+ *
+ * Returns 0 on success, or an error when the interface has no tap.
+ *
+ * NOTE(review): the unix_file registered by tap_inject_tap_connect is not
+ * removed here; only the fd is closed. Confirm whether the registration
+ * has to be deleted as well.
+ */
+clib_error_t *
+tap_inject_tap_disconnect (u32 sw_if_index)
+{
+  u32 tap_fd;
+
+  tap_fd = tap_inject_lookup_tap_fd (sw_if_index);
+  if (tap_fd == ~0)
+    return clib_error_return (0, "failed to disconnect tap");
+
+  tap_inject_delete_tap (sw_if_index);
+
+  close (tap_fd);
+  return 0;
+}
+
+
+/*
+ * Format function: appends the host name of a tap to 's'.
+ *
+ * args - one u32: the host interface index of the tap.
+ *
+ * Returns the (possibly extended) vector. When the name cannot be
+ * resolved, 's' is returned unchanged so that the caller's accumulated
+ * output is neither lost nor leaked.
+ */
+u8 *
+format_tap_inject_tap_name (u8 * s, va_list * args)
+{
+  int fd;
+  struct ifreq ifr;
+
+  fd = socket (PF_PACKET, SOCK_RAW, htons (ETH_P_ALL));
+
+  if (fd < 0)
+    return s;
+
+  memset (&ifr, 0, sizeof (ifr));
+
+  ifr.ifr_ifindex = va_arg (*args, u32);
+
+  if (ioctl (fd, SIOCGIFNAME, &ifr) < 0)
+    {
+      close (fd);
+      return s;
+    }
+
+  close (fd);
+
+  return format (s, "%s", ifr.ifr_name);
+}
|