diff options
author | Jeff Shaw <jeffrey.b.shaw@intel.com> | 2016-09-21 19:12:46 -0400 |
---|---|---|
committer | Jeff Shaw <jeffrey.b.shaw@intel.com> | 2016-10-04 15:58:00 -0400 |
commit | dfae7756baf895957a43944f63bfe0c850b16467 (patch) | |
tree | cb8b0c5ae0a09835f9817a8956bd86dbf9a1de57 /router/router/tap_inject_node.c | |
parent | 961580e47e58e9cb7175ec89703bc951c7ce71b2 (diff) |
[router] IPv6 support and refactoring.
This change adds support for IPv6 while refactoring most of the
original plugin code in the following ways.
- Adhere to vpp style guidelines.
- Split the netlink, node, and tap processing into separate
files named with a "tap_inject" prefix which more accurately
represents the functionality.
- Implement our own tap management and rx/tx. This is to reduce
the overhead of passing packets in and out of vnet tap
devices, in favor of directly reading/writing from the tap.
- Change how nodes work. Now we have neighbor, rx, and tx nodes.
The neighbor node sends ARP replies and ICMP6 neighbor
advertisements to the arp-input and icmp6-neighbor-solicitation
nodes, respectively, before also injecting the packet to the
host, making it possible for both vpp and the host network stack
to resolve the next hop. The tx node injects packets into the
host by writing to the tap. The rx node reads packets from the
tap and sends them on its associated data plane interface.
- Simplify the CLI. Instead of creating taps specifically for
a given interface we create a tap for all of the Ethernet
interfaces with the "enable tap-inject" CLI command. The
interfaces are named with a "vpp" prefix, i.e. "vpp0". Also
add a "disable tap-inject" option.
- Provide ability to enable at configuration time with the
tap-inject { enable } stanza.
Change-Id: I6b56da606e2da1d793ce6aca222fe4eb5a4e070d
Signed-off-by: Jeff Shaw <jeffrey.b.shaw@intel.com>
Diffstat (limited to 'router/router/tap_inject_node.c')
-rw-r--r-- | router/router/tap_inject_node.c | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c new file mode 100644 index 0000000..fe108dc --- /dev/null +++ b/router/router/tap_inject_node.c @@ -0,0 +1,331 @@ +/* + * Copyright 2016 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tap_inject.h" + +#include <netinet/in.h> +#include <vnet/ethernet/arp_packet.h> + +vlib_node_registration_t tap_inject_rx_node; +vlib_node_registration_t tap_inject_tx_node; +vlib_node_registration_t tap_inject_neighbor_node; + +enum { + NEXT_NEIGHBOR_ARP, + NEXT_NEIGHBOR_ICMP6, +}; + + +static inline void +tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b) +{ + struct iovec iov; + ssize_t n_bytes; + + iov.iov_base = vlib_buffer_get_current (b); + iov.iov_len = b->current_length; + + n_bytes = writev (fd, &iov, 1); + + if (n_bytes < 0) + clib_warning ("writev failed"); + else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT) + clib_warning ("buffer truncated"); +} + +static uword +tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + vlib_buffer_t * b; + u32 * pkts; + u32 fd; + u32 i; + + pkts = vlib_frame_vector_args (f); + + for (i = 0; i < f->n_vectors; ++i) + { + b = vlib_get_buffer (vm, pkts[i]); + + fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]); + if (fd == ~0) + continue; + + /* Re-wind the buffer to the start of the Ethernet header. */ + vlib_buffer_advance (b, -b->current_data); + + tap_inject_tap_send_buffer (fd, b); + } + + vlib_buffer_free (vm, pkts, f->n_vectors); + return f->n_vectors; +} + +VLIB_REGISTER_NODE (tap_inject_tx_node) = { + .function = tap_inject_tx, + .name = "tap-inject-tx", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, +}; + + +static uword +tap_inject_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * f) +{ + vlib_buffer_t * b; + u32 * pkts; + u32 fd; + u32 i; + u32 bi; + u32 next_index = node->cached_next_index; + u32 next = ~0; + u32 n_left; + u32 * to_next; + + pkts = vlib_frame_vector_args (f); + + for (i = 0; i < f->n_vectors; ++i) + { + bi = pkts[i]; + b = vlib_get_buffer (vm, bi); + + fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]); + if (fd == ~0) + { + vlib_buffer_free (vm, &bi, 1); + continue; + } + + /* Re-wind the buffer to the start of the Ethernet header. */ + vlib_buffer_advance (b, -b->current_data); + + tap_inject_tap_send_buffer (fd, b); + + /* Send the buffer to a neighbor node too? */ + { + ethernet_header_t * eth = vlib_buffer_get_current (b); + u16 ether_type = htons (eth->type); + + if (ether_type == ETHERNET_TYPE_ARP) + { + ethernet_arp_header_t * arp = (void *)(eth + 1); + + if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply)) + next = NEXT_NEIGHBOR_ARP; + } + else if (ether_type == ETHERNET_TYPE_IP6) + { + ip6_header_t * ip = (void *)(eth + 1); + icmp46_header_t * icmp = (void *)(ip + 1); + + if (ip->protocol == IP_PROTOCOL_ICMP6 && + icmp->type == ICMP6_neighbor_advertisement) + next = NEXT_NEIGHBOR_ICMP6; + } + } + + if (next == ~0) + { + vlib_buffer_free (vm, &bi, 1); + continue; + } + + /* ARP and ICMP6 expect to start processing after the Ethernet header. */ + vlib_buffer_advance (b, sizeof (ethernet_header_t)); + + vlib_get_next_frame (vm, node, next_index, to_next, n_left); + + *(to_next++) = bi; + --n_left; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left, bi, next); + vlib_put_next_frame (vm, node, next_index, n_left); + } + + return f->n_vectors; +} + +VLIB_REGISTER_NODE (tap_inject_neighbor_node) = { + .function = tap_inject_neighbor, + .name = "tap-inject-neighbor", + .vector_size = sizeof (u32), + .type = VLIB_NODE_TYPE_INTERNAL, + .n_next_nodes = 2, + .next_nodes = { + [NEXT_NEIGHBOR_ARP] = "arp-input", + [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation", + }, +}; + + +#define MTU 1500 +#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE) +#define NUM_BUFFERS_TO_ALLOC 32 + +static inline uword +tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 sw_if_index; + struct iovec iov[MTU_BUFFERS]; + u32 bi[MTU_BUFFERS]; + vlib_buffer_t * b; + ssize_t n_bytes; + ssize_t n_bytes_left; + u32 i, j; + + sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd); + if (sw_if_index == ~0) + return 0; + + /* Allocate buffers in bulk when there are less than enough to rx an MTU. */ + if (vec_len (im->rx_buffers) < MTU_BUFFERS) + { + u32 len = vec_len (im->rx_buffers); + + len = vlib_buffer_alloc_from_free_list (vm, + &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + _vec_len (im->rx_buffers) += len; + + if (vec_len (im->rx_buffers) < MTU_BUFFERS) + { + clib_warning ("failed to allocate buffers"); + return 0; + } + } + + /* Fill buffers from the end of the list to make it easier to resize. */ + for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j) + { + vlib_buffer_t * b; + + bi[i] = im->rx_buffers[j]; + + b = vlib_get_buffer (vm, bi[i]); + + iov[i].iov_base = b->data; + iov[i].iov_len = VLIB_BUFFER_DATA_SIZE; + } + + n_bytes = readv (fd, iov, MTU_BUFFERS); + if (n_bytes < 0) + { + clib_warning ("readv failed"); + return 0; + } + + b = vlib_get_buffer (vm, bi[0]); + + vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE; + + if (n_bytes_left > 0) + { + b->total_length_not_including_first_buffer = n_bytes_left; + b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + } + + b->current_length = n_bytes; + + /* If necessary, configure any remaining buffers in the chain. */ + for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE) + { + b = vlib_get_buffer (vm, bi[i - 1]); + b->current_length = VLIB_BUFFER_DATA_SIZE; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = bi[i]; + + b = vlib_get_buffer (vm, bi[i]); + b->current_length = n_bytes_left; + } + + _vec_len (im->rx_buffers) -= i; + + vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0])); + + /* Get the packet to the output node. */ + { + vnet_hw_interface_t * hw; + vlib_frame_t * new_frame; + u32 * to_next; + + hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index); + + new_frame = vlib_get_frame_to_node (vm, hw->output_node_index); + to_next = vlib_frame_vector_args (new_frame); + to_next[0] = bi[0]; + new_frame->n_vectors = 1; + + vlib_put_frame_to_node (vm, hw->output_node_index, new_frame); + } + + return 1; +} + +static uword +tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) +{ + tap_inject_main_t * im = tap_inject_get_main (); + u32 * fd; + uword count = 0; + + vec_foreach (fd, im->rx_file_descriptors) + { + if (tap_rx (vm, node, f, *fd) != 1) + { + clib_warning ("rx failed"); + count = 0; + break; + } + ++count; + } + + vec_free (im->rx_file_descriptors); + + return count; +} + +VLIB_REGISTER_NODE (tap_inject_rx_node) = { + .function = tap_inject_rx, + .name = "tap-inject-rx", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, + .vector_size = sizeof (u32), +}; + + +static clib_error_t * +tap_inject_init (vlib_main_t * vm) +{ + tap_inject_main_t * im = tap_inject_get_main (); + + im->rx_node_index = tap_inject_rx_node.index; + im->tx_node_index = tap_inject_tx_node.index; + im->neighbor_node_index = tap_inject_neighbor_node.index; + + vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC); + vec_reset_length (im->rx_buffers); + + return 0; +} + +VLIB_INIT_FUNCTION (tap_inject_init); |