aboutsummaryrefslogtreecommitdiffstats
path: root/router/router/tap_inject_node.c
diff options
context:
space:
mode:
authorJeff Shaw <jeffrey.b.shaw@intel.com>2016-09-21 19:12:46 -0400
committerJeff Shaw <jeffrey.b.shaw@intel.com>2016-10-04 15:58:00 -0400
commitdfae7756baf895957a43944f63bfe0c850b16467 (patch)
treecb8b0c5ae0a09835f9817a8956bd86dbf9a1de57 /router/router/tap_inject_node.c
parent961580e47e58e9cb7175ec89703bc951c7ce71b2 (diff)
[router] IPv6 support and refactoring.
This change adds support for IPv6 while refactoring most of the original plugin code in the following ways. - Adhere to vpp style guidelines. - Split the netlink, node, and tap processing into separate files named with a "tap_inject" prefix which more accurately represents the functionality. - Implement our own tap management and rx/tx. This is to reduce the overhead of passing packets in and out of vnet tap devices, in favor of directly reading/writing from the tap. - Change how nodes work. Now we have neighbor, rx, and tx nodes. The neighbor node sends ARP replies and ICMP6 neighbor advertisements to the arp-input and icmp6-neighbor-solicitation nodes, respectively, before also injecting the packet to the host, making it possible for both vpp and the host network stack to resolve the next hop. The tx node injects packets into the host by writing to the tap. The rx node reads packets from the tap and sends them on its associated data plane interface. - Simplify the CLI. Instead of creating taps specifically for a given interface we create a tap for all of the Ethernet interfaces with the "enable tap-inject" CLI command. The interfaces are named with a "vpp" prefix, i.e. "vpp0". Also add a "disable tap-inject" option. - Provide ability to enable at configuration time with the tap-inject { enable } stanza. Change-Id: I6b56da606e2da1d793ce6aca222fe4eb5a4e070d Signed-off-by: Jeff Shaw <jeffrey.b.shaw@intel.com>
Diffstat (limited to 'router/router/tap_inject_node.c')
-rw-r--r--router/router/tap_inject_node.c331
1 files changed, 331 insertions, 0 deletions
diff --git a/router/router/tap_inject_node.c b/router/router/tap_inject_node.c
new file mode 100644
index 0000000..fe108dc
--- /dev/null
+++ b/router/router/tap_inject_node.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tap_inject.h"
+
+#include <netinet/in.h>
+#include <vnet/ethernet/arp_packet.h>
+
+vlib_node_registration_t tap_inject_rx_node;
+vlib_node_registration_t tap_inject_tx_node;
+vlib_node_registration_t tap_inject_neighbor_node;
+
+enum {
+ NEXT_NEIGHBOR_ARP,
+ NEXT_NEIGHBOR_ICMP6,
+};
+
+
+static inline void
+tap_inject_tap_send_buffer (int fd, vlib_buffer_t * b)
+{
+ struct iovec iov;
+ ssize_t n_bytes;
+
+ iov.iov_base = vlib_buffer_get_current (b);
+ iov.iov_len = b->current_length;
+
+ n_bytes = writev (fd, &iov, 1);
+
+ if (n_bytes < 0)
+ clib_warning ("writev failed");
+ else if (n_bytes < b->current_length || b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ clib_warning ("buffer truncated");
+}
+
+static uword
+tap_inject_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ vlib_buffer_t * b;
+ u32 * pkts;
+ u32 fd;
+ u32 i;
+
+ pkts = vlib_frame_vector_args (f);
+
+ for (i = 0; i < f->n_vectors; ++i)
+ {
+ b = vlib_get_buffer (vm, pkts[i]);
+
+ fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+ if (fd == ~0)
+ continue;
+
+ /* Re-wind the buffer to the start of the Ethernet header. */
+ vlib_buffer_advance (b, -b->current_data);
+
+ tap_inject_tap_send_buffer (fd, b);
+ }
+
+ vlib_buffer_free (vm, pkts, f->n_vectors);
+ return f->n_vectors;
+}
+
+VLIB_REGISTER_NODE (tap_inject_tx_node) = {
+ .function = tap_inject_tx,
+ .name = "tap-inject-tx",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+};
+
+
+static uword
+tap_inject_neighbor (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ vlib_buffer_t * b;
+ u32 * pkts;
+ u32 fd;
+ u32 i;
+ u32 bi;
+ u32 next_index = node->cached_next_index;
+ u32 next = ~0;
+ u32 n_left;
+ u32 * to_next;
+
+ pkts = vlib_frame_vector_args (f);
+
+ for (i = 0; i < f->n_vectors; ++i)
+ {
+ bi = pkts[i];
+ b = vlib_get_buffer (vm, bi);
+
+ fd = tap_inject_lookup_tap_fd (vnet_buffer (b)->sw_if_index[VLIB_RX]);
+ if (fd == ~0)
+ {
+ vlib_buffer_free (vm, &bi, 1);
+ continue;
+ }
+
+ /* Re-wind the buffer to the start of the Ethernet header. */
+ vlib_buffer_advance (b, -b->current_data);
+
+ tap_inject_tap_send_buffer (fd, b);
+
+ /* Send the buffer to a neighbor node too? */
+ {
+ ethernet_header_t * eth = vlib_buffer_get_current (b);
+ u16 ether_type = htons (eth->type);
+
+ if (ether_type == ETHERNET_TYPE_ARP)
+ {
+ ethernet_arp_header_t * arp = (void *)(eth + 1);
+
+ if (arp->opcode == ntohs (ETHERNET_ARP_OPCODE_reply))
+ next = NEXT_NEIGHBOR_ARP;
+ }
+ else if (ether_type == ETHERNET_TYPE_IP6)
+ {
+ ip6_header_t * ip = (void *)(eth + 1);
+ icmp46_header_t * icmp = (void *)(ip + 1);
+
+ if (ip->protocol == IP_PROTOCOL_ICMP6 &&
+ icmp->type == ICMP6_neighbor_advertisement)
+ next = NEXT_NEIGHBOR_ICMP6;
+ }
+ }
+
+ if (next == ~0)
+ {
+ vlib_buffer_free (vm, &bi, 1);
+ continue;
+ }
+
+ /* ARP and ICMP6 expect to start processing after the Ethernet header. */
+ vlib_buffer_advance (b, sizeof (ethernet_header_t));
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left);
+
+ *(to_next++) = bi;
+ --n_left;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left, bi, next);
+ vlib_put_next_frame (vm, node, next_index, n_left);
+ }
+
+ return f->n_vectors;
+}
+
+VLIB_REGISTER_NODE (tap_inject_neighbor_node) = {
+ .function = tap_inject_neighbor,
+ .name = "tap-inject-neighbor",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [NEXT_NEIGHBOR_ARP] = "arp-input",
+ [NEXT_NEIGHBOR_ICMP6] = "icmp6-neighbor-solicitation",
+ },
+};
+
+
+#define MTU 1500
+#define MTU_BUFFERS ((MTU + VLIB_BUFFER_DATA_SIZE - 1) / VLIB_BUFFER_DATA_SIZE)
+#define NUM_BUFFERS_TO_ALLOC 32
+
+static inline uword
+tap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f, int fd)
+{
+ tap_inject_main_t * im = tap_inject_get_main ();
+ u32 sw_if_index;
+ struct iovec iov[MTU_BUFFERS];
+ u32 bi[MTU_BUFFERS];
+ vlib_buffer_t * b;
+ ssize_t n_bytes;
+ ssize_t n_bytes_left;
+ u32 i, j;
+
+ sw_if_index = tap_inject_lookup_sw_if_index_from_tap_fd (fd);
+ if (sw_if_index == ~0)
+ return 0;
+
+ /* Allocate buffers in bulk when there are less than enough to rx an MTU. */
+ if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+ {
+ u32 len = vec_len (im->rx_buffers);
+
+ len = vlib_buffer_alloc_from_free_list (vm,
+ &im->rx_buffers[len], NUM_BUFFERS_TO_ALLOC,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ _vec_len (im->rx_buffers) += len;
+
+ if (vec_len (im->rx_buffers) < MTU_BUFFERS)
+ {
+ clib_warning ("failed to allocate buffers");
+ return 0;
+ }
+ }
+
+ /* Fill buffers from the end of the list to make it easier to resize. */
+ for (i = 0, j = vec_len (im->rx_buffers) - 1; i < MTU_BUFFERS; ++i, --j)
+ {
+ vlib_buffer_t * b;
+
+ bi[i] = im->rx_buffers[j];
+
+ b = vlib_get_buffer (vm, bi[i]);
+
+ iov[i].iov_base = b->data;
+ iov[i].iov_len = VLIB_BUFFER_DATA_SIZE;
+ }
+
+ n_bytes = readv (fd, iov, MTU_BUFFERS);
+ if (n_bytes < 0)
+ {
+ clib_warning ("readv failed");
+ return 0;
+ }
+
+ b = vlib_get_buffer (vm, bi[0]);
+
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = ~0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ n_bytes_left = n_bytes - VLIB_BUFFER_DATA_SIZE;
+
+ if (n_bytes_left > 0)
+ {
+ b->total_length_not_including_first_buffer = n_bytes_left;
+ b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ }
+
+ b->current_length = n_bytes;
+
+ /* If necessary, configure any remaining buffers in the chain. */
+ for (i = 1; n_bytes_left > 0; ++i, n_bytes_left -= VLIB_BUFFER_DATA_SIZE)
+ {
+ b = vlib_get_buffer (vm, bi[i - 1]);
+ b->current_length = VLIB_BUFFER_DATA_SIZE;
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = bi[i];
+
+ b = vlib_get_buffer (vm, bi[i]);
+ b->current_length = n_bytes_left;
+ }
+
+ _vec_len (im->rx_buffers) -= i;
+
+ vlib_buffer_chain_validate (vm, vlib_get_buffer (vm, bi[0]));
+
+ /* Get the packet to the output node. */
+ {
+ vnet_hw_interface_t * hw;
+ vlib_frame_t * new_frame;
+ u32 * to_next;
+
+ hw = vnet_get_hw_interface (vnet_get_main (), sw_if_index);
+
+ new_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+ to_next = vlib_frame_vector_args (new_frame);
+ to_next[0] = bi[0];
+ new_frame->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, hw->output_node_index, new_frame);
+ }
+
+ return 1;
+}
+
+static uword
+tap_inject_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+ tap_inject_main_t * im = tap_inject_get_main ();
+ u32 * fd;
+ uword count = 0;
+
+ vec_foreach (fd, im->rx_file_descriptors)
+ {
+ if (tap_rx (vm, node, f, *fd) != 1)
+ {
+ clib_warning ("rx failed");
+ count = 0;
+ break;
+ }
+ ++count;
+ }
+
+ vec_free (im->rx_file_descriptors);
+
+ return count;
+}
+
+VLIB_REGISTER_NODE (tap_inject_rx_node) = {
+ .function = tap_inject_rx,
+ .name = "tap-inject-rx",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = sizeof (u32),
+};
+
+
+static clib_error_t *
+tap_inject_init (vlib_main_t * vm)
+{
+ tap_inject_main_t * im = tap_inject_get_main ();
+
+ im->rx_node_index = tap_inject_rx_node.index;
+ im->tx_node_index = tap_inject_tx_node.index;
+ im->neighbor_node_index = tap_inject_neighbor_node.index;
+
+ vec_alloc (im->rx_buffers, NUM_BUFFERS_TO_ALLOC);
+ vec_reset_length (im->rx_buffers);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tap_inject_init);