aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/vxlan-gpe
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/vxlan-gpe')
-rw-r--r--src/vnet/vxlan-gpe/decap.c1178
-rw-r--r--src/vnet/vxlan-gpe/dir.dox32
-rw-r--r--src/vnet/vxlan-gpe/encap.c388
-rw-r--r--src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt868
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.api79
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.c1264
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.h259
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_api.c272
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_error.def16
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_packet.h110
10 files changed, 4466 insertions, 0 deletions
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
new file mode 100644
index 00000000..1b3a8b00
--- /dev/null
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -0,0 +1,1178 @@
+/*
+ * decap.c - decapsulate VXLAN GPE
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions for decapsulating VXLAN GPE tunnels
+ *
+*/
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+vlib_node_registration_t vxlan_gpe_input_node;
+
+/**
+ * @brief Struct for VXLAN GPE decap packet tracing
+ *
+ */
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+} vxlan_gpe_rx_trace_t;
+
+/**
+ * @brief Tracing function for VXLAN GPE packet decapsulation
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_rx_trace_t * t = va_arg (*args, vxlan_gpe_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "VXLAN-GPE: tunnel %d next %d error %d", t->tunnel_index,
+ t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "VXLAN-GPE: no tunnel next %d error %d\n", t->next_index,
+ t->error);
+ }
+ return s;
+}
+
+/**
+ * @brief Tracing function for VXLAN GPE packet decapsulation including length
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_with_length (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+
+ return s;
+}
+
+/**
+ * @brief Common processing for IPv4 and IPv6 VXLAN GPE decap dispatch functions
+ *
+ * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
+ * tunnels are "terminate local". This means that there is no "TX" interface for this
+ * decap case, so that field in the buffer_metadata can be "used for something else".
+ * The something else in this case is, for the IPv4/IPv6 inner-packet type case, the
+ * FIB index used to look up the inner-packet's adjacency.
+ *
+ * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+ *
+ * @param *vm
+ * @param *node
+ * @param *from_frame
+ * @param is_ip4
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+always_inline uword
+vxlan_gpe_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ u8 is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t * nngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = nngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 last_tunnel_index = ~0;
+ vxlan4_gpe_tunnel_key_t last_key4;
+ vxlan6_gpe_tunnel_key_t last_key6;
+ u32 pkts_decapsulated = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ if (is_ip4)
+ memset (&last_key4, 0xff, sizeof(last_key4));
+ else
+ memset (&last_key6, 0xff, sizeof(last_key6));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, *b1;
+ u32 next0, next1;
+ ip4_vxlan_gpe_header_t * iuvn4_0, *iuvn4_1;
+ ip6_vxlan_gpe_header_t * iuvn6_0, *iuvn6_1;
+ uword * p0, *p1;
+ u32 tunnel_index0, tunnel_index1;
+ vxlan_gpe_tunnel_t * t0, *t1;
+ vxlan4_gpe_tunnel_key_t key4_0, key4_1;
+ vxlan6_gpe_tunnel_key_t key6_0, key6_1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header(p2, LOAD);
+ vlib_prefetch_buffer_header(p3, LOAD);
+
+ CLIB_PREFETCH(p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH(p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (is_ip4)
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+ vlib_buffer_advance (b1, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+
+ iuvn4_0 = vlib_buffer_get_current (b0);
+ iuvn4_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn4_0));
+ vlib_buffer_advance (b1, sizeof(*iuvn4_1));
+ }
+ else
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+ vlib_buffer_advance (b1, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+
+ iuvn6_0 = vlib_buffer_get_current (b0);
+ iuvn6_1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn6_0));
+ vlib_buffer_advance (b1, sizeof(*iuvn6_1));
+ }
+
+ tunnel_index0 = ~0;
+ tunnel_index1 = ~0;
+ error0 = 0;
+ error1 = 0;
+
+ if (is_ip4)
+ {
+ next0 =
+ (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+ next1 =
+ (iuvn4_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_1->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
+ key4_1.local = iuvn4_1->ip4.dst_address.as_u32;
+
+ key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
+ key4_1.remote = iuvn4_1->ip4.src_address.as_u32;
+
+ key4_0.vni = iuvn4_0->vxlan.vni_res;
+ key4_1.vni = iuvn4_1->vxlan.vni_res;
+
+ key4_0.pad = 0;
+ key4_1.pad = 0;
+ }
+ else /* is_ip6 */
+ {
+ next0 = (iuvn6_0->vxlan.protocol < node->n_next_nodes) ?
+ iuvn6_0->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
+ next1 = (iuvn6_1->vxlan.protocol < node->n_next_nodes) ?
+ iuvn6_1->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
+ key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
+ key6_1.local.as_u64[0] = iuvn6_1->ip6.dst_address.as_u64[0];
+ key6_1.local.as_u64[1] = iuvn6_1->ip6.dst_address.as_u64[1];
+
+ key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
+ key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
+ key6_1.remote.as_u64[0] = iuvn6_1->ip6.src_address.as_u64[0];
+ key6_1.remote.as_u64[1] = iuvn6_1->ip6.src_address.as_u64[1];
+
+ key6_0.vni = iuvn6_0->vxlan.vni_res;
+ key6_1.vni = iuvn6_1->vxlan.vni_res;
+ }
+
+ /* Processing packet 0*/
+ if (is_ip4)
+ {
+ /* Processing for key4_0 */
+ if (PREDICT_FALSE((key4_0.as_u64[0] != last_key4.as_u64[0])
+ || (key4_0.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p0 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ last_key4.as_u64[0] = key4_0.as_u64[0];
+ last_key4.as_u64[1] = key4_0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ next0 =
+ (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn6_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+ next1 =
+ (iuvn6_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn6_1->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
+ key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
+ key6_1.local.as_u64[0] = iuvn6_1->ip6.dst_address.as_u64[0];
+ key6_1.local.as_u64[1] = iuvn6_1->ip6.dst_address.as_u64[1];
+
+ key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
+ key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
+ key6_1.remote.as_u64[0] = iuvn6_1->ip6.src_address.as_u64[0];
+ key6_1.remote.as_u64[1] = iuvn6_1->ip6.src_address.as_u64[1];
+
+ key6_0.vni = iuvn6_0->vxlan.vni_res;
+ key6_1.vni = iuvn6_1->vxlan.vni_res;
+
+ /* Processing for key6_0 */
+ if (PREDICT_FALSE(memcmp (&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+
+ t0 = pool_elt_at_index(nngm->tunnels, tunnel_index0);
+
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /**
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0: b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+
+ /* Process packet 1 */
+ if (is_ip4)
+ {
+ /* Processing for key4_1 */
+ if (PREDICT_FALSE(
+ (key4_1.as_u64[0] != last_key4.as_u64[0])
+ || (key4_1.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p1 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_1);
+
+ if (p1 == 0)
+ {
+ error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ last_key4.as_u64[0] = key4_1.as_u64[0];
+ last_key4.as_u64[1] = key4_1.as_u64[1];
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ /* Processing for key6_1 */
+ if (PREDICT_FALSE(memcmp (&key6_1, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p1 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_1);
+
+ if (p1 == 0)
+ {
+ error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ memcpy (&last_key6, &key6_1, sizeof(key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ }
+
+ t1 = pool_elt_at_index(nngm->tunnels, tunnel_index1);
+
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain (vm, b1);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b1);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+
+ trace1: b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof(*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_vxlan_gpe_header_t * iuvn4_0;
+ ip6_vxlan_gpe_header_t * iuvn6_0;
+ uword * p0;
+ u32 tunnel_index0;
+ vxlan_gpe_tunnel_t * t0;
+ vxlan4_gpe_tunnel_key_t key4_0;
+ vxlan6_gpe_tunnel_key_t key6_0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (is_ip4)
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (
+ b0, -(word) (sizeof(udp_header_t) + sizeof(ip4_header_t)));
+
+ iuvn4_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn4_0));
+ }
+ else
+ {
+ /* udp leaves current_data pointing at the vxlan-gpe header */
+ vlib_buffer_advance (
+ b0, -(word) (sizeof(udp_header_t) + sizeof(ip6_header_t)));
+
+ iuvn6_0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof(*iuvn6_0));
+ }
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ next0 =
+ (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn4_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
+ key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
+ key4_0.vni = iuvn4_0->vxlan.vni_res;
+ key4_0.pad = 0;
+
+ /* Processing for key4_0 */
+ if (PREDICT_FALSE(
+ (key4_0.as_u64[0] != last_key4.as_u64[0])
+ || (key4_0.as_u64[1] != last_key4.as_u64[1])))
+ {
+ p0 = hash_get_mem(nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ last_key4.as_u64[0] = key4_0.as_u64[0];
+ last_key4.as_u64[1] = key4_0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+ else /* is_ip6 */
+ {
+ next0 =
+ (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX)?
+ nngm->decap_next_node_list[iuvn6_0->vxlan.protocol]: \
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
+ key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
+ key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
+ key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
+ key6_0.vni = iuvn6_0->vxlan.vni_res;
+
+ /* Processing for key6_0 */
+ if (PREDICT_FALSE(memcmp (&key6_0, &last_key6, sizeof(last_key6)) != 0))
+ {
+ p0 = hash_get_mem(nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ memcpy (&last_key6, &key6_0, sizeof(key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ }
+
+ t0 = pool_elt_at_index(nngm->tunnels, tunnel_index0);
+
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan-gpe tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00: b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, vxlan_gpe_input_node.index,
+ VXLAN_GPE_ERROR_DECAPSULATED, pkts_decapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+ return from_frame->n_vectors;
+}
+
+/**
+ * @brief Graph processing dispatch function for IPv4 VXLAN GPE
+ *
+ * @node vxlan4-gpe-input
+ * @param *vm
+ * @param *node
+ * @param *from_frame
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+static uword
+vxlan4_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */1);
+}
+
+
+void
+vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index)
+{
+ vxlan_gpe_main_t *hm = &vxlan_gpe_main;
+ hm->decap_next_node_list[protocol_id] = next_node_index;
+ return;
+}
+
+void
+vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index)
+{
+ vxlan_gpe_main_t *hm = &vxlan_gpe_main;
+ hm->decap_next_node_list[protocol_id] = VXLAN_GPE_INPUT_NEXT_DROP;
+ return;
+}
+
+
+/**
+ * @brief Graph processing dispatch function for IPv6 VXLAN GPE
+ *
+ * @node vxlan6-gpe-input
+ * @param *vm
+ * @param *node
+ * @param *from_frame
+ *
+ * @return from_frame->n_vectors - uword
+ *
+ */
+static uword
+vxlan6_gpe_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */0);
+}
+
+/**
+ * @brief VXLAN GPE error strings
+ */
+static char * vxlan_gpe_error_strings[] = {
+#define vxlan_gpe_error(n,s) s,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
+ .function = vxlan4_gpe_input,
+ .name = "vxlan4-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
+ .error_strings = vxlan_gpe_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_vxlan_gpe_with_length,
+ .format_trace = format_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_gpe_input_node, vxlan4_gpe_input);
+
+VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
+ .function = vxlan6_gpe_input,
+ .name = "vxlan6-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
+ .error_strings = vxlan_gpe_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_vxlan_gpe_with_length,
+ .format_trace = format_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_gpe_input_node, vxlan6_gpe_input);
+typedef enum {
+ IP_VXLAN_BYPASS_NEXT_DROP,
+ IP_VXLAN_BYPASS_NEXT_VXLAN,
+ IP_VXLAN_BYPASS_N_NEXT,
+} ip_vxan_bypass_next_t;
+
+always_inline uword
+ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ u32 is_ip4)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ u32 * from, * to_next, n_left_from, n_left_to_next, next_index;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
+ ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ if (is_ip4) addr4.data_u32 = ~0;
+ else ip6_address_set_zero (&addr6);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ ip4_header_t * ip40, * ip41;
+ ip6_header_t * ip60, * ip61;
+ udp_header_t * udp0, * udp1;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u32 bi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, good_udp0, proto0;
+ u8 error1, good_udp1, proto1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ ip41 = vlib_buffer_get_current (b1);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ ip61 = vlib_buffer_get_current (b1);
+ }
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+ vnet_feature_next(vnet_buffer(b1)->sw_if_index[VLIB_RX], &next1, b1);
+
+ if (is_ip4)
+ {
+ proto0 = ip40->protocol;
+ proto1 = ip41->protocol;
+ }
+ else
+ {
+ proto0 = ip60->protocol;
+ proto1 = ip61->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit0; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit0; /* not VXLAN packet */
+
+ /* Validate DIP against VTEPs*/
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip40->dst_address.as_u32))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip60->dst_address))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* vxlan_gpe-input node expect current at VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit0:
+ /* Process packet 1 */
+ if (proto1 != IP_PROTOCOL_UDP)
+ goto exit1; /* not UDP packet */
+
+ if (is_ip4)
+ udp1 = ip4_next_header (ip41);
+ else
+ udp1 = ip6_next_header (ip61);
+
+ if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit1; /* not VXLAN packet */
+
+ /* Validate DIP against VTEPs*/
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip41->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip41->dst_address.as_u32))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr4 = ip41->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip61->dst_address))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ addr6 = ip61->dst_address;
+ }
+ }
+
+ flags1 = b1->flags;
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp1 |= udp1->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len1 = clib_net_to_host_u16 (ip41->length);
+ else
+ ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp1))
+ {
+ if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
+ else
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
+ good_udp1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next1 = error1 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b1->error = error1 ? error_node->errors[error1] : 0;
+
+ /* vxlan_gpe-input node expect current at VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b1, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b1, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit1:
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ ip4_header_t * ip40;
+ ip6_header_t * ip60;
+ udp_header_t * udp0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, good_udp0, proto0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ if (is_ip4)
+ ip40 = vlib_buffer_get_current (b0);
+ else
+ ip60 = vlib_buffer_get_current (b0);
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
+
+ if (is_ip4)
+ proto0 = ip40->protocol;
+ else
+ proto0 = ip60->protocol;
+
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
+ goto exit; /* not VXLAN packet */
+
+ /* Validate DIP against VTEPs*/
+ if (is_ip4)
+ {
+ if (addr4.as_u32 != ip40->dst_address.as_u32)
+ {
+ if (!hash_get (ngm->vtep4, ip40->dst_address.as_u32))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr4 = ip40->dst_address;
+ }
+ }
+ else
+ {
+ if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
+ {
+ if (!hash_get_mem (ngm->vtep6, &ip60->dst_address))
+ goto exit; /* no local VTEP for VXLAN packet */
+ addr6 = ip60->dst_address;
+ }
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* vxlan_gpe-input node expect current at VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
+ else
+ vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
+
+ exit:
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_vxlan_gpe_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
+ .function = ip4_vxlan_gpe_bypass,
+ .name = "ip4-vxlan-gpe-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-gpe-input",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_vxlan_gpe_bypass_node,ip4_vxlan_gpe_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip4_vxlan_gpe_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip4_vxlan_gpe_bypass_init);
+
+static uword
+ip6_vxlan_gpe_bypass (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
+ .function = ip6_vxlan_gpe_bypass,
+ .name = "ip6-vxlan-gpe-bypass",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-gpe-input",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_forward_next_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_vxlan_gpe_bypass_node,ip6_vxlan_gpe_bypass)
+
+/* Dummy init function to get us linked in. */
+clib_error_t * ip6_vxlan_gpe_bypass_init (vlib_main_t * vm)
+{ return 0; }
+
+VLIB_INIT_FUNCTION (ip6_vxlan_gpe_bypass_init);
diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/vnet/vxlan-gpe/dir.dox
new file mode 100644
index 00000000..c154733b
--- /dev/null
+++ b/src/vnet/vxlan-gpe/dir.dox
@@ -0,0 +1,32 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief VXLAN GPE
+
+ Based on IETF: draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+ See file: vxlan-gpe-rfc.txt
+
+*/ \ No newline at end of file
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
new file mode 100644
index 00000000..67ed94b4
--- /dev/null
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions for encapsulating VXLAN GPE tunnels
+ *
+*/
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+/** Statistics (not really errors) */
+#define foreach_vxlan_gpe_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+/**
+ * @brief VXLAN GPE encap error strings
+ */
+static char * vxlan_gpe_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_encap_error
+#undef _
+};
+
+/**
+ * @brief Struct for VXLAN GPE errors/counters
+ */
+typedef enum {
+#define _(sym,str) VXLAN_GPE_ENCAP_ERROR_##sym,
+ foreach_vxlan_gpe_encap_error
+#undef _
+ VXLAN_GPE_ENCAP_N_ERROR,
+} vxlan_gpe_encap_error_t;
+
+/**
+ * @brief Struct for tracing VXLAN GPE encapsulated packets
+ */
+typedef struct {
+ u32 tunnel_index;
+} vxlan_gpe_encap_trace_t;
+
+/**
+ * @brief Trace of packets encapsulated in VXLAN GPE
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_encap_trace_t * t
+ = va_arg (*args, vxlan_gpe_encap_trace_t *);
+
+ s = format (s, "VXLAN-GPE-ENCAP: tunnel %d", t->tunnel_index);
+ return s;
+}
+
+/**
+ * @brief Instantiates UDP + VXLAN-GPE header then set next node to IP4|6 lookup
+ *
+ * @param *ngm
+ * @param *b0
+ * @param *t0 contains rewrite header
+ * @param *next0 relative index of next dispatch function (next node)
+ * @param is_v4 Is this IPv4? (or IPv6)
+ *
+ */
+always_inline void
+vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
+ vxlan_gpe_tunnel_t * t0, u32 * next0,
+ u8 is_v4)
+{
+ ASSERT(sizeof(ip4_vxlan_gpe_header_t) == 36);
+ ASSERT(sizeof(ip6_vxlan_gpe_header_t) == 56);
+
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ next0[0] = t0->encap_next_node;
+}
+
+/**
+ * @brief Instantiates UDP + VXLAN-GPE header then set next node to IP4|6 lookup for two packets
+ *
+ * @param *ngm
+ * @param *b0 Packet0
+ * @param *b1 Packet1
+ * @param *t0 contains rewrite header for Packet0
+ * @param *t1 contains rewrite header for Packet1
+ * @param *next0 relative index of next dispatch function (next node) for Packet0
+ * @param *next1 relative index of next dispatch function (next node) for Packet1
+ * @param is_v4 Is this IPv4? (or IPv6)
+ *
+ */
+always_inline void
+vxlan_gpe_encap_two_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
+ vlib_buffer_t * b1, vxlan_gpe_tunnel_t * t0,
+ vxlan_gpe_tunnel_t * t1, u32 * next0,
+ u32 * next1, u8 is_v4)
+{
+ ASSERT(sizeof(ip4_vxlan_gpe_header_t) == 36);
+ ASSERT(sizeof(ip6_vxlan_gpe_header_t) == 56);
+
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, is_v4);
+ next0[0] = next1[0] = t0->encap_next_node;
+}
+
+/**
+ * @brief Common processing for IPv4 and IPv6 VXLAN GPE encap dispatch functions
+ *
+ * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
+ * tunnels are "establish local". This means that we don't have a TX interface as yet
+ * as we need to look up where the outer-header dest is. By setting the TX index in the
+ * buffer metadata to the encap FIB, we can do a lookup to get the adjacency and real TX.
+ *
+ * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ *
+ * @node vxlan-gpe-input
+ * @param *vm
+ * @param *node
+ * @param *from_frame
+ *
+ * @return from_frame->n_vectors
+ *
+ */
+static uword
+vxlan_gpe_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_encapsulated = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+ vnet_hw_interface_t * hi0, *hi1;
+ vxlan_gpe_tunnel_t * t0, *t1;
+ u8 is_ip4_0, is_ip4_1;
+
+ next0 = next1 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header(p2, LOAD);
+ vlib_prefetch_buffer_header(p3, LOAD);
+
+ CLIB_PREFETCH(p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH(p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+ hi1 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index(ngm->tunnels, hi0->dev_instance);
+ t1 = pool_elt_at_index(ngm->tunnels, hi1->dev_instance);
+
+ is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ is_ip4_1 = (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ if (PREDICT_TRUE(is_ip4_0 == is_ip4_1))
+ {
+ vxlan_gpe_encap_two_inline (ngm, b0, b1, t0, t1, &next0, &next1,is_ip4_0);
+ }
+ else
+ {
+ vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b1, t1, &next1, is_ip4_1);
+ }
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ pkts_encapsulated += 2;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ len1 = vlib_buffer_length_in_chain (vm, b0);
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ incremented per packet. Note stats are still incremented for deleted
+ and admin-down tunnel where packets are dropped. It is not worthwhile
+ to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE((sw_if_index0 != stats_sw_if_index)
+ || (sw_if_index1 != stats_sw_if_index)))
+ {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+ if (sw_if_index0 == sw_if_index1)
+ {
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_sw_if_index = sw_if_index0;
+ stats_n_packets = 2;
+ stats_n_bytes = len0 + len1;
+ }
+ else
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index0, 1, len0);
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, sw_if_index1, 1, len1);
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof(*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b1,
+ sizeof(*tr));
+ tr->tunnel_index = t1 - ngm->tunnels;
+ }
+
+ vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 sw_if_index0, len0;
+ vnet_hw_interface_t * hi0;
+ vxlan_gpe_tunnel_t * t0;
+ u8 is_ip4_0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index(ngm->tunnels, hi0->dev_instance);
+
+ is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ vxlan_gpe_encap_one_inline (ngm, b0, t0, &next0, is_ip4_0);
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ pkts_encapsulated++;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ * incremented per packet. Note stats are still incremented for deleted
+ * and admin-down tunnel where packets are dropped. It is not worthwhile
+ * to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof(*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
+ .function = vxlan_gpe_encap,
+ .name = "vxlan-gpe-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_encap_error_strings),
+ .error_strings = vxlan_gpe_encap_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_ENCAP_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
new file mode 100644
index 00000000..35cee50f
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
@@ -0,0 +1,868 @@
+Network Working Group P. Quinn
+Internet-Draft Cisco Systems, Inc.
+Intended status: Experimental P. Agarwal
+Expires: January 4, 2015 Broadcom
+ R. Fernando
+ L. Kreeger
+ D. Lewis
+ F. Maino
+ M. Smith
+ N. Yadav
+ Cisco Systems, Inc.
+ L. Yong
+ Huawei USA
+ X. Xu
+ Huawei Technologies
+ U. Elzur
+ Intel
+ P. Garg
+ Microsoft
+ July 3, 2014
+
+
+ Generic Protocol Extension for VXLAN
+ draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+Status of this Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 1]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ This Internet-Draft will expire on January 4, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 2]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5
+ 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6
+ 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6
+ 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7
+ 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7
+ 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
+ 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8
+ 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8
+ 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8
+ 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 8
+ 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
+ 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
+ 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
+ 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
+ 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13
+ 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13
+ 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13
+ 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+ 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+ 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 3]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+1. Introduction
+
+ Virtual eXtensible Local Area Network [VXLAN] defines an
+ encapsulation format that encapsulates Ethernet frames in an outer
+ UDP/IP transport. As data centers evolve, the need to carry other
+ protocols encapsulated in an IP packet is required, as well as the
+ need to provide increased visibility and diagnostic capabilities
+ within the overlay. The VXLAN header does not specify the protocol
+ being encapsulated and therefore is currently limited to
+ encapsulating only Ethernet frame payload, nor does it provide the
+ ability to define OAM protocols. Rather than defining yet another
+ encapsulation, VXLAN is extended to provide protocol typing and OAM
+ capabilities.
+
+ This document describes extending VXLAN via the following changes:
+
+ Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
+ in the VXLAN-gpe header to indicate that a next protocol field is
+ present.
+
+ OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
+ the VXLAN-gpe header, to indicate that the packet is an OAM
+ packet.
+
+ Version: Two reserved bits are allocated, and set in the VXLAN-gpe
+ header, to indicate VXLAN-gpe protocol version.
+
+ Next Protocol: A 8 bit next protocol field is present in the VXLAN-
+ gpe header.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 4]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+2. VXLAN Without Protocol Extension
+
+ As described in the introduction, the VXLAN header has no protocol
+ identifier that indicates the type of payload being carried by VXLAN.
+ Because of this, VXLAN is limited to an Ethernet payload.
+ Furthermore, the VXLAN header has no mechanism to signal OAM packets.
+
+ The VXLAN header defines bits 0-7 as flags (some defined, some
+ reserved), the VXLAN network identifier (VNI) field and several
+ reserved bits. The flags provide flexibility to define how the
+ reserved bits can be used to change the definition of the VXLAN
+ header.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|R|R|R| Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Figure 1: VXLAN Header
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 5]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3. Generic Protocol Extension VXLAN (VXLAN-gpe)
+
+3.1. Multi Protocol Support
+
+ This draft defines the following two changes to the VXLAN header in
+ order to support multi-protocol encapsulation:
+
+ P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ MUST be set to 1 to indicate the presence of the 8 bit next
+ protocol field.
+
+ P = 0 indicates that the payload MUST conform to VXLAN as defined
+ in [VXLAN].
+
+ Flag bit 5 was chosen as the P bit because this flag bit is
+ currently reserved in VXLAN.
+
+ Next Protocol Field: The lower 8 bits of the first word are used to
+ carry a next protocol. This next protocol field contains the
+ protocol of the encapsulated payload packet. A new protocol
+ registry will be requested from IANA.
+
+ This draft defines the following Next Protocol values:
+
+ 0x1 : IPv4
+ 0x2 : IPv6
+ 0x3 : Ethernet
+ 0x4 : Network Service Header [NSH]
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|R| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 2: VXLAN-gpe Next Protocol
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 6]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3.2. OAM Support
+
+ Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ packet is an OAM packet and OAM processing MUST occur. The OAM
+ protocol details are out of scope for this document. As with the
+ P-bit, bit 7 is currently a reserved flag in VXLAN.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 3: VXLAN-gpe OAM Bit
+
+3.3. Version Bits
+
+ VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ reserved in VXLAN. The version field is used to ensure backward
+ compatibility going forward with future VXLAN-gpe updates.
+
+ The initial version for VXLAN-gpe is 0.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+ Figure 4: VXLAN-gpe Version Bits
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 7]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+4. Backward Compatibility
+
+4.1. VXLAN VTEP to VXLAN-gpe VTEP
+
+ As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits
+ respectively must be set to zero. The remaining reserved bits must
+ be zero, including the VXLAN-gpe version field, bits 8 and 9. The
+ encapsulated payload MUST be Ethernet.
+
+4.2. VXLAN-gpe VTEP to VXLAN VTEP
+
+ A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN
+ VTEP. When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN-
+ gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP
+ destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver
+ = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving
+ VXLAN VTEP will threat this packet as a VXLAN packet.
+
+ A method for determining the capabilities of a VXLAN VTEP (gpe or
+ non-gpe) is out of the scope of this draft.
+
+4.3. VXLAN-gpe UDP Ports
+
+ VXLAN-gpe uses a new UDP destination port (to be assigned by IANA)
+ when sending traffic to VXLAN-gpe VTEPs.
+
+4.4. VXLAN-gpe and Encapsulated IP Header Fields
+
+ When encapsulating and decapsulating IPv4 and IPv6 packets, certain
+ fields, such as IPv4 Time to Live (TTL) from the inner IP header need
+ to be considered. VXLAN-gpe IP encapsulation and decapsulation
+ utilizes the techniques described in [RFC6830], section 5.3.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 8]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+5. VXLAN-gpe Examples
+
+ This section provides three examples of protocols encapsulated using
+ the Generic Protocol Extension for VXLAN described in this document.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv4 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 5: IPv4 and VXLAN-gpe
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv6 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 6: IPv6 and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 9]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original Ethernet Frame |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 7: Ethernet and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 10]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+6. Security Considerations
+
+ VXLAN's security is focused on issues around L2 encapsulation into
+ L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic
+ redirection are dependent on the particular protocol payload
+ encapsulated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 11]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+7. Acknowledgments
+
+ A special thank you goes to Dino Farinacci for his guidance and
+ detailed review.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 12]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+8. IANA Considerations
+
+8.1. UDP Port
+
+ A new UDP port will be requested from IANA.
+
+8.2. VXLAN-gpe Next Protocol
+
+ IANA is requested to set up a registry of "Next Protocol". These are
+ 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
+ this draft. New values are assigned via Standards Action [RFC5226].
+
+ +---------------+-------------+---------------+
+ | Next Protocol | Description | Reference |
+ +---------------+-------------+---------------+
+ | 0 | Reserved | This document |
+ | | | |
+ | 1 | IPv4 | This document |
+ | | | |
+ | 2 | IPv6 | This document |
+ | | | |
+ | 3 | Ethernet | This document |
+ | | | |
+ | 4 | NSH | This document |
+ | | | |
+ | 5..253 | Unassigned | |
+ +---------------+-------------+---------------+
+
+ Table 1
+
+8.3. VXLAN-gpe Reserved Bits
+
+ There are ten bits at the beginning of the VXLAN-gpe header. New
+ bits are assigned via Standards Action [RFC5226].
+
+ Bits 0-3 - Reserved
+ Bit 4 - Instance ID (I bit)
+ Bit 5 - Next Protocol (P bit)
+ Bit 6 - Reserved
+ Bit 7 - OAM (O bit)
+ Bits 8-9 - Version
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 13]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+9. References
+
+9.1. Normative References
+
+ [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+ August 1980.
+
+ [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
+ September 1981.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+ IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+ May 2008.
+
+9.2. Informative References
+
+ [NSH] Quinn, P. and et al. , "Network Service Header", 2014.
+
+ [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
+ October 1994.
+
+ [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
+ Locator/ID Separation Protocol (LISP)", RFC 6830,
+ January 2013.
+
+ [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
+ L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
+ Framework for Overlaying Virtualized Layer 2 Networks over
+ Layer 3 Networks", 2013.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 14]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Authors' Addresses
+
+ Paul Quinn
+ Cisco Systems, Inc.
+
+ Email: paulq@cisco.com
+
+
+ Puneet Agarwal
+ Broadcom
+
+ Email: pagarwal@broadcom.com
+
+
+ Rex Fernando
+ Cisco Systems, Inc.
+
+ Email: rex@cisco.com
+
+
+ Larry Kreeger
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Darrel Lewis
+ Cisco Systems, Inc.
+
+ Email: darlewis@cisco.com
+
+
+ Fabio Maino
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Michael Smith
+ Cisco Systems, Inc.
+
+ Email: michsmit@cisco.com
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 15]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ Navindra Yadav
+ Cisco Systems, Inc.
+
+ Email: nyadav@cisco.com
+
+
+ Lucy Yong
+ Huawei USA
+
+ Email: lucy.yong@huawei.com
+
+
+ Xiaohu Xu
+ Huawei Technologies
+
+ Email: xuxiaohu@huawei.com
+
+
+ Uri Elzur
+ Intel
+
+ Email: uri.elzur@intel.com
+
+
+ Pankaj Garg
+ Microsoft
+
+ Email: Garg.Pankaj@microsoft.com
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/vnet/vxlan-gpe/vxlan_gpe.api
new file mode 100644
index 00000000..04082d69
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.api
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+define vxlan_gpe_add_del_tunnel
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+ u8 local[16];
+ u8 remote[16];
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ u8 protocol;
+ u32 vni;
+ u8 is_add;
+};
+
+define vxlan_gpe_add_del_tunnel_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+define vxlan_gpe_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+define vxlan_gpe_tunnel_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 local[16];
+ u8 remote[16];
+ u32 vni;
+ u8 protocol;
+ u32 mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ u8 is_ipv6;
+};
+
+/** \brief Interface set vxlan-gpe-bypass request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_vxlan_gpe_bypass
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+ u8 enable;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
new file mode 100644
index 00000000..462c79a0
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -0,0 +1,1264 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels
+ *
+*/
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/fib/fib.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/interface.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief VXLAN-GPE.
+ *
+ * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN-GPE tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
+ */
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+/**
+ * @brief Tracing function for VXLAN GPE tunnel packets
+ *
+ * @param *s formatting string
+ * @param *args
+ *
+ * @return *s formatted string
+ *
+ */
+u8 * format_vxlan_gpe_tunnel (u8 * s, va_list * args)
+{
+ vxlan_gpe_tunnel_t * t = va_arg (*args, vxlan_gpe_tunnel_t *);
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+
+ s = format (s, "[%d] local: %U remote: %U ",
+ t - ngm->tunnels,
+ format_ip46_address, &t->local, IP46_TYPE_ANY,
+ format_ip46_address, &t->remote, IP46_TYPE_ANY);
+
+ s = format (s, " vxlan VNI %d ", t->vni);
+
+ switch (t->protocol)
+ {
+ case VXLAN_GPE_PROTOCOL_IP4:
+ s = format (s, "next-protocol ip4");
+ break;
+ case VXLAN_GPE_PROTOCOL_IP6:
+ s = format (s, "next-protocol ip6");
+ break;
+ case VXLAN_GPE_PROTOCOL_ETHERNET:
+ s = format (s, "next-protocol ethernet");
+ break;
+ case VXLAN_GPE_PROTOCOL_NSH:
+ s = format (s, "next-protocol nsh");
+ break;
+ default:
+ s = format (s, "next-protocol unknown %d", t->protocol);
+ }
+
+ if (ip46_address_is_multicast (&t->remote))
+ s = format (s, "mcast_sw_if_index %d ", t->mcast_sw_if_index);
+
+ s = format (s, " fibs: (encap %d, decap %d)",
+ t->encap_fib_index,
+ t->decap_fib_index);
+
+ return s;
+}
+
+/**
+ * @brief Naming for VXLAN GPE tunnel
+ *
+ * @param *s formatting string
+ * @param *args
+ *
+ * @return *s formatted string
+ *
+ */
+static u8 * format_vxlan_gpe_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "vxlan_gpe_tunnel%d", dev_instance);
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+/**
+ * @brief CLI function for VXLAN GPE admin up/down
+ *
+ * @param *vnm
+ * @param hw_if_index
+ * @param flag
+ *
+ * @return *rc
+ *
+ */
+static clib_error_t *
+vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
+ .name = "VXLAN_GPE",
+ .format_device_name = format_vxlan_gpe_name,
+ .format_tx_trace = format_vxlan_gpe_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .admin_up_down_function = vxlan_gpe_interface_admin_up_down,
+};
+
+
+/**
+ * @brief Formatting function for tracing VXLAN GPE with length
+ *
+ * @param *s
+ * @param *args
+ *
+ * @return *s
+ *
+ */
+static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
+ .name = "VXLAN_GPE",
+ .format_header = format_vxlan_gpe_header_with_length,
+ .build_rewrite = default_build_rewrite,
+};
+
+static void
+vxlan_gpe_tunnel_restack_dpo(vxlan_gpe_tunnel_t * t)
+{
+ dpo_id_t dpo = DPO_INVALID;
+ u32 encap_index = vxlan_gpe_encap_node.index;
+ fib_forward_chain_type_t forw_type = ip46_address_is_ip4(&t->remote) ?
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset(&dpo);
+}
+
+static vxlan_gpe_tunnel_t *
+vxlan_gpe_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_VXLAN_GPE_TUNNEL == node->fn_type);
+#endif
+ return ((vxlan_gpe_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(vxlan_gpe_tunnel_t, node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node -
+ * Here we will restack the new dpo of VXLAN_GPE DIP to encap node.
+ */
+static fib_node_back_walk_rc_t
+vxlan_gpe_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ vxlan_gpe_tunnel_restack_dpo(vxlan_gpe_tunnel_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+vxlan_gpe_tunnel_fib_node_get (fib_node_index_t index)
+{
+ vxlan_gpe_tunnel_t * t;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+
+ t = pool_elt_at_index(ngm->tunnels, index);
+
+ return (&t->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_gpe_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The VXLAN_GPE tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by VXLAN_GPE tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_gpe_vft = {
+ .fnv_get = vxlan_gpe_tunnel_fib_node_get,
+ .fnv_last_lock = vxlan_gpe_tunnel_last_lock_gone,
+ .fnv_back_walk = vxlan_gpe_tunnel_back_walk,
+};
+
+#define foreach_gpe_copy_field \
+_(vni) \
+_(protocol) \
+_(mcast_sw_if_index) \
+_(encap_fib_index) \
+_(decap_fib_index)
+
+#define foreach_copy_ipv4 { \
+ _(local.ip4.as_u32) \
+ _(remote.ip4.as_u32) \
+}
+
+#define foreach_copy_ipv6 { \
+ _(local.ip6.as_u64[0]) \
+ _(local.ip6.as_u64[1]) \
+ _(remote.ip6.as_u64[0]) \
+ _(remote.ip6.as_u64[1]) \
+}
+
+
+/**
+ * @brief Calculate IPv4 VXLAN GPE rewrite header
+ *
+ * @param *t
+ *
+ * @return rc
+ *
+ */
+int vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node)
+{
+ u8 *rw = 0;
+ ip4_header_t * ip0;
+ ip4_vxlan_gpe_header_t * h0;
+ int len;
+
+ len = sizeof (*h0) + extension_size;
+
+ vec_free(t->rewrite);
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_vxlan_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip0->src_address.as_u32 = t->local.ip4.as_u32;
+ ip0->dst_address.as_u32 = t->remote.ip4.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+ if (protocol_override)
+ {
+ h0->vxlan.protocol = protocol_override;
+ }
+ else
+ {
+ h0->vxlan.protocol = t->protocol;
+ }
+ t->rewrite_size = sizeof(ip4_vxlan_gpe_header_t) + extension_size;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ t->rewrite = rw;
+ t->encap_next_node = encap_next_node;
+ return (0);
+}
+
+/**
+ * @brief Calculate IPv6 VXLAN GPE rewrite header
+ *
+ * @param *t
+ *
+ * @return rc
+ *
+ */
+int vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node)
+{
+ u8 *rw = 0;
+ ip6_header_t * ip0;
+ ip6_vxlan_gpe_header_t * h0;
+ int len;
+
+ len = sizeof (*h0) + extension_size;
+
+ vec_free(t->rewrite);
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip6_vxlan_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip6;
+ ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(6 << 28);
+ ip0->hop_limit = 255;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ ip0->src_address.as_u64[0] = t->local.ip6.as_u64[0];
+ ip0->src_address.as_u64[1] = t->local.ip6.as_u64[1];
+ ip0->dst_address.as_u64[0] = t->remote.ip6.as_u64[0];
+ ip0->dst_address.as_u64[1] = t->remote.ip6.as_u64[1];
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+ if (protocol_override)
+ {
+ h0->vxlan.protocol = t->protocol;
+ }
+ else
+ {
+ h0->vxlan.protocol = protocol_override;
+ }
+ t->rewrite_size = sizeof(ip4_vxlan_gpe_header_t) + extension_size;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ t->rewrite = rw;
+ t->encap_next_node = encap_next_node;
+ return (0);
+}
+
+static void
+hash_set_key_copy (uword ** h, void * key, uword v) {
+ size_t ksz = hash_header(*h)->user;
+ void * copy = clib_mem_alloc (ksz);
+ clib_memcpy (copy, key, ksz);
+ hash_set_mem (*h, copy, v);
+}
+
+static void
+hash_unset_key_free (uword ** h, void * key) {
+ hash_pair_t * hp = hash_get_pair_mem (*h, key);
+ ASSERT (hp);
+ key = uword_to_pointer (hp->key, void *);
+ hash_unset_mem (*h, key);
+ clib_mem_free (key);
+}
+
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_gpe_main.vtep6, &ip->ip6);
+ if (vtep)
+ return ++(*vtep);
+ ip46_address_is_ip4(ip) ?
+ hash_set (vxlan_gpe_main.vtep4, ip->ip4.as_u32, 1) :
+ hash_set_key_copy (&vxlan_gpe_main.vtep6, &ip->ip6, 1);
+ return 1;
+}
+
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+ uword *vtep = ip46_address_is_ip4(ip) ?
+ hash_get (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_get_mem (vxlan_gpe_main.vtep6, &ip->ip6);
+ ASSERT(vtep);
+ if (--(*vtep) != 0)
+ return *vtep;
+ ip46_address_is_ip4(ip) ?
+ hash_unset (vxlan_gpe_main.vtep4, ip->ip4.as_u32) :
+ hash_unset_key_free (&vxlan_gpe_main.vtep6, &ip->ip6);
+ return 0;
+}
+
+typedef CLIB_PACKED(union {
+ struct {
+ fib_node_index_t mfib_entry_index;
+ adj_index_t mcast_adj_index;
+ };
+ u64 as_u64;
+}) mcast_shared_t;
+
+static inline mcast_shared_t
+mcast_shared_get(ip46_address_t * ip)
+{
+ ASSERT(ip46_address_is_multicast(ip));
+ uword * p = hash_get_mem (vxlan_gpe_main.mcast_shared, ip);
+ ASSERT(p);
+ return (mcast_shared_t) { .as_u64 = *p };
+}
+
+static inline void
+mcast_shared_add(ip46_address_t *remote,
+ fib_node_index_t mfei,
+ adj_index_t ai)
+{
+ mcast_shared_t new_ep = {
+ .mcast_adj_index = ai,
+ .mfib_entry_index = mfei,
+ };
+
+ hash_set_key_copy (&vxlan_gpe_main.mcast_shared, remote, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove(ip46_address_t *remote)
+{
+ mcast_shared_t ep = mcast_shared_get(remote);
+
+ adj_unlock(ep.mcast_adj_index);
+ mfib_table_entry_delete_index(ep.mfib_entry_index,
+ MFIB_SOURCE_VXLAN_GPE);
+
+ hash_unset_key_free (&vxlan_gpe_main.mcast_shared, remote);
+}
+
+static inline fib_protocol_t
+fib_ip_proto(bool is_ip6)
+{
+ return (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+}
+
+/**
+ * @brief Add or Del a VXLAN GPE tunnel
+ *
+ * @param *a
+ * @param *sw_if_index
+ *
+ * @return rc
+ *
+ */
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_hw_interface_t * hi;
+ uword * p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ int rv;
+ vxlan4_gpe_tunnel_key_t key4, *key4_copy;
+ vxlan6_gpe_tunnel_key_t key6, *key6_copy;
+ u32 is_ip6 = a->is_ip6;
+
+ if (!is_ip6)
+ {
+ key4.local = a->local.ip4.as_u32;
+ key4.remote = a->remote.ip4.as_u32;
+ key4.vni = clib_host_to_net_u32 (a->vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem(ngm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ key6.local.as_u64[0] = a->local.ip6.as_u64[0];
+ key6.local.as_u64[1] = a->local.ip6.as_u64[1];
+ key6.remote.as_u64[0] = a->remote.ip6.as_u64[0];
+ key6.remote.as_u64[1] = a->remote.ip6.as_u64[1];
+ key6.vni = clib_host_to_net_u32 (a->vni << 8);
+
+ p = hash_get_mem(ngm->vxlan6_gpe_tunnel_by_key, &key6);
+ }
+
+ if (a->is_add)
+ {
+ l2input_main_t * l2im = &l2input_main;
+
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+ pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_gpe_copy_field;
+ if (!a->is_ip6) foreach_copy_ipv4
+ else foreach_copy_ipv6
+#undef _
+
+ if (!a->is_ip6) t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
+
+ if (!a->is_ip6) {
+ rv = vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP);
+ } else {
+ rv = vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ }
+
+ if (rv)
+ {
+ pool_put (ngm->tunnels, t);
+ return rv;
+ }
+
+ if (!is_ip6)
+ {
+ key4_copy = clib_mem_alloc (sizeof (*key4_copy));
+ clib_memcpy (key4_copy, &key4, sizeof (*key4_copy));
+ hash_set_mem (ngm->vxlan4_gpe_tunnel_by_key, key4_copy,
+ t - ngm->tunnels);
+ }
+ else
+ {
+ key6_copy = clib_mem_alloc (sizeof (*key6_copy));
+ clib_memcpy (key6_copy, &key6, sizeof (*key6_copy));
+ hash_set_mem (ngm->vxlan6_gpe_tunnel_by_key, key6_copy,
+ t - ngm->tunnels);
+ }
+
+ if (vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) > 0)
+ {
+ vnet_interface_main_t * im = &vnm->interface_main;
+ hw_if_index = ngm->free_vxlan_gpe_tunnel_hw_if_indices
+ [vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices)-1];
+ _vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - ngm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+ /* clear old stats of freed tunnel before reuse */
+ sw_if_index = hi->sw_if_index;
+ vnet_interface_counter_lock(im);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
+ vlib_zero_combined_counter
+ (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
+ vlib_zero_simple_counter
+ (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
+ vnet_interface_counter_unlock(im);
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, vxlan_gpe_device_class.index, t - ngm->tunnels,
+ vxlan_gpe_hw_class.index, t - ngm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = vxlan_gpe_encap_node.index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+ vec_validate_init_empty (ngm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ ngm->tunnel_index_by_sw_if_index[sw_if_index] = t - ngm->tunnels;
+
+ /* setup l2 input config with l2 feature and bd 0 to drop packet */
+ vec_validate (l2im->configs, sw_if_index);
+ l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+ l2im->configs[sw_if_index].bd_index = 0;
+
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ fib_node_init(&t->node, FIB_NODE_TYPE_VXLAN_GPE_TUNNEL);
+ fib_prefix_t tun_remote_pfx;
+ u32 encap_index = vxlan_gpe_encap_node.index;
+ vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
+
+ fib_prefix_from_ip46_addr(&t->remote, &tun_remote_pfx);
+ if (!ip46_address_is_multicast(&t->remote))
+ {
+ /* Unicast tunnel -
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ vtep_addr_ref(&t->local);
+ t->fib_entry_index = fib_table_entry_special_add
+ (t->encap_fib_index, &tun_remote_pfx, FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE);
+ t->sibling_index = fib_entry_child_add
+ (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_GPE_TUNNEL, t - ngm->tunnels);
+ vxlan_gpe_tunnel_restack_dpo(t);
+ }
+ else
+ {
+ /* Multicast tunnel -
+ * as the same mcast group can be used for mutiple mcast tunnels
+ * with different VNIs, create the output fib adjecency only if
+ * it does not already exist
+ */
+ fib_protocol_t fp = fib_ip_proto(is_ip6);
+
+ if (vtep_addr_ref(&t->remote) == 1)
+ {
+ fib_node_index_t mfei;
+ adj_index_t ai;
+ fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo(fp),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 0,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ const mfib_prefix_t mpfx = {
+ .fp_proto = fp,
+ .fp_len = (is_ip6 ? 128 : 32),
+ .fp_grp_addr = tun_remote_pfx.fp_addr,
+ };
+
+ /*
+ * Setup the (*,G) to receive traffic on the mcast group
+ * - the forwarding interface is for-us
+ * - the accepting interface is that from the API
+ */
+ mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN_GPE,
+ &path,
+ MFIB_ITF_FLAG_FORWARD);
+
+ path.frp_sw_if_index = a->mcast_sw_if_index;
+ path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ mfei = mfib_table_entry_path_update(t->encap_fib_index,
+ &mpfx,
+ MFIB_SOURCE_VXLAN_GPE,
+ &path,
+ MFIB_ITF_FLAG_ACCEPT);
+
+ /*
+ * Create the mcast adjacency to send traffic to the group
+ */
+ ai = adj_mcast_add_or_lock(fp,
+ fib_proto_to_link(fp),
+ a->mcast_sw_if_index);
+
+ /*
+ * create a new end-point
+ */
+ mcast_shared_add(&t->remote, mfei, ai);
+ }
+
+ dpo_id_t dpo = DPO_INVALID;
+ mcast_shared_t ep = mcast_shared_get(&t->remote);
+
+ /* Stack shared mcast remote mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY_MCAST,
+ fib_proto_to_dpo(fp),
+ ep.mcast_adj_index);
+
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
+ }
+
+ /* Set vxlan tunnel output node */
+ hi->output_node_index = encap_index;
+
+ vnet_get_sw_interface (vnet_get_main(), sw_if_index)->flood_class = flood_class;
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (ngm->tunnels, p[0]);
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
+ vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, t->sw_if_index);
+ si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
+ set_int_l2_mode(ngm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0, 0, 0, 0);
+ vec_add1 (ngm->free_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index);
+
+ ngm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
+
+ if (!is_ip6)
+ hash_unset (ngm->vxlan4_gpe_tunnel_by_key, key4.as_u64);
+ else
+ hash_unset_key_free (&ngm->vxlan6_gpe_tunnel_by_key, &key6);
+
+ if (!ip46_address_is_multicast(&t->remote))
+ {
+ vtep_addr_unref(&t->local);
+ fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
+ fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
+ }
+ else if (vtep_addr_unref(&t->remote) == 0)
+ {
+ mcast_shared_remove(&t->remote);
+ }
+
+ fib_node_deinit(&t->node);
+ vec_free (t->rewrite);
+ pool_put (ngm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static clib_error_t *
+vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ u8 is_add = 1;
+ ip46_address_t local, remote;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u8 grp_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_fib_index = 0;
+ u32 decap_fib_index = 0;
+ u8 protocol = VXLAN_GPE_PROTOCOL_IP4;
+ u32 vni;
+ u8 vni_set = 0;
+ int rv;
+ u32 tmp;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a;
+ u32 sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "local %U",
+ unformat_ip4_address, &local.ip4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote.ip4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local.ip6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote.ip6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip4_address, &remote.ip4,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip6_address, &remote.ip6,
+ unformat_vnet_sw_interface,
+ vnet_get_main(), &mcast_sw_if_index))
+ {
+ grp_set = remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ if (ipv6_set)
+ encap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
+ else
+ encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
+
+ if (encap_fib_index == ~0)
+ {
+ error = clib_error_return (0, "nonexistent encap fib id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &tmp))
+ {
+ if (ipv6_set)
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
+ else
+ decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
+
+ if (decap_fib_index == ~0)
+ {
+ error = clib_error_return (0, "nonexistent decap fib id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat(line_input, "next-ip4"))
+ protocol = VXLAN_GPE_PROTOCOL_IP4;
+ else if (unformat(line_input, "next-ip6"))
+ protocol = VXLAN_GPE_PROTOCOL_IP6;
+ else if (unformat(line_input, "next-ethernet"))
+ protocol = VXLAN_GPE_PROTOCOL_ETHERNET;
+ else if (unformat(line_input, "next-nsh"))
+ protocol = VXLAN_GPE_PROTOCOL_NSH;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ error = clib_error_return (0, "tunnel local address not specified");
+ goto done;
+ }
+
+ if (remote_set == 0)
+ {
+ error = clib_error_return (0, "tunnel remote address not specified");
+ goto done;
+ }
+
+ if (grp_set && !ip46_address_is_multicast(&remote))
+ {
+ error = clib_error_return (0, "tunnel group address not multicast");
+ goto done;
+ }
+
+ if (grp_set == 0 && ip46_address_is_multicast(&remote))
+ {
+ error = clib_error_return (0, "remote address must be unicast");
+ goto done;
+ }
+
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "tunnel nonexistent multicast device");
+ goto done;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ goto done;
+ }
+
+ if ((ipv4_set && memcmp(&local.ip4, &remote.ip4, sizeof(local.ip4)) == 0) ||
+ (ipv6_set && memcmp(&local.ip6, &remote.ip6, sizeof(local.ip6)) == 0))
+ {
+ error = clib_error_return (0, "src and remote addresses are identical");
+ goto done;
+ }
+
+ if (vni_set == 0)
+ {
+ error = clib_error_return (0, "vni not specified");
+ goto done;
+ }
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+ a->is_ip6 = ipv6_set;
+
+#define _(x) a->x = x;
+ foreach_gpe_copy_field;
+ if (ipv4_set) foreach_copy_ipv4
+ else foreach_copy_ipv6
+#undef _
+
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
+
+ switch(rv)
+ {
+ case 0:
+ vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
+ break;
+ case VNET_API_ERROR_INVALID_DECAP_NEXT:
+ error = clib_error_return (0, "invalid decap-next...");
+ goto done;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ default:
+ error = clib_error_return
+ (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Add or delete a VXLAN-GPE Tunnel.
+ *
+ * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN-GPE tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE sengment.
+ *
+ * @cliexpar
+ * Example of how to create a VXLAN-GPE Tunnel:
+ * @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 local 10.0.3.3 vni 13 encap-vrf-id 7}
+ * Example of how to delete a VXLAN Tunnel:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 remote 10.0.3.3 vni 13 del}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
+ .path = "create vxlan-gpe tunnel",
+ .short_help =
+ "create vxlan-gpe tunnel local <local-addr> "
+ " {remote <remote-addr>|group <mcast-addr> <intf-name>}"
+ " vni <nn> [next-ip4][next-ip6][next-ethernet][next-nsh]"
+ " [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
+ .function = vxlan_gpe_add_del_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI function for showing VXLAN GPE tunnels
+ *
+ * @param *vm
+ * @param *input
+ * @param *cmd
+ *
+ * @return error
+ *
+ */
+static clib_error_t *
+show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t * t;
+
+ if (pool_elts (ngm->tunnels) == 0)
+ vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
+
+ pool_foreach (t, ngm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
+ }));
+
+ return 0;
+}
+
+/*?
+ * Display all the VXLAN-GPE Tunnel entries.
+ *
+ * @cliexpar
+ * Example of how to display the VXLAN-GPE Tunnel entries:
+ * @cliexstart{show vxlan-gpe tunnel}
+ * [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
+ .path = "show vxlan-gpe",
+ .function = show_vxlan_gpe_tunnel_command_fn,
+};
+/* *INDENT-ON* */
+
+void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index,
+ u8 is_ip6,
+ u8 is_enable)
+{
+ if (is_ip6)
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gpe-bypass",
+ sw_if_index, is_enable, 0, 0);
+ else
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gpe-bypass",
+ sw_if_index, is_enable, 0, 0);
+}
+
+
+static clib_error_t *
+set_ip_vxlan_gpe_bypass (u32 is_ip6,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_vxlan_gpe_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+ done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_gpe_bypass (0, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip4-vxlan-gpe-bypass' graph node for a given interface.
+ * By adding the IPv4 vxlan-gpe-bypass graph node to an interface, the node checks
+ * for and validate input vxlan_gpe packet and bypass ip4-lookup, ip4-local,
+ * ip4-udp-lookup nodes to speedup vxlan_gpe packet forwarding. This node will
+ * cause extra overhead to for non-vxlan_gpe packets which is kept at a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip4-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip4-vxlan-gpe-bypass error-drop [0]
+ * vxlan4-gpe-input [1]
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip4-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip4-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip4-vxlan-gpe-bypass error-drop [0] ip4-input
+ * vxlan4-gpe-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabed on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv4 unicast:
+ * ip4-vxlan-gpe-bypass
+ * ip4-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip4-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
+ .path = "set interface ip vxlan-gpe-bypass",
+ .function = set_ip4_vxlan_gpe_bypass,
+ .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_gpe_bypass (1, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip6-vxlan-gpe-bypass' graph node for a given interface.
+ * By adding the IPv6 vxlan-gpe-bypass graph node to an interface, the node checks
+ * for and validate input vxlan_gpe packet and bypass ip6-lookup, ip6-local,
+ * ip6-udp-lookup nodes to speedup vxlan_gpe packet forwarding. This node will
+ * cause extra overhead to for non-vxlan_gpe packets which is kept at a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip6-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip6-vxlan-gpe-bypass error-drop [0]
+ * vxlan6-gpe-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip6-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip6-vxlan-gpe-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
+ * Name Next Previous
+ * ip6-vxlan-gpe-bypass error-drop [0] ip6-input
+ * vxlan6-gpe-input [1] ip4-input-no-checksum
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabed on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv6 unicast:
+ * ip6-vxlan-gpe-bypass
+ * ip6-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip6-vxlan-gpe-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
+ .path = "set interface ip6 vxlan-gpe-bypass",
+ .function = set_ip6_vxlan_gpe_bypass,
+ .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-vxlan-gpe-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-vxlan-gpe-bypass",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Feature init function for VXLAN GPE
+ *
+ * @param *vm
+ *
+ * @return error
+ *
+ */
+clib_error_t *vxlan_gpe_init (vlib_main_t *vm)
+{
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ ngm->vnet_main = vnet_get_main();
+ ngm->vlib_main = vm;
+
+ ngm->vxlan4_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(vxlan4_gpe_tunnel_key_t), sizeof (uword));
+
+ ngm->vxlan6_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(vxlan6_gpe_tunnel_key_t), sizeof (uword));
+
+
+ ngm->mcast_shared = hash_create_mem(0,
+ sizeof(ip46_address_t),
+ sizeof(mcast_shared_t));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_VXLAN_GPE,
+ vxlan4_gpe_input_node.index, 1 /* is_ip4 */);
+ udp_register_dst_port (vm, UDP_DST_PORT_VXLAN6_GPE,
+ vxlan6_gpe_input_node.index, 0 /* is_ip4 */);
+
+ /* Register the list of standard decap protocols supported */
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP4,
+ VXLAN_GPE_INPUT_NEXT_IP4_INPUT);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP6,
+ VXLAN_GPE_INPUT_NEXT_IP6_INPUT);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_ETHERNET,
+ VXLAN_GPE_INPUT_NEXT_ETHERNET_INPUT);
+
+ fib_node_register_type(FIB_NODE_TYPE_VXLAN_GPE_TUNNEL, &vxlan_gpe_vft);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(vxlan_gpe_init);
+
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
new file mode 100644
index 00000000..c348b5d5
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief VXLAN GPE definitions
+ *
+*/
+#ifndef included_vnet_vxlan_gpe_h
+#define included_vnet_vxlan_gpe_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+
+/**
+ * @brief VXLAN GPE header struct
+ *
+ */
+typedef CLIB_PACKED (struct {
+ /** 20 bytes */
+ ip4_header_t ip4;
+ /** 8 bytes */
+ udp_header_t udp;
+ /** 8 bytes */
+ vxlan_gpe_header_t vxlan;
+}) ip4_vxlan_gpe_header_t;
+
+typedef CLIB_PACKED (struct {
+ /** 40 bytes */
+ ip6_header_t ip6;
+ /** 8 bytes */
+ udp_header_t udp;
+ /** 8 bytes */
+ vxlan_gpe_header_t vxlan;
+}) ip6_vxlan_gpe_header_t;
+
+/**
+ * @brief Key struct for IPv4 VXLAN GPE tunnel.
+ * Key fields: local remote, vni
+ * all fields in NET byte order
+ * VNI shifted 8 bits
+ */
+typedef CLIB_PACKED(struct {
+ union {
+ struct {
+ u32 local;
+ u32 remote;
+
+ u32 vni;
+ u32 pad;
+ };
+ u64 as_u64[2];
+ };
+}) vxlan4_gpe_tunnel_key_t;
+
+/**
+ * @brief Key struct for IPv6 VXLAN GPE tunnel.
+ * Key fields: local remote, vni
+ * all fields in NET byte order
+ * VNI shifted 8 bits
+ */
+typedef CLIB_PACKED(struct {
+ ip6_address_t local;
+ ip6_address_t remote;
+ u32 vni;
+}) vxlan6_gpe_tunnel_key_t;
+
+/**
+ * @brief Struct for VXLAN GPE tunnel
+ */
+typedef struct {
+ /** Rewrite string. $$$$ embed vnet_rewrite header */
+ u8 * rewrite;
+
+ /** encapsulated protocol */
+ u8 protocol;
+
+ /* FIB DPO for IP forwarding of VXLAN-GPE encap packet */
+ dpo_id_t next_dpo;
+ /** tunnel local address */
+ ip46_address_t local;
+ /** tunnel remote address */
+ ip46_address_t remote;
+
+ /* mcast packet output intfc index (used only if dst is mcast) */
+ u32 mcast_sw_if_index;
+
+ /** FIB indices - tunnel partner lookup here */
+ u32 encap_fib_index;
+ /** FIB indices - inner IP packet lookup here */
+ u32 decap_fib_index;
+
+ /** VXLAN GPE VNI in HOST byte order, shifted left 8 bits */
+ u32 vni;
+
+ /** vnet intfc hw_if_index */
+ u32 hw_if_index;
+ /** vnet intfc sw_if_index */
+ u32 sw_if_index;
+
+ /** flags */
+ u32 flags;
+
+ /** rewrite size for dynamic plugins like iOAM */
+ u8 rewrite_size;
+
+ /** Next node after VxLAN-GPE encap */
+ uword encap_next_node;
+
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /*
+ * The FIB entry for (depending on VXLAN-GPE tunnel is unicast or mcast)
+ * sending unicast VXLAN-GPE encap packets or receiving mcast VXLAN-GPE packets
+ */
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+
+ /**
+ * The tunnel is a child of the FIB entry for its desintion. This is
+ * so it receives updates when the forwarding information for that entry
+ * changes.
+ * The tunnels sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+
+} vxlan_gpe_tunnel_t;
+
+/** Flags for vxlan_gpe_tunnel_t */
+#define VXLAN_GPE_TUNNEL_IS_IPV4 1
+
+/** next nodes for VXLAN GPE input */
+#define foreach_vxlan_gpe_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(ETHERNET_INPUT, "ethernet-input")
+
+/** struct for next nodes for VXLAN GPE input */
+typedef enum {
+#define _(s,n) VXLAN_GPE_INPUT_NEXT_##s,
+ foreach_vxlan_gpe_input_next
+#undef _
+ VXLAN_GPE_INPUT_N_NEXT,
+} vxlan_gpe_input_next_t;
+
+/** struct for VXLAN GPE errors */
+typedef enum {
+#define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+ VXLAN_GPE_N_ERROR,
+} vxlan_gpe_input_error_t;
+
+/** Struct for VXLAN GPE node state */
+typedef struct {
+ /** vector of encap tunnel instances */
+ vxlan_gpe_tunnel_t *tunnels;
+
+ /** lookup IPv4 VXLAN GPE tunnel by key */
+ uword * vxlan4_gpe_tunnel_by_key;
+ /** lookup IPv6 VXLAN GPE tunnel by key */
+ uword * vxlan6_gpe_tunnel_by_key;
+
+ /* local VTEP IPs ref count used by vxlan-bypass node to check if
+ received VXLAN packet DIP matches any local VTEP address */
+ uword * vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
+ uword * vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
+ /* mcast shared info */
+ uword * mcast_shared; /* keyed on mcast ip46 addr */
+ /** Free vlib hw_if_indices */
+ u32 * free_vxlan_gpe_tunnel_hw_if_indices;
+
+ /** Mapping from sw_if_index to tunnel index */
+ u32 * tunnel_index_by_sw_if_index;
+
+ /** State convenience vlib_main_t */
+ vlib_main_t * vlib_main;
+ /** State convenience vnet_main_t */
+ vnet_main_t * vnet_main;
+
+ /** List of next nodes for the decap indexed on protocol */
+ uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
+} vxlan_gpe_main_t;
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+extern vlib_node_registration_t vxlan_gpe_encap_node;
+extern vlib_node_registration_t vxlan4_gpe_input_node;
+extern vlib_node_registration_t vxlan6_gpe_input_node;
+
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args);
+
+/** Struct for VXLAN GPE add/del args */
+typedef struct {
+ u8 is_add;
+ u8 is_ip6;
+ ip46_address_t local, remote;
+ u8 protocol;
+ u32 mcast_sw_if_index;
+ u32 encap_fib_index;
+ u32 decap_fib_index;
+ u32 vni;
+} vnet_vxlan_gpe_add_del_tunnel_args_t;
+
+
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+
+int vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node);
+int vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
+ u8 protocol_override, uword encap_next_node);
+
+/**
+ * @brief Struct for defining VXLAN GPE next nodes
+ */
+typedef enum {
+ VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP,
+ VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP,
+ VXLAN_GPE_ENCAP_NEXT_DROP,
+ VXLAN_GPE_ENCAP_N_NEXT
+} vxlan_gpe_encap_next_t;
+
+
+void vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index);
+
+void vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index);
+
+void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
+
+#endif /* included_vnet_vxlan_gpe_h */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
new file mode 100644
index 00000000..8e268418
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
@@ -0,0 +1,272 @@
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_api.c - vxlan_gpe api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/fib/fib_table.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SW_INTERFACE_SET_VXLAN_GPE_BYPASS, sw_interface_set_vxlan_gpe_bypass) \
+_(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \
+_(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump)
+
+static void
+ vl_api_sw_interface_set_vxlan_gpe_bypass_t_handler
+ (vl_api_sw_interface_set_vxlan_gpe_bypass_t * mp)
+{
+ vl_api_sw_interface_set_vxlan_gpe_bypass_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_int_vxlan_gpe_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GPE_BYPASS_REPLY);
+}
+
+static void
+ vl_api_vxlan_gpe_add_del_tunnel_t_handler
+ (vl_api_vxlan_gpe_add_del_tunnel_t * mp)
+{
+ vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp;
+ int rv = 0;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
+ u32 encap_fib_index, decap_fib_index;
+ u8 protocol;
+ uword *p;
+ ip4_main_t *im = &ip4_main;
+ u32 sw_if_index = ~0;
+
+
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ encap_fib_index = p[0];
+
+ protocol = mp->protocol;
+
+ /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */
+ if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT)
+ {
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
+ goto out;
+ }
+ decap_fib_index = p[0];
+ }
+ else
+ {
+ decap_fib_index = ntohl (mp->decap_vrf_id);
+ }
+
+ /* Check src & dst are different */
+ if ((mp->is_ipv6 && memcmp (mp->local, mp->remote, 16) == 0) ||
+ (!mp->is_ipv6 && memcmp (mp->local, mp->remote, 4) == 0))
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = mp->is_add;
+ a->is_ip6 = mp->is_ipv6;
+ /* ip addresses sent in network byte order */
+ if (a->is_ip6)
+ {
+ clib_memcpy (&(a->local.ip6), mp->local, 16);
+ clib_memcpy (&(a->remote.ip6), mp->remote, 16);
+ }
+ else
+ {
+ clib_memcpy (&(a->local.ip4), mp->local, 4);
+ clib_memcpy (&(a->remote.ip4), mp->remote, 4);
+ }
+ a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index);
+ a->encap_fib_index = encap_fib_index;
+ a->decap_fib_index = decap_fib_index;
+ a->protocol = protocol;
+ a->vni = ntohl (mp->vni);
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
+
+out:
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void send_vxlan_gpe_tunnel_details
+ (vxlan_gpe_tunnel_t * t, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_vxlan_gpe_tunnel_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_VXLAN_GPE_TUNNEL_DETAILS);
+ if (is_ipv6)
+ {
+ memcpy (rmp->local, &(t->local.ip6.as_u8), 16);
+ memcpy (rmp->remote, &(t->remote.ip6.as_u8), 16);
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
+ }
+ else
+ {
+ memcpy (rmp->local, &(t->local.ip4.as_u8), 4);
+ memcpy (rmp->remote, &(t->remote.ip4.as_u8), 4);
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
+ }
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->protocol = t->protocol;
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->is_ipv6 = is_ipv6;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void vl_api_vxlan_gpe_tunnel_dump_t_handler
+ (vl_api_vxlan_gpe_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ vxlan_gpe_main_t *vgm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t;
+ u32 sw_if_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, vgm->tunnels,
+ ({
+ send_vxlan_gpe_tunnel_details(t, q, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_gpe_tunnel_details (t, q, mp->context);
+ }
+}
+
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has alread mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_vxlan_gpe;
+#undef _
+}
+
+static clib_error_t *
+vxlan_gpe_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ am->api_trace_cfg[VL_API_VXLAN_GPE_ADD_DEL_TUNNEL].size +=
+ 17 * sizeof (u32);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vxlan_gpe_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/vnet/vxlan-gpe/vxlan_gpe_error.def
new file mode 100644
index 00000000..9cf1b1cb
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_error.def
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+vxlan_gpe_error (DECAPSULATED, "good packets decapsulated")
+vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets")
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
new file mode 100644
index 00000000..ec3c2e58
--- /dev/null
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief VXLAN GPE packet header structure
+ *
+*/
+#ifndef included_vxlan_gpe_packet_h
+#define included_vxlan_gpe_packet_h
+
+/**
+ * From draft-quinn-vxlan-gpe-03.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * I Bit: Flag bit 4 indicates that the VNI is valid.
+ *
+ * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ * MUST be set to 1 to indicate the presence of the 8 bit next
+ * protocol field.
+ *
+ * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1,
+ *
+ * the packet is an OAM packet and OAM processing MUST occur. The OAM
+ * protocol details are out of scope for this document. As with the
+ * P-bit, bit 7 is currently a reserved flag in VXLAN.
+ *
+ * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ * reserved in VXLAN. The version field is used to ensure backward
+ * compatibility going forward with future VXLAN-gpe updates.
+ *
+ * The initial version for VXLAN-gpe is 0.
+ *
+ * This draft defines the following Next Protocol values:
+ *
+ * 0x1 : IPv4
+ * 0x2 : IPv6
+ * 0x3 : Ethernet
+ * 0x4 : Network Service Header [NSH]
+ */
+
+/**
+ * @brief VXLAN GPE support inner protocol definition.
+ * 1 - IP4
+ * 2 - IP6
+ * 3 - ETHERNET
+ * 4 - NSH
+ */
+#define foreach_vxlan_gpe_protocol \
+_ (0x01, IP4) \
+_ (0x02, IP6) \
+_ (0x03, ETHERNET) \
+_ (0x04, NSH) \
+_ (0x05, IOAM)
+
+
+/**
+ * @brief Struct for VXLAN GPE support inner protocol definition.
+ * 1 - IP4
+ * 2 - IP6
+ * 3 - ETHERNET
+ * 4 - NSH
+ * 5 - IOAM
+ */
+typedef enum {
+#define _(n,f) VXLAN_GPE_PROTOCOL_##f = n,
+ foreach_vxlan_gpe_protocol
+#undef _
+ VXLAN_GPE_PROTOCOL_MAX,
+} vxlan_gpe_protocol_t;
+
+/**
+ * @brief VXLAN GPE Header definition
+ */
+typedef struct {
+ u8 flags;
+ /** Version and Reserved */
+ u8 ver_res;
+ /** Reserved */
+ u8 res;
+ /** see vxlan_gpe_protocol_t */
+ u8 protocol;
+ /** VNI and Reserved */
+ u32 vni_res;
+} vxlan_gpe_header_t;
+
+#define VXLAN_GPE_FLAGS_I 0x08
+#define VXLAN_GPE_FLAGS_P 0x04
+#define VXLAN_GPE_FLAGS_O 0x01
+#define VXLAN_GPE_VERSION 0x0
+
+#endif /* included_vxlan_gpe_packet_h */