aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/vxlan-gpe
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/vxlan-gpe')
-rw-r--r--vnet/vnet/vxlan-gpe/decap.c450
-rw-r--r--vnet/vnet/vxlan-gpe/encap.c425
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan-gpe-rfc.txt868
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe.c467
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe.h139
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe_error.def16
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe_packet.h82
7 files changed, 2447 insertions, 0 deletions
diff --git a/vnet/vnet/vxlan-gpe/decap.c b/vnet/vnet/vxlan-gpe/decap.c
new file mode 100644
index 00000000000..aed5857d0b1
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/decap.c
@@ -0,0 +1,450 @@
+/*
+ * decap.c - decapsulate VXLAN GPE
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+} vxlan_gpe_rx_trace_t;
+
+static u8 * format_vxlan_gpe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_rx_trace_t * t = va_arg (*args, vxlan_gpe_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "VXLAN-GPE: tunnel %d next %d error %d", t->tunnel_index,
+ t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "VXLAN-GPE: no tunnel next %d error %d\n", t->next_index,
+ t->error);
+ }
+ return s;
+}
+
+
+static u8 * format_vxlan_gpe_with_length (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+
+ return s;
+}
+
+static uword
+vxlan_gpe_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 last_tunnel_index = ~0;
+ vxlan_gpe_tunnel_key_t last_key;
+ u32 pkts_decapsulated = 0;
+ u32 cpu_index = os_get_cpu_number();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ memset (&last_key, 0xff, sizeof (last_key));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ ip4_vxlan_gpe_header_t * iuvn0, * iuvn1;
+ uword * p0, * p1;
+ u32 tunnel_index0, tunnel_index1;
+ vxlan_gpe_tunnel_t * t0, * t1;
+ vxlan_gpe_tunnel_key_t key0, key1;
+ u32 error0, error1;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+
+ iuvn0 = vlib_buffer_get_current (b0);
+ iuvn1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof (*iuvn0));
+ vlib_buffer_advance (b1, sizeof (*iuvn1));
+
+ tunnel_index0 = ~0;
+ tunnel_index1 = ~0;
+ error0 = 0;
+ error1 = 0;
+
+ next0 = (iuvn0->vxlan.protocol < VXLAN_GPE_INPUT_N_NEXT) ? iuvn0->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
+ next1 = (iuvn1->vxlan.protocol < VXLAN_GPE_INPUT_N_NEXT) ? iuvn1->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
+
+
+
+
+ key0.local = iuvn0->ip4.dst_address.as_u32;
+ key1.local = iuvn1->ip4.dst_address.as_u32;
+
+ key0.remote = iuvn0->ip4.src_address.as_u32;
+ key1.remote = iuvn1->ip4.src_address.as_u32;
+
+ key0.vni = iuvn0->vxlan.vni_res;
+ key1.vni = iuvn1->vxlan.vni_res;
+
+ key0.pad = 0;
+ key1.pad = 0;
+
+ /* Processing for key0 */
+ if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0])
+ || (key0.as_u64[1] != last_key.as_u64[1])))
+ {
+ p0 = hash_get_mem (ngm->vxlan_gpe_tunnel_by_key, &key0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ last_key.as_u64[0] = key0.as_u64[0];
+ last_key.as_u64[1] = key0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+
+ t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0);
+
+ next0 = t0->protocol;
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain(vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace0:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+
+
+ /* Processing for key1 */
+ if (PREDICT_FALSE ((key1.as_u64[0] != last_key.as_u64[0])
+ || (key1.as_u64[1] != last_key.as_u64[1])))
+ {
+ p1 = hash_get_mem (ngm->vxlan_gpe_tunnel_by_key, &key1);
+
+ if (p1 == 0)
+ {
+ error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ last_key.as_u64[0] = key1.as_u64[0];
+ last_key.as_u64[1] = key1.as_u64[1];
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+
+ t1 = pool_elt_at_index (ngm->tunnels, tunnel_index1);
+
+ next1 = t1->protocol;
+ sw_if_index1 = t1->sw_if_index;
+ len1 = vlib_buffer_length_in_chain(vm, b1);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b1);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+
+ trace1:
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_vxlan_gpe_header_t * iuvn0;
+ uword * p0;
+ u32 tunnel_index0;
+ vxlan_gpe_tunnel_t * t0;
+ vxlan_gpe_tunnel_key_t key0;
+ u32 error0;
+ u32 sw_if_index0, len0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+
+ iuvn0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b0, sizeof (*iuvn0));
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+ next0 = (iuvn0->vxlan.protocol < VXLAN_GPE_INPUT_N_NEXT) ? iuvn0->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key0.local = iuvn0->ip4.dst_address.as_u32;
+ key0.remote = iuvn0->ip4.src_address.as_u32;
+ key0.vni = iuvn0->vxlan.vni_res;
+ key0.pad = 0;
+
+ if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0])
+ || (key0.as_u64[1] != last_key.as_u64[1])))
+ {
+ p0 = hash_get_mem (ngm->vxlan_gpe_tunnel_by_key, &key0);
+
+ if (p0 == 0)
+ {
+ error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ last_key.as_u64[0] = key0.as_u64[0];
+ last_key.as_u64[1] = key0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+
+ t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0);
+
+ next0 = t0->protocol;
+
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain(vm, b0);
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ pkts_decapsulated ++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan-gpe tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, vxlan_gpe_input_node.index,
+ VXLAN_GPE_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets)
+ {
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, cpu_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+ return from_frame->n_vectors;
+}
+
+static char * vxlan_gpe_error_strings[] = {
+#define vxlan_gpe_error(n,s) s,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (vxlan_gpe_input_node) = {
+ .function = vxlan_gpe_input,
+ .name = "vxlan-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
+ .error_strings = vxlan_gpe_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_vxlan_gpe_with_length,
+ .format_trace = format_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
+};
+
+
diff --git a/vnet/vnet/vxlan-gpe/encap.c b/vnet/vnet/vxlan-gpe/encap.c
new file mode 100644
index 00000000000..3ffe2a62db2
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/encap.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char * vxlan_gpe_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_encap_error
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) VXLAN_GPE_ENCAP_ERROR_##sym,
+ foreach_vxlan_gpe_encap_error
+#undef _
+ VXLAN_GPE_ENCAP_N_ERROR,
+} vxlan_gpe_encap_error_t;
+
+typedef enum {
+ VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP,
+ VXLAN_GPE_ENCAP_NEXT_DROP,
+ VXLAN_GPE_ENCAP_N_NEXT
+} vxlan_gpe_encap_next_t;
+
+typedef struct {
+ u32 tunnel_index;
+} vxlan_gpe_encap_trace_t;
+
+
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_encap_trace_t * t
+ = va_arg (*args, vxlan_gpe_encap_trace_t *);
+
+ s = format (s, "VXLAN-GPE-ENCAP: tunnel %d", t->tunnel_index);
+ return s;
+}
+
+#define foreach_fixed_header_offset \
+_(0) _(1) _(2) _(3) _(4) _(5) _(6)
+
+static uword
+vxlan_gpe_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ vxlan_gpe_main_t * ngm = &vxlan_gpe_main;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_interface_main_t * im = &vnm->interface_main;
+ u32 pkts_encapsulated = 0;
+ u16 old_l0 = 0, old_l1 = 0;
+ u32 cpu_index = os_get_cpu_number();
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 next1 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 sw_if_index0, sw_if_index1, len0, len1;
+ vnet_hw_interface_t * hi0, * hi1;
+ ip4_header_t * ip0, * ip1;
+ udp_header_t * udp0, * udp1;
+ u64 * copy_src0, * copy_dst0;
+ u64 * copy_src1, * copy_dst1;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u32 * copy_src_last1, * copy_dst_last1;
+ vxlan_gpe_tunnel_t * t0, * t1;
+ u16 new_l0, new_l1;
+ ip_csum_t sum0, sum1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+ hi1 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
+ t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
+
+ ASSERT(vec_len(t0->rewrite) >= 24);
+ ASSERT(vec_len(t1->rewrite) >= 24);
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
+
+ ip0 = vlib_buffer_get_current(b0);
+ ip1 = vlib_buffer_get_current(b1);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip1;
+ copy_src1 = (u64 *) t1->rewrite;
+
+ ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
+
+ /* Copy first 36 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header_offset;
+#undef _
+
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[7]);
+ copy_src_last0 = (u32 *)(&copy_src0[7]);
+ copy_dst_last1 = (u32 *)(&copy_dst1[7]);
+ copy_src_last1 = (u32 *)(&copy_src1[7]);
+
+ copy_dst_last0[0] = copy_src_last0[0];
+ copy_dst_last1[0] = copy_src_last1[0];
+
+ /* If there are TLVs to copy, do so */
+ if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64))
+ clib_memcpy (&copy_dst0[3], t0->rewrite + 64 ,
+ _vec_len (t0->rewrite)-64);
+
+ if (PREDICT_FALSE (_vec_len(t1->rewrite) > 64))
+ clib_memcpy (&copy_dst0[3], t1->rewrite + 64 ,
+ _vec_len (t1->rewrite)-64);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ sum1 = ip1->checksum;
+ /* old_l1 always 0, see the rewrite setup */
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+ ip1->length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *)(ip0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ udp1 = (udp_header_t *)(ip1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof (*ip1));
+
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ pkts_encapsulated += 2;
+
+ len0 = vlib_buffer_length_in_chain(vm, b0);
+ len1 = vlib_buffer_length_in_chain(vm, b0);
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ incremented per packet. Note stats are still incremented for deleted
+ and admin-down tunnel where packets are dropped. It is not worthwhile
+ to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE(
+ (sw_if_index0 != stats_sw_if_index)
+ || (sw_if_index1 != stats_sw_if_index))) {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+ if (sw_if_index0 == sw_if_index1) {
+ if (stats_n_packets)
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_sw_if_index = sw_if_index0;
+ stats_n_packets = 2;
+ stats_n_bytes = len0 + len1;
+ } else {
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ cpu_index, sw_if_index0, 1, len0);
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ cpu_index, sw_if_index1, 1, len1);
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - ngm->tunnels;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 sw_if_index0, len0;
+ vnet_hw_interface_t * hi0;
+ ip4_header_t * ip0;
+ udp_header_t * udp0;
+ u64 * copy_src0, * copy_dst0;
+ u32 * copy_src_last0, * copy_dst_last0;
+ vxlan_gpe_tunnel_t * t0;
+ u16 new_l0;
+ ip_csum_t sum0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* 1-wide cache? */
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_TX];
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
+
+ ASSERT(vec_len(t0->rewrite) >= 24);
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+
+ ip0 = vlib_buffer_get_current(b0);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip0;
+ copy_src0 = (u64 *) t0->rewrite;
+
+ ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
+
+ /* Copy first 36 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[7]);
+ copy_src_last0 = (u32 *)(&copy_src0[7]);
+
+ copy_dst_last0[0] = copy_src_last0[0];
+
+ /* If there are TLVs to copy, do so */
+ if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64))
+ clib_memcpy (&copy_dst0[3], t0->rewrite + 64 ,
+ _vec_len (t0->rewrite)-64);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *)(ip0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+
+ udp0->length = new_l0;
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ pkts_encapsulated ++;
+
+ len0 = vlib_buffer_length_in_chain(vm, b0);
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same vxlan tunnel so counter is not
+ * incremented per packet. Note stats are still incremented for deleted
+ * and admin-down tunnel where packets are dropped. It is not worthwhile
+ * to check for this rare case and affect normal path performance. */
+ if (PREDICT_FALSE(sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX,
+ cpu_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+ /* Increment any remaining batch stats */
+ if (stats_n_packets) {
+ vlib_increment_combined_counter(
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX, cpu_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
+ .function = vxlan_gpe_encap,
+ .name = "vxlan-gpe-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_encap_error_strings),
+ .error_strings = vxlan_gpe_encap_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_ENCAP_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
diff --git a/vnet/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/vnet/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
new file mode 100644
index 00000000000..35cee50f573
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
@@ -0,0 +1,868 @@
+Network Working Group P. Quinn
+Internet-Draft Cisco Systems, Inc.
+Intended status: Experimental P. Agarwal
+Expires: January 4, 2015 Broadcom
+ R. Fernando
+ L. Kreeger
+ D. Lewis
+ F. Maino
+ M. Smith
+ N. Yadav
+ Cisco Systems, Inc.
+ L. Yong
+ Huawei USA
+ X. Xu
+ Huawei Technologies
+ U. Elzur
+ Intel
+ P. Garg
+ Microsoft
+ July 3, 2014
+
+
+ Generic Protocol Extension for VXLAN
+ draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+Status of this Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 1]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ This Internet-Draft will expire on January 4, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 2]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5
+ 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6
+ 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6
+ 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7
+ 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7
+ 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
+ 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8
+ 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8
+ 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8
+ 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 8
+ 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
+ 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
+ 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
+ 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
+ 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13
+ 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13
+ 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13
+ 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+ 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+ 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 3]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+1. Introduction
+
+ Virtual eXtensible Local Area Network [VXLAN] defines an
+ encapsulation format that encapsulates Ethernet frames in an outer
+ UDP/IP transport. As data centers evolve, the need to carry other
+ protocols encapsulated in an IP packet is required, as well as the
+ need to provide increased visibility and diagnostic capabilities
+ within the overlay. The VXLAN header does not specify the protocol
+ being encapsulated and therefore is currently limited to
+ encapsulating only Ethernet frame payload, nor does it provide the
+ ability to define OAM protocols. Rather than defining yet another
+ encapsulation, VXLAN is extended to provide protocol typing and OAM
+ capabilities.
+
+ This document describes extending VXLAN via the following changes:
+
+ Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
+ in the VXLAN-gpe header to indicate that a next protocol field is
+ present.
+
+ OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
+ the VXLAN-gpe header, to indicate that the packet is an OAM
+ packet.
+
+ Version: Two reserved bits are allocated, and set in the VXLAN-gpe
+ header, to indicate VXLAN-gpe protocol version.
+
+ Next Protocol: A 8 bit next protocol field is present in the VXLAN-
+ gpe header.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 4]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+2. VXLAN Without Protocol Extension
+
+ As described in the introduction, the VXLAN header has no protocol
+ identifier that indicates the type of payload being carried by VXLAN.
+ Because of this, VXLAN is limited to an Ethernet payload.
+ Furthermore, the VXLAN header has no mechanism to signal OAM packets.
+
+ The VXLAN header defines bits 0-7 as flags (some defined, some
+ reserved), the VXLAN network identifier (VNI) field and several
+ reserved bits. The flags provide flexibility to define how the
+ reserved bits can be used to change the definition of the VXLAN
+ header.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|R|R|R| Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Figure 1: VXLAN Header
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 5]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3. Generic Protocol Extension VXLAN (VXLAN-gpe)
+
+3.1. Multi Protocol Support
+
+ This draft defines the following two changes to the VXLAN header in
+ order to support multi-protocol encapsulation:
+
+ P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ MUST be set to 1 to indicate the presence of the 8 bit next
+ protocol field.
+
+ P = 0 indicates that the payload MUST conform to VXLAN as defined
+ in [VXLAN].
+
+ Flag bit 5 was chosen as the P bit because this flag bit is
+ currently reserved in VXLAN.
+
+ Next Protocol Field: The lower 8 bits of the first word are used to
+ carry a next protocol. This next protocol field contains the
+ protocol of the encapsulated payload packet. A new protocol
+ registry will be requested from IANA.
+
+ This draft defines the following Next Protocol values:
+
+ 0x1 : IPv4
+ 0x2 : IPv6
+ 0x3 : Ethernet
+ 0x4 : Network Service Header [NSH]
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|R| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 2: VXLAN-gpe Next Protocol
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 6]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3.2. OAM Support
+
+ Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ packet is an OAM packet and OAM processing MUST occur. The OAM
+ protocol details are out of scope for this document. As with the
+ P-bit, bit 7 is currently a reserved flag in VXLAN.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 3: VXLAN-gpe OAM Bit
+
+3.3. Version Bits
+
+ VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ reserved in VXLAN. The version field is used to ensure backward
+ compatibility going forward with future VXLAN-gpe updates.
+
+ The initial version for VXLAN-gpe is 0.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+ Figure 4: VXLAN-gpe Version Bits
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 7]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+4. Backward Compatibility
+
+4.1. VXLAN VTEP to VXLAN-gpe VTEP
+
+ As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits
+ respectively must be set to zero. The remaining reserved bits must
+ be zero, including the VXLAN-gpe version field, bits 8 and 9. The
+ encapsulated payload MUST be Ethernet.
+
+4.2. VXLAN-gpe VTEP to VXLAN VTEP
+
+ A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN
+ VTEP. When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN-
+ gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP
+ destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver
+ = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving
+ VXLAN VTEP will threat this packet as a VXLAN packet.
+
+ A method for determining the capabilities of a VXLAN VTEP (gpe or
+ non-gpe) is out of the scope of this draft.
+
+4.3. VXLAN-gpe UDP Ports
+
+ VXLAN-gpe uses a new UDP destination port (to be assigned by IANA)
+ when sending traffic to VXLAN-gpe VTEPs.
+
+4.4. VXLAN-gpe and Encapsulated IP Header Fields
+
+ When encapsulating and decapsulating IPv4 and IPv6 packets, certain
+ fields, such as IPv4 Time to Live (TTL) from the inner IP header need
+ to be considered. VXLAN-gpe IP encapsulation and decapsulation
+ utilizes the techniques described in [RFC6830], section 5.3.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 8]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+5. VXLAN-gpe Examples
+
+ This section provides three examples of protocols encapsulated using
+ the Generic Protocol Extension for VXLAN described in this document.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv4 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 5: IPv4 and VXLAN-gpe
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv6 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 6: IPv6 and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 9]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original Ethernet Frame |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 7: Ethernet and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 10]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+6. Security Considerations
+
+ VXLAN's security is focused on issues around L2 encapsulation into
+ L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic
+ redirection are dependent on the particular protocol payload
+ encapsulated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 11]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+7. Acknowledgments
+
+ A special thank you goes to Dino Farinacci for his guidance and
+ detailed review.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 12]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+8. IANA Considerations
+
+8.1. UDP Port
+
+ A new UDP port will be requested from IANA.
+
+8.2. VXLAN-gpe Next Protocol
+
+ IANA is requested to set up a registry of "Next Protocol". These are
+ 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
+ this draft. New values are assigned via Standards Action [RFC5226].
+
+ +---------------+-------------+---------------+
+ | Next Protocol | Description | Reference |
+ +---------------+-------------+---------------+
+ | 0 | Reserved | This document |
+ | | | |
+ | 1 | IPv4 | This document |
+ | | | |
+ | 2 | IPv6 | This document |
+ | | | |
+ | 3 | Ethernet | This document |
+ | | | |
+ | 4 | NSH | This document |
+ | | | |
+ | 5..253 | Unassigned | |
+ +---------------+-------------+---------------+
+
+ Table 1
+
+8.3. VXLAN-gpe Reserved Bits
+
+ There are ten bits at the beginning of the VXLAN-gpe header. New
+ bits are assigned via Standards Action [RFC5226].
+
+ Bits 0-3 - Reserved
+ Bit 4 - Instance ID (I bit)
+ Bit 5 - Next Protocol (P bit)
+ Bit 6 - Reserved
+ Bit 7 - OAM (O bit)
+ Bits 8-9 - Version
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 13]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+9. References
+
+9.1. Normative References
+
+ [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+ August 1980.
+
+ [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
+ September 1981.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+ IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+ May 2008.
+
+9.2. Informative References
+
+ [NSH] Quinn, P. and et al. , "Network Service Header", 2014.
+
+ [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
+ October 1994.
+
+ [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
+ Locator/ID Separation Protocol (LISP)", RFC 6830,
+ January 2013.
+
+ [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
+ L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
+ Framework for Overlaying Virtualized Layer 2 Networks over
+ Layer 3 Networks", 2013.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 14]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Authors' Addresses
+
+ Paul Quinn
+ Cisco Systems, Inc.
+
+ Email: paulq@cisco.com
+
+
+ Puneet Agarwal
+ Broadcom
+
+ Email: pagarwal@broadcom.com
+
+
+ Rex Fernando
+ Cisco Systems, Inc.
+
+ Email: rex@cisco.com
+
+
+ Larry Kreeger
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Darrel Lewis
+ Cisco Systems, Inc.
+
+ Email: darlewis@cisco.com
+
+
+ Fabio Maino
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Michael Smith
+ Cisco Systems, Inc.
+
+ Email: michsmit@cisco.com
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 15]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ Navindra Yadav
+ Cisco Systems, Inc.
+
+ Email: nyadav@cisco.com
+
+
+ Lucy Yong
+ Huawei USA
+
+ Email: lucy.yong@huawei.com
+
+
+ Xiaohu Xu
+ Huawei Technologies
+
+ Email: xuxiaohu@huawei.com
+
+
+ Uri Elzur
+ Intel
+
+ Email: uri.elzur@intel.com
+
+
+ Pankaj Garg
+ Microsoft
+
+ Email: Garg.Pankaj@microsoft.com
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.c b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
new file mode 100644
index 00000000000..ef242d0bb8f
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+u8 * format_vxlan_gpe_tunnel (u8 * s, va_list * args)
+{
+ vxlan_gpe_tunnel_t * t = va_arg (*args, vxlan_gpe_tunnel_t *);
+ vxlan_gpe_main_t * gm = &vxlan_gpe_main;
+
+ s = format (s, "[%d] local: %U remote: %U ",
+ t - gm->tunnels,
+ format_ip4_address, &t->local,
+ format_ip4_address, &t->remote);
+
+ switch (t->protocol)
+ {
+ case VXLAN_GPE_PROTOCOL_IP4:
+ s = format (s, "next-protocol ip4");
+ case VXLAN_GPE_PROTOCOL_IP6:
+ s = format (s, "next-protocol ip6");
+ case VXLAN_GPE_PROTOCOL_ETHERNET:
+ s = format (s, "next-protocol ethernet");
+ case VXLAN_GPE_PROTOCOL_NSH:
+ s = format (s, "next-protocol nsh");
+ default:
+ s = format (s, "next-protocol unknown %d", t->protocol);
+ }
+
+ s = format (s, " fibs: (encap %d, decap %d)",
+ t->encap_fib_index,
+ t->decap_fib_index);
+
+ s = format (s, " vxlan VNI %d ", t->vni);
+
+ return s;
+}
+
+static u8 * format_vxlan_gpe_name (u8 * s, va_list * args)
+{
+ vxlan_gpe_main_t * gm = &vxlan_gpe_main;
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+
+ if (i < vec_len (gm->dev_inst_by_real))
+ show_dev_instance = gm->dev_inst_by_real[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ return format (s, "vxlan_gpe_tunnel%d", i);
+}
+
+static int vxlan_gpe_name_renumber (vnet_hw_interface_t * hi,
+ u32 new_dev_instance)
+{
+ vxlan_gpe_main_t * gm = &vxlan_gpe_main;
+
+ vec_validate_init_empty (gm->dev_inst_by_real, hi->dev_instance, ~0);
+
+ gm->dev_inst_by_real [hi->dev_instance] = new_dev_instance;
+
+ return 0;
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+static uword dummy_set_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 l3_type,
+ void * dst_address,
+ void * rewrite,
+ uword max_rewrite_bytes)
+{
+ return 0;
+}
+
+VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
+ .name = "VXLAN_GPE",
+ .format_device_name = format_vxlan_gpe_name,
+ .format_tx_trace = format_vxlan_gpe_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .name_renumber = vxlan_gpe_name_renumber,
+};
+
+static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
+ .name = "VXLAN_GPE",
+ .format_header = format_vxlan_gpe_header_with_length,
+ .set_rewrite = dummy_set_rewrite,
+};
+
+
+#define foreach_gpe_copy_field \
+_(local.as_u32) \
+_(remote.as_u32) \
+_(vni) \
+_(protocol) \
+_(encap_fib_index) \
+_(decap_fib_index)
+
+#define foreach_copy_field \
+_(src.as_u32) \
+_(dst.as_u32) \
+_(vni) \
+_(encap_fib_index) \
+_(decap_fib_index) \
+_(decap_next_index)
+
+
+
+static int vxlan_gpe_rewrite (vxlan_gpe_tunnel_t * t)
+{
+ u8 *rw = 0;
+ ip4_header_t * ip0;
+ ip4_vxlan_gpe_header_t * h0;
+ int len;
+
+ len = sizeof (*h0);
+
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_vxlan_gpe_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip0->src_address.as_u32 = t->local.as_u32;
+ ip0->dst_address.as_u32 = t->remote.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gpe);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+ h0->vxlan.protocol = VXLAN_GPE_PROTOCOL_IP4;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ t->rewrite = rw;
+ return (0);
+}
+
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+{
+ vxlan_gpe_main_t * gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ vnet_main_t * vnm = gm->vnet_main;
+ vnet_hw_interface_t * hi;
+ uword * p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ int rv;
+ vxlan_gpe_tunnel_key_t key, *key_copy;
+ hash_pair_t *hp;
+
+ key.local = a->local.as_u32;
+ key.remote = a->remote.as_u32;
+ key.vni = clib_host_to_net_u32 (a->vni << 8);
+ key.pad = 0;
+
+ p = hash_get_mem (gm->vxlan_gpe_tunnel_by_key, &key);
+
+ if (a->is_add)
+ {
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (a->decap_next_index >= VXLAN_GPE_INPUT_N_NEXT)
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_gpe_copy_field;
+#undef _
+
+ rv = vxlan_gpe_rewrite (t);
+
+ if (rv)
+ {
+ pool_put (gm->tunnels, t);
+ return rv;
+ }
+
+ key_copy = clib_mem_alloc (sizeof (*key_copy));
+ clib_memcpy (key_copy, &key, sizeof (*key_copy));
+
+ hash_set_mem (gm->vxlan_gpe_tunnel_by_key, key_copy,
+ t - gm->tunnels);
+
+ if (vec_len (gm->free_vxlan_gpe_tunnel_hw_if_indices) > 0)
+ {
+ hw_if_index = gm->free_vxlan_gpe_tunnel_hw_if_indices
+ [vec_len (gm->free_vxlan_gpe_tunnel_hw_if_indices)-1];
+ _vec_len (gm->free_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - gm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, vxlan_gpe_device_class.index, t - gm->tunnels,
+ vxlan_gpe_hw_class.index, t - gm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = vxlan_gpe_encap_node.index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
+ vec_add1 (gm->free_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index);
+
+ hp = hash_get_pair (gm->vxlan_gpe_tunnel_by_key, &key);
+ key_copy = (void *)(hp->key);
+ hash_unset_mem (gm->vxlan_gpe_tunnel_by_key, &key);
+ clib_mem_free (key_copy);
+
+ vec_free (t->rewrite);
+ pool_put (gm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static u32 fib_index_from_fib_id (u32 fib_id)
+{
+ ip4_main_t * im = &ip4_main;
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+static uword unformat_gpe_decap_next (unformat_input_t * input, va_list * args)
+{
+ u32 * result = va_arg (*args, u32 *);
+ u32 tmp;
+
+ if (unformat (input, "drop"))
+ *result = VXLAN_GPE_INPUT_NEXT_DROP;
+ else if (unformat (input, "ip4"))
+ *result = VXLAN_GPE_INPUT_NEXT_IP4_INPUT;
+ else if (unformat (input, "ip6"))
+ *result = VXLAN_GPE_INPUT_NEXT_IP6_INPUT;
+ else if (unformat (input, "ethernet"))
+ *result = VXLAN_GPE_INPUT_NEXT_ETHERNET_INPUT;
+ else if (unformat (input, "nsh"))
+ *result = VXLAN_GPE_INPUT_NEXT_NSH_INPUT;
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
+static clib_error_t *
+vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ u8 is_add = 1;
+ ip4_address_t local, remote;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u32 encap_fib_index = 0;
+ u32 decap_fib_index = 0;
+ u8 protocol = VXLAN_GPE_PROTOCOL_IP4;
+ u32 decap_next_index = VXLAN_GPE_INPUT_NEXT_IP4_INPUT;
+ u32 vni;
+ u8 vni_set = 0;
+ int rv;
+ u32 tmp;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "local %U",
+ unformat_ip4_address, &local))
+ local_set = 1;
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote))
+ remote_set = 1;
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_index_from_fib_id (tmp);
+ if (encap_fib_index == ~0)
+ return clib_error_return (0, "nonexistent encap fib id %d", tmp);
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &tmp))
+ {
+ decap_fib_index = fib_index_from_fib_id (tmp);
+ if (decap_fib_index == ~0)
+ return clib_error_return (0, "nonexistent decap fib id %d", tmp);
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_gpe_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat(line_input, "next-ip4"))
+ protocol = VXLAN_GPE_PROTOCOL_IP4;
+ else if (unformat(line_input, "next-ip6"))
+ protocol = VXLAN_GPE_PROTOCOL_IP6;
+ else if (unformat(line_input, "next-ethernet"))
+ protocol = VXLAN_GPE_PROTOCOL_ETHERNET;
+ else if (unformat(line_input, "next-nsh"))
+ protocol = VXLAN_GPE_PROTOCOL_NSH;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (local_set == 0)
+ return clib_error_return (0, "tunnel local address not specified");
+
+ if (remote_set == 0)
+ return clib_error_return (0, "tunnel remote address not specified");
+
+ if (vni_set == 0)
+ return clib_error_return (0, "vni not specified");
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+
+#define _(x) a->x = x;
+ foreach_gpe_copy_field;
+#undef _
+
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, 0 /* hw_if_indexp */);
+
+ switch(rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_INVALID_DECAP_NEXT:
+ return clib_error_return (0, "invalid decap-next...");
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ return clib_error_return (0, "tunnel already exists...");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "tunnel does not exist...");
+
+ default:
+ return clib_error_return
+ (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
+ .path = "create vxlan-gpe tunnel",
+ .short_help =
+ "create vxlan-gpe tunnel local <ip4-addr> remote <ip4-addr>"
+ " vni <nn> [next-ip4][next-ip6][next-ethernet][next-nsh]"
+ " [encap-vrf-id <nn>] [decap-vrf-id <nn>]"
+ " [del]\n",
+ .function = vxlan_gpe_add_del_tunnel_command_fn,
+};
+
+static clib_error_t *
+show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_main_t * gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t * t;
+
+ if (pool_elts (gm->tunnels) == 0)
+ vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
+
+ pool_foreach (t, gm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
+ .path = "show vxlan-gpe",
+ .function = show_vxlan_gpe_tunnel_command_fn,
+};
+
+clib_error_t *vxlan_gpe_init (vlib_main_t *vm)
+{
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+
+ gm->vnet_main = vnet_get_main();
+ gm->vlib_main = vm;
+
+ gm->vxlan_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(vxlan_gpe_tunnel_key_t), sizeof (uword));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_vxlan_gpe,
+ vxlan_gpe_input_node.index, 1 /* is_ip4 */);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(vxlan_gpe_init);
+
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.h b/vnet/vnet/vxlan-gpe/vxlan_gpe.h
new file mode 100644
index 00000000000..4c2ac444e34
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_vxlan_gpe_h
+#define included_vnet_vxlan_gpe_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/udp.h>
+
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_gpe_header_t vxlan; /* 8 bytes */
+}) ip4_vxlan_gpe_header_t;
+
+typedef CLIB_PACKED(struct {
+ /*
+ * Key fields: local remote, vni
+ * all fields in NET byte order
+ */
+ union {
+ struct {
+ u32 local;
+ u32 remote;
+ u32 vni; /* shifted 8 bits */
+ u32 pad;
+ };
+ u64 as_u64[2];
+ };
+}) vxlan_gpe_tunnel_key_t;
+
+typedef struct {
+ /* Rewrite string. $$$$ embed vnet_rewrite header */
+ u8 * rewrite;
+
+ /* encapsulated protocol */
+ u8 protocol;
+
+ /* tunnel src and dst addresses */
+ ip4_address_t local;
+ ip4_address_t remote;
+
+ /* FIB indices */
+ u32 encap_fib_index; /* tunnel partner lookup here */
+ u32 decap_fib_index; /* inner IP lookup here */
+
+ /* vxlan VNI in HOST byte order, shifted left 8 bits */
+ u32 vni;
+
+ /* vnet intfc hw/sw_if_index */
+ u32 hw_if_index;
+ u32 sw_if_index;
+
+} vxlan_gpe_tunnel_t;
+
+#define foreach_vxlan_gpe_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(ETHERNET_INPUT, "ethernet-input") \
+_(NSH_INPUT, "nsh-input")
+
+typedef enum {
+#define _(s,n) VXLAN_GPE_INPUT_NEXT_##s,
+ foreach_vxlan_gpe_input_next
+#undef _
+ VXLAN_GPE_INPUT_N_NEXT,
+} vxlan_gpe_input_next_t;
+
+typedef enum {
+#define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n,
+#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#undef vxlan_gpe_error
+ VXLAN_GPE_N_ERROR,
+} vxlan_gpe_input_error_t;
+
+typedef struct {
+ /* vector of encap tunnel instances */
+ vxlan_gpe_tunnel_t *tunnels;
+
+ /* lookup tunnel by key */
+ uword * vxlan_gpe_tunnel_by_key;
+
+ /* Free vlib hw_if_indices */
+ u32 * free_vxlan_gpe_tunnel_hw_if_indices;
+
+ /* show device instance by real device instance */
+ u32 * dev_inst_by_real;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} vxlan_gpe_main_t;
+
+vxlan_gpe_main_t vxlan_gpe_main;
+
+extern vlib_node_registration_t vxlan_gpe_encap_node;
+extern vlib_node_registration_t vxlan_gpe_input_node;
+
+u8 * format_vxlan_gpe_encap_trace (u8 * s, va_list * args);
+
+typedef struct {
+ u8 is_add;
+ ip4_address_t local, remote;
+ u8 protocol;
+ u32 encap_fib_index;
+ u32 decap_fib_index;
+ u32 decap_next_index;
+ u32 vni;
+} vnet_vxlan_gpe_add_del_tunnel_args_t;
+
+
+int vnet_vxlan_gpe_add_del_tunnel
+(vnet_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+
+
+
+
+#endif /* included_vnet_vxlan_gpe_h */
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe_error.def b/vnet/vnet/vxlan-gpe/vxlan_gpe_error.def
new file mode 100644
index 00000000000..9cf1b1cb656
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe_error.def
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+vxlan_gpe_error (DECAPSULATED, "good packets decapsulated")
+vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets")
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe_packet.h b/vnet/vnet/vxlan-gpe/vxlan_gpe_packet.h
new file mode 100644
index 00000000000..3403cc9ebad
--- /dev/null
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe_packet.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_packet_h
+#define included_vxlan_gpe_packet_h
+
+/*
+ * From draft-quinn-vxlan-gpe-03.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * I Bit: Flag bit 4 indicates that the VNI is valid.
+ *
+ * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ * MUST be set to 1 to indicate the presence of the 8 bit next
+ * protocol field.
+ *
+ * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1,
+ *
+ * the packet is an OAM packet and OAM processing MUST occur. The OAM
+ * protocol details are out of scope for this document. As with the
+ * P-bit, bit 7 is currently a reserved flag in VXLAN.
+ *
+ * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ * reserved in VXLAN. The version field is used to ensure backward
+ * compatibility going forward with future VXLAN-gpe updates.
+ *
+ * The initial version for VXLAN-gpe is 0.
+ *
+ * This draft defines the following Next Protocol values:
+ *
+ * 0x1 : IPv4
+ * 0x2 : IPv6
+ * 0x3 : Ethernet
+ * 0x4 : Network Service Header [NSH]
+ */
+
+#define foreach_vxlan_gpe_protocol \
+_ (0x01, IP4) \
+_ (0x02, IP6) \
+_ (0x03, ETHERNET) \
+_ (0x04, NSH)
+
+
+typedef enum {
+#define _(n,f) VXLAN_GPE_PROTOCOL_##f = n,
+ foreach_vxlan_gpe_protocol
+#undef _
+} vxlan_gpe_protocol_t;
+
+typedef struct {
+ u8 flags;
+ u8 ver_res;
+ u8 res;
+ /* see vxlan_gpe_protocol_t */
+ u8 protocol;
+ u32 vni_res;
+} vxlan_gpe_header_t;
+
+#define VXLAN_GPE_FLAGS_I 0x08
+#define VXLAN_GPE_FLAGS_P 0x04
+#define VXLAN_GPE_FLAGS_O 0x01
+#define VXLAN_GPE_VERSION 0x0
+
+#endif /* included_vxlan_gpe_packet_h */