summaryrefslogtreecommitdiffstats
path: root/vnet/vnet/nsh-vxlan-gpe
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/nsh-vxlan-gpe')
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/decap.c365
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/encap.c349
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c562
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h152
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def16
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt868
-rw-r--r--vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h74
7 files changed, 2386 insertions, 0 deletions
diff --git a/vnet/vnet/nsh-vxlan-gpe/decap.c b/vnet/vnet/nsh-vxlan-gpe/decap.c
new file mode 100644
index 00000000000..62bb0f81dc7
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/decap.c
@@ -0,0 +1,365 @@
+/*
+ * nsh.c: nsh packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h>
+
+vlib_node_registration_t nsh_vxlan_gpe_input_node;
+
+/* From nsh-gre */
+u8 * format_nsh_header_with_length (u8 * s, va_list * args);
+
+typedef struct {
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+ nsh_header_t h;
+} nsh_vxlan_gpe_rx_trace_t;
+
+static u8 * format_nsh_vxlan_gpe_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nsh_vxlan_gpe_rx_trace_t * t = va_arg (*args, nsh_vxlan_gpe_rx_trace_t *);
+
+ if (t->tunnel_index != ~0)
+ {
+ s = format (s, "NSH-VXLAN: tunnel %d next %d error %d", t->tunnel_index,
+ t->next_index, t->error);
+ }
+ else
+ {
+ s = format (s, "NSH-VXLAN: no tunnel next %d error %d\n", t->next_index,
+ t->error);
+ }
+ s = format (s, "\n %U", format_nsh_header_with_length, &t->h,
+ (u32) sizeof (t->h) /* max size */);
+ return s;
+}
+
+static uword
+nsh_vxlan_gpe_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+ u32 last_tunnel_index = ~0;
+ nsh_vxlan_gpe_tunnel_key_t last_key;
+ u32 pkts_decapsulated = 0;
+
+ memset (&last_key, 0xff, sizeof (last_key));
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ ip4_vxlan_gpe_and_nsh_header_t * iuvn0, * iuvn1;
+ uword * p0, * p1;
+ u32 tunnel_index0, tunnel_index1;
+ nsh_vxlan_gpe_tunnel_t * t0, * t1;
+ nsh_vxlan_gpe_tunnel_key_t key0, key1;
+ u32 error0, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+ vlib_buffer_advance
+ (b1, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+
+ iuvn0 = vlib_buffer_get_current (b0);
+ iuvn1 = vlib_buffer_get_current (b1);
+
+ /* pop (ip, udp, vxlan, nsh) */
+ vlib_buffer_advance (b0, sizeof (*iuvn0));
+ vlib_buffer_advance (b1, sizeof (*iuvn1));
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+ next0 = NSH_VXLAN_GPE_INPUT_NEXT_DROP;
+
+ tunnel_index1 = ~0;
+ error1 = 0;
+ next1 = NSH_VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key0.src = iuvn0->ip4.src_address.as_u32;
+ key0.vni = iuvn0->vxlan.vni_res;
+ key0.spi_si = iuvn0->nsh.spi_si;
+ key0.pad = 0;
+
+ if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0])
+ || (key0.as_u64[1] != last_key.as_u64[1])))
+ {
+ p0 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key0);
+
+ if (p0 == 0)
+ {
+ error0 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace0;
+ }
+
+ last_key.as_u64[0] = key0.as_u64[0];
+ last_key.as_u64[1] = key0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+
+ t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0);
+
+ next0 = t0->decap_next_index;
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+
+ trace0:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->h = iuvn0->nsh;
+ }
+
+ key1.src = iuvn1->ip4.src_address.as_u32;
+ key1.vni = iuvn1->vxlan.vni_res;
+ key1.spi_si = iuvn1->nsh.spi_si;
+ key1.pad = 0;
+
+ if (PREDICT_FALSE ((key1.as_u64[0] != last_key.as_u64[0])
+ || (key1.as_u64[1] != last_key.as_u64[1])))
+ {
+ p1 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key1);
+
+ if (p1 == 0)
+ {
+ error1 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace1;
+ }
+
+ last_key.as_u64[0] = key1.as_u64[0];
+ last_key.as_u64[1] = key1.as_u64[1];
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+
+ t1 = pool_elt_at_index (ngm->tunnels, tunnel_index1);
+
+ next1 = t1->decap_next_index;
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b1);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index
+ */
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
+ pkts_decapsulated += 2;
+
+ trace1:
+ b1->error = error1 ? node->errors[error1] : 0;
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = tunnel_index1;
+ tr->h = iuvn1->nsh;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ ip4_vxlan_gpe_and_nsh_header_t * iuvn0;
+ uword * p0;
+ u32 tunnel_index0;
+ nsh_vxlan_gpe_tunnel_t * t0;
+ nsh_vxlan_gpe_tunnel_key_t key0;
+ u32 error0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ vlib_buffer_advance
+ (b0, -(word)(sizeof(udp_header_t)+sizeof(ip4_header_t)));
+
+ iuvn0 = vlib_buffer_get_current (b0);
+
+ /* pop (ip, udp, vxlan, nsh) */
+ vlib_buffer_advance (b0, sizeof (*iuvn0));
+
+ tunnel_index0 = ~0;
+ error0 = 0;
+ next0 = NSH_VXLAN_GPE_INPUT_NEXT_DROP;
+
+ key0.src = iuvn0->ip4.src_address.as_u32;
+ key0.vni = iuvn0->vxlan.vni_res;
+ key0.spi_si = iuvn0->nsh.spi_si;
+ key0.pad = 0;
+
+ if (PREDICT_FALSE ((key0.as_u64[0] != last_key.as_u64[0])
+ || (key0.as_u64[1] != last_key.as_u64[1])))
+ {
+ p0 = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key0);
+
+ if (p0 == 0)
+ {
+ error0 = NSH_VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
+ goto trace00;
+ }
+
+ last_key.as_u64[0] = key0.as_u64[0];
+ last_key.as_u64[1] = key0.as_u64[1];
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+
+ t0 = pool_elt_at_index (ngm->tunnels, tunnel_index0);
+
+ next0 = t0->decap_next_index;
+
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b0);
+
+ /*
+ * ip[46] lookup in the configured FIB
+ * nsh-vxlan-gpe-encap, here's the encap tunnel sw_if_index
+ */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
+ pkts_decapsulated ++;
+
+ trace00:
+ b0->error = error0 ? node->errors[error0] : 0;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = tunnel_index0;
+ tr->h = iuvn0->nsh;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, nsh_vxlan_gpe_input_node.index,
+ NSH_VXLAN_GPE_ERROR_DECAPSULATED,
+ pkts_decapsulated);
+ return from_frame->n_vectors;
+}
+
+static char * nsh_vxlan_gpe_error_strings[] = {
+#define nsh_vxlan_gpe_error(n,s) s,
+#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def>
+#undef nsh_vxlan_gpe_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (nsh_vxlan_gpe_input_node) = {
+ .function = nsh_vxlan_gpe_input,
+ .name = "nsh-vxlan-gpe-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = NSH_VXLAN_GPE_N_ERROR,
+ .error_strings = nsh_vxlan_gpe_error_strings,
+
+ .n_next_nodes = NSH_VXLAN_GPE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [NSH_VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_nsh_vxlan_gpe_input_next
+#undef _
+ },
+
+ .format_buffer = format_nsh_header_with_length,
+ .format_trace = format_nsh_vxlan_gpe_rx_trace,
+ // $$$$ .unformat_buffer = unformat_nsh_vxlan_gpe_header,
+};
diff --git a/vnet/vnet/nsh-vxlan-gpe/encap.c b/vnet/vnet/nsh-vxlan-gpe/encap.c
new file mode 100644
index 00000000000..0ccdf60c6aa
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/encap.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h>
+
+/* Statistics (not really errors) */
+#define foreach_nsh_vxlan_gpe_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char * nsh_vxlan_gpe_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_nsh_vxlan_gpe_encap_error
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) NSH_VXLAN_GPE_ENCAP_ERROR_##sym,
+ foreach_nsh_vxlan_gpe_encap_error
+#undef _
+ NSH_VXLAN_GPE_ENCAP_N_ERROR,
+} nsh_vxlan_gpe_encap_error_t;
+
+typedef enum {
+ NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP,
+ NSH_VXLAN_GPE_ENCAP_NEXT_DROP,
+ NSH_VXLAN_GPE_ENCAP_N_NEXT,
+} nsh_vxlan_gpe_encap_next_t;
+
+typedef struct {
+ u32 tunnel_index;
+} nsh_vxlan_gpe_encap_trace_t;
+
+u8 * format_nsh_vxlan_gpe_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nsh_vxlan_gpe_encap_trace_t * t
+ = va_arg (*args, nsh_vxlan_gpe_encap_trace_t *);
+
+ s = format (s, "NSH-VXLAN-ENCAP: tunnel %d", t->tunnel_index);
+ return s;
+}
+
+#define foreach_fixed_header_offset \
+_(0) _(1) _(2) _(3) _(4) _(5) _(6)
+
+static uword
+nsh_vxlan_gpe_encap (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+ vnet_main_t * vnm = ngm->vnet_main;
+ u32 pkts_encapsulated = 0;
+ u16 old_l0 = 0, old_l1 = 0;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ u32 next1 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ vnet_hw_interface_t * hi0, * hi1;
+ ip4_header_t * ip0, * ip1;
+ udp_header_t * udp0, * udp1;
+ u64 * copy_src0, * copy_dst0;
+ u64 * copy_src1, * copy_dst1;
+ u32 * copy_src_last0, * copy_dst_last0;
+ u32 * copy_src_last1, * copy_dst_last1;
+ nsh_vxlan_gpe_tunnel_t * t0, * t1;
+ u16 new_l0, new_l1;
+ ip_csum_t sum0, sum1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* 1-wide cache? */
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+ hi1 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b1)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
+ t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
+
+ ASSERT(vec_len(t0->rewrite) >= 24);
+ ASSERT(vec_len(t1->rewrite) >= 24);
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ vlib_buffer_advance (b1, -(word)_vec_len(t1->rewrite));
+
+ ip0 = vlib_buffer_get_current(b0);
+ ip1 = vlib_buffer_get_current(b1);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip0;
+ copy_src0 = (u64 *) t0->rewrite;
+ copy_dst1 = (u64 *) ip1;
+ copy_src1 = (u64 *) t1->rewrite;
+
+ ASSERT (sizeof (ip4_vxlan_gpe_and_nsh_header_t) == 60);
+
+ /* Copy first 56 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header_offset;
+#undef _
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ foreach_fixed_header_offset;
+#undef _
+
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[7]);
+ copy_src_last0 = (u32 *)(&copy_src0[7]);
+ copy_dst_last1 = (u32 *)(&copy_dst1[7]);
+ copy_src_last1 = (u32 *)(&copy_src1[7]);
+
+ copy_dst_last0[0] = copy_src_last0[0];
+ copy_dst_last1[0] = copy_src_last1[0];
+
+ /* If there are TLVs to copy, do so */
+ if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64))
+ memcpy (&copy_dst0[3], t0->rewrite + 64 ,
+ _vec_len (t0->rewrite)-64);
+
+ if (PREDICT_FALSE (_vec_len(t1->rewrite) > 64))
+ memcpy (&copy_dst0[3], t1->rewrite + 64 ,
+ _vec_len (t1->rewrite)-64);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ sum1 = ip1->checksum;
+ /* old_l1 always 0, see the rewrite setup */
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+ ip1->length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *)(ip0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ udp1 = (udp_header_t *)(ip1+1);
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof (*ip1));
+
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - ngm->tunnels;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
+ vnet_hw_interface_t * hi0;
+ ip4_header_t * ip0;
+ udp_header_t * udp0;
+ u64 * copy_src0, * copy_dst0;
+ u32 * copy_src_last0, * copy_dst_last0;
+ nsh_vxlan_gpe_tunnel_t * t0;
+ u16 new_l0;
+ ip_csum_t sum0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* 1-wide cache? */
+ hi0 = vnet_get_sup_hw_interface
+ (vnm, vnet_buffer(b0)->sw_if_index[VLIB_TX]);
+
+ t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
+
+ ASSERT(vec_len(t0->rewrite) >= 24);
+
+ /* Apply the rewrite string. $$$$ vnet_rewrite? */
+ vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+
+ ip0 = vlib_buffer_get_current(b0);
+ /* Copy the fixed header */
+ copy_dst0 = (u64 *) ip0;
+ copy_src0 = (u64 *) t0->rewrite;
+
+ ASSERT (sizeof (ip4_vxlan_gpe_and_nsh_header_t) == 60);
+
+ /* Copy first 56 octets 8-bytes at a time */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ foreach_fixed_header_offset;
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *)(&copy_dst0[7]);
+ copy_src_last0 = (u32 *)(&copy_src0[7]);
+
+ copy_dst_last0[0] = copy_src_last0[0];
+
+ /* If there are TLVs to copy, do so */
+ if (PREDICT_FALSE (_vec_len(t0->rewrite) > 64))
+ memcpy (&copy_dst0[3], t0->rewrite + 64 ,
+ _vec_len (t0->rewrite)-64);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *)(ip0+1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+
+ udp0->length = new_l0;
+
+ /* Reset to look up tunnel partner in the configured FIB */
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
+ pkts_encapsulated ++;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ nsh_vxlan_gpe_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - ngm->tunnels;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ NSH_VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nsh_vxlan_gpe_encap_node) = {
+ .function = nsh_vxlan_gpe_encap,
+ .name = "nsh-vxlan-gpe-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nsh_vxlan_gpe_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(nsh_vxlan_gpe_encap_error_strings),
+ .error_strings = nsh_vxlan_gpe_encap_error_strings,
+
+ .n_next_nodes = NSH_VXLAN_GPE_ENCAP_N_NEXT,
+
+ .next_nodes = {
+ [NSH_VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [NSH_VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c
new file mode 100644
index 00000000000..8cc46d3d3eb
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h>
+
+nsh_vxlan_gpe_main_t nsh_vxlan_gpe_main;
+
+static u8 * format_decap_next (u8 * s, va_list * args)
+{
+ u32 next_index = va_arg (*args, u32);
+
+ switch (next_index)
+ {
+ case NSH_VXLAN_GPE_INPUT_NEXT_DROP:
+ return format (s, "drop");
+ case NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT:
+ return format (s, "ip4");
+ case NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT:
+ return format (s, "ip6");
+ case NSH_VXLAN_GPE_INPUT_NEXT_NSH_VXLAN_GPE_ENCAP:
+ return format (s, "nsh-vxlan-gpe");
+ default:
+ return format (s, "unknown %d", next_index);
+ }
+ return s;
+}
+
+u8 * format_nsh_vxlan_gpe_tunnel (u8 * s, va_list * args)
+{
+ nsh_vxlan_gpe_tunnel_t * t = va_arg (*args, nsh_vxlan_gpe_tunnel_t *);
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+
+ s = format (s,
+ "[%d] %U (src) %U (dst) fibs: encap %d, decap %d",
+ t - ngm->tunnels,
+ format_ip4_address, &t->src,
+ format_ip4_address, &t->dst,
+ t->encap_fib_index,
+ t->decap_fib_index);
+ s = format (s, " decap next %U\n", format_decap_next, t->decap_next_index);
+ s = format (s, " vxlan VNI %d ", t->vni);
+ s = format (s, "nsh ver %d ", (t->ver_o_c>>6));
+ if (t->ver_o_c & NSH_GRE_O_BIT)
+ s = format (s, "O-set ");
+
+ if (t->ver_o_c & NSH_GRE_C_BIT)
+ s = format (s, "C-set ");
+
+ s = format (s, "len %d (%d bytes) md_type %d next_protocol %d\n",
+ t->length, t->length * 4, t->md_type, t->next_protocol);
+
+ s = format (s, " service path %d service index %d\n",
+ (t->spi_si>>NSH_GRE_SPI_SHIFT) & NSH_GRE_SPI_MASK,
+ t->spi_si & NSH_GRE_SINDEX_MASK);
+
+ s = format (s, " c1 %d c2 %d c3 %d c4 %d\n",
+ t->c1, t->c2, t->c3, t->c4);
+
+ return s;
+}
+
+static u8 * format_nsh_vxlan_gpe_name (u8 * s, va_list * args)
+{
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+
+ if (i < vec_len (ngm->dev_inst_by_real))
+ show_dev_instance = ngm->dev_inst_by_real[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ return format (s, "nsh_vxlan_gpe_tunnel%d", i);
+}
+
+static int nsh_vxlan_gpe_name_renumber (vnet_hw_interface_t * hi,
+ u32 new_dev_instance)
+{
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+
+ vec_validate_init_empty (ngm->dev_inst_by_real, hi->dev_instance, ~0);
+
+ ngm->dev_inst_by_real [hi->dev_instance] = new_dev_instance;
+
+ return 0;
+}
+
+static uword dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+VNET_DEVICE_CLASS (nsh_vxlan_gpe_device_class,static) = {
+ .name = "NSH_VXLAN_GPE",
+ .format_device_name = format_nsh_vxlan_gpe_name,
+ .format_tx_trace = format_nsh_vxlan_gpe_encap_trace,
+ .tx_function = dummy_interface_tx,
+ .name_renumber = nsh_vxlan_gpe_name_renumber,
+};
+
+static uword dummy_set_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 l3_type,
+ void * dst_address,
+ void * rewrite,
+ uword max_rewrite_bytes)
+{
+ return 0;
+}
+
+static u8 * format_nsh_vxlan_gpe_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (nsh_vxlan_gpe_hw_class) = {
+ .name = "NSH_VXLAN_GPE",
+ .format_header = format_nsh_vxlan_gpe_header_with_length,
+ .set_rewrite = dummy_set_rewrite,
+};
+
+#define foreach_copy_field \
+_(src.as_u32) \
+_(dst.as_u32) \
+_(vni) \
+_(encap_fib_index) \
+_(decap_fib_index) \
+_(decap_next_index) \
+_(ver_o_c) \
+_(length) \
+_(md_type) \
+_(next_protocol) \
+_(spi_si) \
+_(c1) \
+_(c2) \
+_(c3) \
+_(c4) \
+_(tlvs)
+
+#define foreach_32bit_field \
+_(spi_si) \
+_(c1) \
+_(c2) \
+_(c3) \
+_(c4)
+
+static int nsh_vxlan_gpe_rewrite (nsh_vxlan_gpe_tunnel_t * t)
+{
+ u8 *rw = 0;
+ ip4_header_t * ip0;
+ nsh_header_t * nsh0;
+ ip4_vxlan_gpe_and_nsh_header_t * h0;
+ int len;
+
+ len = sizeof (*h0) + vec_len(t->tlvs)*4;
+
+ vec_validate_aligned (rw, len-1, CLIB_CACHE_LINE_BYTES);
+
+ h0 = (ip4_vxlan_gpe_and_nsh_header_t *) rw;
+
+ /* Fixed portion of the (outer) ip4 header */
+ ip0 = &h0->ip4;
+ ip0->ip_version_and_header_length = 0x45;
+ ip0->ttl = 254;
+ ip0->protocol = IP_PROTOCOL_UDP;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip0->src_address.as_u32 = t->src.as_u32;
+ ip0->dst_address.as_u32 = t->dst.as_u32;
+ ip0->checksum = ip4_header_checksum (ip0);
+
+ /* UDP header, randomize src port on something, maybe? */
+ h0->udp.src_port = clib_host_to_net_u16 (4790);
+ h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gpe);
+
+ /* VXLAN header. Are we having fun yet? */
+ h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
+ h0->vxlan.ver_res = VXLAN_GPE_VERSION;
+ h0->vxlan.next_protocol = VXLAN_NEXT_PROTOCOL_NSH;
+ h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni<<8);
+
+ /* NSH header */
+ nsh0 = &h0->nsh;
+ nsh0->ver_o_c = t->ver_o_c;
+ nsh0->md_type = t->md_type;
+ nsh0->next_protocol = t->next_protocol;
+ nsh0->spi_si = t->spi_si;
+ nsh0->c1 = t->c1;
+ nsh0->c2 = t->c2;
+ nsh0->c3 = t->c3;
+ nsh0->c4 = t->c4;
+
+ /* Endian swap 32-bit fields */
+#define _(x) nsh0->x = clib_host_to_net_u32(nsh0->x);
+ foreach_32bit_field;
+#undef _
+
+ /* fix nsh header length */
+ t->length = 6 + vec_len(t->tlvs);
+ nsh0->length = t->length;
+
+ /* Copy any TLVs */
+ if (vec_len(t->tlvs))
+ memcpy (nsh0->tlvs, t->tlvs, 4*vec_len(t->tlvs));
+
+ t->rewrite = rw;
+ return (0);
+}
+
+int vnet_nsh_vxlan_gpe_add_del_tunnel
+(vnet_nsh_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
+{
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+ nsh_vxlan_gpe_tunnel_t *t = 0;
+ vnet_main_t * vnm = ngm->vnet_main;
+ vnet_hw_interface_t * hi;
+ uword * p;
+ u32 hw_if_index = ~0;
+ u32 sw_if_index = ~0;
+ int rv;
+ nsh_vxlan_gpe_tunnel_key_t key, *key_copy;
+ hash_pair_t *hp;
+
+ key.src = a->dst.as_u32; /* decap src in key is encap dst in config */
+ key.vni = clib_host_to_net_u32 (a->vni << 8);
+ key.spi_si = clib_host_to_net_u32(a->spi_si);
+
+ p = hash_get_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key);
+
+ if (a->is_add)
+ {
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (a->decap_next_index >= NSH_VXLAN_GPE_INPUT_N_NEXT)
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ memset (t, 0, sizeof (*t));
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_copy_field;
+#undef _
+
+ rv = nsh_vxlan_gpe_rewrite (t);
+
+ if (rv)
+ {
+ pool_put (ngm->tunnels, t);
+ return rv;
+ }
+
+ key_copy = clib_mem_alloc (sizeof (*key_copy));
+ memcpy (key_copy, &key, sizeof (*key_copy));
+
+ hash_set_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, key_copy,
+ t - ngm->tunnels);
+
+ if (vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices) > 0)
+ {
+ hw_if_index = ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices
+ [vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices)-1];
+ _vec_len (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->dev_instance = t - ngm->tunnels;
+ hi->hw_instance = hi->dev_instance;
+ }
+ else
+ {
+ hw_if_index = vnet_register_interface
+ (vnm, nsh_vxlan_gpe_device_class.index, t - ngm->tunnels,
+ nsh_vxlan_gpe_hw_class.index, t - ngm->tunnels);
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ hi->output_node_index = nsh_vxlan_gpe_encap_node.index;
+ }
+
+ t->hw_if_index = hw_if_index;
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ t = pool_elt_at_index (ngm->tunnels, p[0]);
+
+ vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */);
+ vec_add1 (ngm->free_nsh_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index);
+
+ hp = hash_get_pair (ngm->nsh_vxlan_gpe_tunnel_by_key, &key);
+ key_copy = (void *)(hp->key);
+ hash_unset_mem (ngm->nsh_vxlan_gpe_tunnel_by_key, &key);
+ clib_mem_free (key_copy);
+
+ vec_free (t->rewrite);
+ pool_put (ngm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+static u32 fib_index_from_fib_id (u32 fib_id)
+{
+ ip4_main_t * im = &ip4_main;
+ uword * p;
+
+ p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (!p)
+ return ~0;
+
+ return p[0];
+}
+
+static uword unformat_decap_next (unformat_input_t * input, va_list * args)
+{
+ u32 * result = va_arg (*args, u32 *);
+ u32 tmp;
+
+ if (unformat (input, "drop"))
+ *result = NSH_VXLAN_GPE_INPUT_NEXT_DROP;
+ else if (unformat (input, "ip4"))
+ *result = NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT;
+ else if (unformat (input, "ip6"))
+ *result = NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT;
+ else if (unformat (input, "ethernet"))
+ *result = NSH_VXLAN_GPE_INPUT_NEXT_IP6_INPUT;
+ else if (unformat (input, "nsh-vxlan-gpe"))
+ *result = NSH_VXLAN_GPE_INPUT_NEXT_NSH_VXLAN_GPE_ENCAP;
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
+static clib_error_t *
+nsh_vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ ip4_address_t src, dst;
+ u8 is_add = 1;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u32 encap_fib_index = 0;
+ u32 decap_fib_index = 0;
+ u8 ver_o_c = 0;
+ u8 length = 0;
+ u8 md_type = 0;
+ u8 next_protocol = 1; /* default: ip4 */
+ u32 decap_next_index = NSH_VXLAN_GPE_INPUT_NEXT_IP4_INPUT;
+ u32 spi;
+ u8 spi_set = 0;
+ u32 si;
+ u32 vni;
+ u8 vni_set = 0;
+ u8 si_set = 0;
+ u32 spi_si;
+ u32 c1 = 0;
+ u32 c2 = 0;
+ u32 c3 = 0;
+ u32 c4 = 0;
+ u32 *tlvs = 0;
+ u32 tmp;
+ int rv;
+ vnet_nsh_vxlan_gpe_add_del_tunnel_args_t _a, * a = &_a;
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U",
+ unformat_ip4_address, &src))
+ src_set = 1;
+ else if (unformat (line_input, "dst %U",
+ unformat_ip4_address, &dst))
+ dst_set = 1;
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_index_from_fib_id (tmp);
+ if (encap_fib_index == ~0)
+ return clib_error_return (0, "nonexistent encap fib id %d", tmp);
+ }
+ else if (unformat (line_input, "decap-vrf-id %d", &tmp))
+ {
+ decap_fib_index = fib_index_from_fib_id (tmp);
+ if (decap_fib_index == ~0)
+ return clib_error_return (0, "nonexistent decap fib id %d", tmp);
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat (line_input, "version %d", &tmp))
+ ver_o_c |= (tmp & 3) << 6;
+ else if (unformat (line_input, "o-bit %d", &tmp))
+ ver_o_c |= (tmp & 1) << 5;
+ else if (unformat (line_input, "c-bit %d", &tmp))
+ ver_o_c |= (tmp & 1) << 4;
+ else if (unformat (line_input, "md-type %d", &tmp))
+ md_type = tmp;
+ else if (unformat(line_input, "next-ip4"))
+ next_protocol = 1;
+ else if (unformat(line_input, "next-ip6"))
+ next_protocol = 2;
+ else if (unformat(line_input, "next-ethernet"))
+ next_protocol = 3;
+ else if (unformat(line_input, "next-nsh"))
+ next_protocol = 4;
+ else if (unformat (line_input, "c1 %d", &c1))
+ ;
+ else if (unformat (line_input, "c2 %d", &c2))
+ ;
+ else if (unformat (line_input, "c3 %d", &c3))
+ ;
+ else if (unformat (line_input, "c4 %d", &c4))
+ ;
+ else if (unformat (line_input, "spi %d", &spi))
+ spi_set = 1;
+ else if (unformat (line_input, "si %d", &si))
+ si_set = 1;
+ else if (unformat (line_input, "tlv %x"))
+ vec_add1 (tlvs, tmp);
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (src_set == 0)
+ return clib_error_return (0, "tunnel src address not specified");
+
+ if (dst_set == 0)
+ return clib_error_return (0, "tunnel dst address not specified");
+
+ if (vni_set == 0)
+ return clib_error_return (0, "vni not specified");
+
+ if (spi_set == 0)
+ return clib_error_return (0, "spi not specified");
+
+ if (si_set == 0)
+ return clib_error_return (0, "si not specified");
+
+ spi_si = (spi<<8) | si;
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_add = is_add;
+
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ rv = vnet_nsh_vxlan_gpe_add_del_tunnel (a, 0 /* hw_if_indexp */);
+
+ switch(rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_INVALID_DECAP_NEXT:
+ return clib_error_return (0, "invalid decap-next...");
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ return clib_error_return (0, "tunnel already exists...");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "tunnel does not exist...");
+
+ default:
+ return clib_error_return
+ (0, "vnet_nsh_vxlan_gpe_add_del_tunnel returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (create_nsh_vxlan_gpe_tunnel_command, static) = {
+ .path = "nsh vxlan tunnel",
+ .short_help =
+ "nsh vxlan tunnel src <ip4-addr> dst <ip4-addr>"
+ " c1 <nn> c2 <nn> c3 <nn> c4 <nn> spi <nn> si <nn> vni <nn>\n"
+ " [encap-fib-id <nn>] [decap-fib-id <nn>] [o-bit <1|0>] [c-bit <1|0>]\n"
+ " [md-type <nn>][next-ip4][next-ip6][next-ethernet][next-nsh]\n"
+ " [tlv <xx>][decap-next [ip4|ip6|ethernet|nsh-encap]][del]\n",
+ .function = nsh_vxlan_gpe_add_del_tunnel_command_fn,
+};
+
+static clib_error_t *
+show_nsh_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ nsh_vxlan_gpe_main_t * ngm = &nsh_vxlan_gpe_main;
+ nsh_vxlan_gpe_tunnel_t * t;
+
+ if (pool_elts (ngm->tunnels) == 0)
+ vlib_cli_output (vm, "No nsh-vxlan-gpe tunnels configured...");
+
+ pool_foreach (t, ngm->tunnels,
+ ({
+ vlib_cli_output (vm, "%U", format_nsh_vxlan_gpe_tunnel, t);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_nsh_vxlan_gpe_tunnel_command, static) = {
+ .path = "show nsh vxlan tunnel",
+ .function = show_nsh_vxlan_gpe_tunnel_command_fn,
+};
+
+clib_error_t *nsh_vxlan_gpe_init (vlib_main_t *vm)
+{
+ nsh_vxlan_gpe_main_t *ngm = &nsh_vxlan_gpe_main;
+
+ ngm->vnet_main = vnet_get_main();
+ ngm->vlib_main = vm;
+
+ ngm->nsh_vxlan_gpe_tunnel_by_key
+ = hash_create_mem (0, sizeof(nsh_vxlan_gpe_tunnel_key_t), sizeof (uword));
+
+ udp_register_dst_port (vm, UDP_DST_PORT_vxlan_gpe,
+ nsh_vxlan_gpe_input_node.index, 1 /* is_ip4 */);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(nsh_vxlan_gpe_init);
+
diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h
new file mode 100644
index 00000000000..953035a472b
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_nsh_vxlan_gpe_h
+#define included_vnet_nsh_vxlan_gpe_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/gre/gre.h>
+#include <vnet/nsh-gre/nsh_gre_packet.h>
+#include <vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/udp.h>
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_gpe_header_t vxlan; /* 8 bytes */
+ nsh_header_t nsh; /* 28 bytes */
+}) ip4_vxlan_gpe_and_nsh_header_t;
+
+typedef CLIB_PACKED(struct {
+ /*
+ * Key fields: ip src, vxlan vni, nsh spi_si
+ * all fields in NET byte order
+ */
+ union {
+ struct {
+ u32 src;
+ u32 vni; /* shifted 8 bits */
+ u32 spi_si;
+ u32 pad;
+ };
+ u64 as_u64[2];
+ };
+}) nsh_vxlan_gpe_tunnel_key_t;
+
+typedef struct {
+ /* Rewrite string. $$$$ embed vnet_rewrite header */
+ u8 * rewrite;
+
+ /* decap next index */
+ u32 decap_next_index;
+
+ /* tunnel src and dst addresses */
+ ip4_address_t src;
+ ip4_address_t dst;
+
+ /* FIB indices */
+ u32 encap_fib_index; /* tunnel partner lookup here */
+ u32 decap_fib_index; /* inner IP lookup here */
+
+ /* vxlan VNI in HOST byte order, shifted left 8 bits */
+ u32 vni;
+
+ /* vnet intfc hw/sw_if_index */
+ u32 hw_if_index;
+ u32 sw_if_index;
+
+ /* NSH header fields in HOST byte order */
+ u8 ver_o_c;
+ u8 length;
+ u8 md_type;
+ u8 next_protocol;
+ u32 spi_si;
+
+ /* Context headers, always present, in HOST byte order */
+ u32 c1, c2, c3, c4;
+ u32 * tlvs;
+} nsh_vxlan_gpe_tunnel_t;
+
+#define foreach_nsh_vxlan_gpe_input_next \
+_(DROP, "error-drop") \
+_(IP4_INPUT, "ip4-input") \
+_(IP6_INPUT, "ip6-input") \
+_(ETHERNET_INPUT, "ethernet-input") \
+_(NSH_VXLAN_GPE_ENCAP, "nsh-vxlan-gpe-encap")
+
+typedef enum {
+#define _(s,n) NSH_VXLAN_GPE_INPUT_NEXT_##s,
+ foreach_nsh_vxlan_gpe_input_next
+#undef _
+ NSH_VXLAN_GPE_INPUT_N_NEXT,
+} nsh_vxlan_gpe_input_next_t;
+
+typedef enum {
+#define nsh_vxlan_gpe_error(n,s) NSH_VXLAN_GPE_ERROR_##n,
+#include <vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def>
+#undef nsh_vxlan_gpe_error
+ NSH_VXLAN_GPE_N_ERROR,
+} nsh_vxlan_gpe_input_error_t;
+
+typedef struct {
+ /* vector of encap tunnel instances */
+ nsh_vxlan_gpe_tunnel_t *tunnels;
+
+ /* lookup tunnel by key */
+ uword * nsh_vxlan_gpe_tunnel_by_key;
+
+ /* Free vlib hw_if_indices */
+ u32 * free_nsh_vxlan_gpe_tunnel_hw_if_indices;
+
+ /* show device instance by real device instance */
+ u32 * dev_inst_by_real;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} nsh_vxlan_gpe_main_t;
+
+nsh_vxlan_gpe_main_t nsh_vxlan_gpe_main;
+
+vlib_node_registration_t nsh_vxlan_gpe_input_node;
+vlib_node_registration_t nsh_vxlan_gpe_encap_node;
+
+u8 * format_nsh_vxlan_gpe_encap_trace (u8 * s, va_list * args);
+
+typedef struct {
+ u8 is_add;
+ ip4_address_t src, dst;
+ u32 encap_fib_index;
+ u32 decap_fib_index;
+ u32 decap_next_index;
+ u32 vni;
+ u8 ver_o_c;
+ u8 length;
+ u8 md_type;
+ u8 next_protocol;
+ u32 spi_si;
+ u32 c1, c2, c3, c4;
+ u32 * tlvs;
+} vnet_nsh_vxlan_gpe_add_del_tunnel_args_t;
+
+int vnet_nsh_vxlan_gpe_add_del_tunnel
+(vnet_nsh_vxlan_gpe_add_del_tunnel_args_t *a, u32 * sw_if_indexp);
+
+#endif /* included_vnet_nsh_vxlan_gpe_h */
diff --git a/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def
new file mode 100644
index 00000000000..4ba64fe4dc5
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/nsh_vxlan_gpe_error.def
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+nsh_vxlan_gpe_error (DECAPSULATED, "good packets decapsulated")
+nsh_vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets")
diff --git a/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt b/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt
new file mode 100644
index 00000000000..35cee50f573
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/vxlan-gpe-rfc.txt
@@ -0,0 +1,868 @@
+Network Working Group P. Quinn
+Internet-Draft Cisco Systems, Inc.
+Intended status: Experimental P. Agarwal
+Expires: January 4, 2015 Broadcom
+ R. Fernando
+ L. Kreeger
+ D. Lewis
+ F. Maino
+ M. Smith
+ N. Yadav
+ Cisco Systems, Inc.
+ L. Yong
+ Huawei USA
+ X. Xu
+ Huawei Technologies
+ U. Elzur
+ Intel
+ P. Garg
+ Microsoft
+ July 3, 2014
+
+
+ Generic Protocol Extension for VXLAN
+ draft-quinn-vxlan-gpe-03.txt
+
+Abstract
+
+ This draft describes extending Virtual eXtensible Local Area Network
+ (VXLAN), via changes to the VXLAN header, with three new
+ capabilities: support for multi-protocol encapsulation, operations,
+ administration and management (OAM) signaling and explicit
+ versioning.
+
+Status of this Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 1]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ This Internet-Draft will expire on January 4, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 2]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Table of Contents
+
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
+ 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5
+ 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6
+ 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6
+ 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7
+ 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7
+ 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
+ 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8
+ 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8
+ 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8
+ 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 8
+ 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
+ 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
+ 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
+ 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
+ 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13
+ 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13
+ 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13
+ 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
+ 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
+ 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 3]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+1. Introduction
+
+ Virtual eXtensible Local Area Network [VXLAN] defines an
+ encapsulation format that encapsulates Ethernet frames in an outer
+ UDP/IP transport. As data centers evolve, the need to carry other
+ protocols encapsulated in an IP packet is required, as well as the
+ need to provide increased visibility and diagnostic capabilities
+ within the overlay. The VXLAN header does not specify the protocol
+ being encapsulated and therefore is currently limited to
+ encapsulating only Ethernet frame payload, nor does it provide the
+ ability to define OAM protocols. Rather than defining yet another
+ encapsulation, VXLAN is extended to provide protocol typing and OAM
+ capabilities.
+
+ This document describes extending VXLAN via the following changes:
+
+ Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
+ in the VXLAN-gpe header to indicate that a next protocol field is
+ present.
+
+ OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
+ the VXLAN-gpe header, to indicate that the packet is an OAM
+ packet.
+
+ Version: Two reserved bits are allocated, and set in the VXLAN-gpe
+ header, to indicate VXLAN-gpe protocol version.
+
+ Next Protocol: A 8 bit next protocol field is present in the VXLAN-
+ gpe header.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 4]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+2. VXLAN Without Protocol Extension
+
+ As described in the introduction, the VXLAN header has no protocol
+ identifier that indicates the type of payload being carried by VXLAN.
+ Because of this, VXLAN is limited to an Ethernet payload.
+ Furthermore, the VXLAN header has no mechanism to signal OAM packets.
+
+ The VXLAN header defines bits 0-7 as flags (some defined, some
+ reserved), the VXLAN network identifier (VNI) field and several
+ reserved bits. The flags provide flexibility to define how the
+ reserved bits can be used to change the definition of the VXLAN
+ header.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|R|R|R| Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+ Figure 1: VXLAN Header
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 5]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3. Generic Protocol Extension VXLAN (VXLAN-gpe)
+
+3.1. Multi Protocol Support
+
+ This draft defines the following two changes to the VXLAN header in
+ order to support multi-protocol encapsulation:
+
+ P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ MUST be set to 1 to indicate the presence of the 8 bit next
+ protocol field.
+
+ P = 0 indicates that the payload MUST conform to VXLAN as defined
+ in [VXLAN].
+
+ Flag bit 5 was chosen as the P bit because this flag bit is
+ currently reserved in VXLAN.
+
+ Next Protocol Field: The lower 8 bits of the first word are used to
+ carry a next protocol. This next protocol field contains the
+ protocol of the encapsulated payload packet. A new protocol
+ registry will be requested from IANA.
+
+ This draft defines the following Next Protocol values:
+
+ 0x1 : IPv4
+ 0x2 : IPv6
+ 0x3 : Ethernet
+ 0x4 : Network Service Header [NSH]
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|R| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 2: VXLAN-gpe Next Protocol
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 6]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+3.2. OAM Support
+
+ Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
+ packet is an OAM packet and OAM processing MUST occur. The OAM
+ protocol details are out of scope for this document. As with the
+ P-bit, bit 7 is currently a reserved flag in VXLAN.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 3: VXLAN-gpe OAM Bit
+
+3.3. Version Bits
+
+ VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ reserved in VXLAN. The version field is used to ensure backward
+ compatibility going forward with future VXLAN-gpe updates.
+
+ The initial version for VXLAN-gpe is 0.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+
+
+ Figure 4: VXLAN-gpe Version Bits
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 7]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+4. Backward Compatibility
+
+4.1. VXLAN VTEP to VXLAN-gpe VTEP
+
+ As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits
+ respectively must be set to zero. The remaining reserved bits must
+ be zero, including the VXLAN-gpe version field, bits 8 and 9. The
+ encapsulated payload MUST be Ethernet.
+
+4.2. VXLAN-gpe VTEP to VXLAN VTEP
+
+ A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN
+ VTEP. When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN-
+ gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP
+ destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver
+ = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving
+ VXLAN VTEP will threat this packet as a VXLAN packet.
+
+ A method for determining the capabilities of a VXLAN VTEP (gpe or
+ non-gpe) is out of the scope of this draft.
+
+4.3. VXLAN-gpe UDP Ports
+
+ VXLAN-gpe uses a new UDP destination port (to be assigned by IANA)
+ when sending traffic to VXLAN-gpe VTEPs.
+
+4.4. VXLAN-gpe and Encapsulated IP Header Fields
+
+ When encapsulating and decapsulating IPv4 and IPv6 packets, certain
+ fields, such as IPv4 Time to Live (TTL) from the inner IP header need
+ to be considered. VXLAN-gpe IP encapsulation and decapsulation
+ utilizes the techniques described in [RFC6830], section 5.3.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 8]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+5. VXLAN-gpe Examples
+
+ This section provides three examples of protocols encapsulated using
+ the Generic Protocol Extension for VXLAN described in this document.
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv4 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 5: IPv4 and VXLAN-gpe
+
+
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original IPv6 Packet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 6: IPv6 and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 9]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | VXLAN Network Identifier (VNI) | Reserved |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Original Ethernet Frame |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+
+ Figure 7: Ethernet and VXLAN-gpe
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 10]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+6. Security Considerations
+
+ VXLAN's security is focused on issues around L2 encapsulation into
+ L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic
+ redirection are dependent on the particular protocol payload
+ encapsulated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 11]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+7. Acknowledgments
+
+ A special thank you goes to Dino Farinacci for his guidance and
+ detailed review.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 12]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+8. IANA Considerations
+
+8.1. UDP Port
+
+ A new UDP port will be requested from IANA.
+
+8.2. VXLAN-gpe Next Protocol
+
+ IANA is requested to set up a registry of "Next Protocol". These are
+ 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
+ this draft. New values are assigned via Standards Action [RFC5226].
+
+ +---------------+-------------+---------------+
+ | Next Protocol | Description | Reference |
+ +---------------+-------------+---------------+
+ | 0 | Reserved | This document |
+ | | | |
+ | 1 | IPv4 | This document |
+ | | | |
+ | 2 | IPv6 | This document |
+ | | | |
+ | 3 | Ethernet | This document |
+ | | | |
+ | 4 | NSH | This document |
+ | | | |
+ | 5..253 | Unassigned | |
+ +---------------+-------------+---------------+
+
+ Table 1
+
+8.3. VXLAN-gpe Reserved Bits
+
+ There are ten bits at the beginning of the VXLAN-gpe header. New
+ bits are assigned via Standards Action [RFC5226].
+
+ Bits 0-3 - Reserved
+ Bit 4 - Instance ID (I bit)
+ Bit 5 - Next Protocol (P bit)
+ Bit 6 - Reserved
+ Bit 7 - OAM (O bit)
+ Bits 8-9 - Version
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 13]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+9. References
+
+9.1. Normative References
+
+ [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
+ August 1980.
+
+ [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
+ September 1981.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
+ IANA Considerations Section in RFCs", BCP 26, RFC 5226,
+ May 2008.
+
+9.2. Informative References
+
+ [NSH] Quinn, P. and et al. , "Network Service Header", 2014.
+
+ [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
+ October 1994.
+
+ [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
+ Locator/ID Separation Protocol (LISP)", RFC 6830,
+ January 2013.
+
+ [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
+ L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
+ Framework for Overlaying Virtualized Layer 2 Networks over
+ Layer 3 Networks", 2013.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 14]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+Authors' Addresses
+
+ Paul Quinn
+ Cisco Systems, Inc.
+
+ Email: paulq@cisco.com
+
+
+ Puneet Agarwal
+ Broadcom
+
+ Email: pagarwal@broadcom.com
+
+
+ Rex Fernando
+ Cisco Systems, Inc.
+
+ Email: rex@cisco.com
+
+
+ Larry Kreeger
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Darrel Lewis
+ Cisco Systems, Inc.
+
+ Email: darlewis@cisco.com
+
+
+ Fabio Maino
+ Cisco Systems, Inc.
+
+ Email: kreeger@cisco.com
+
+
+ Michael Smith
+ Cisco Systems, Inc.
+
+ Email: michsmit@cisco.com
+
+
+
+
+
+
+
+
+
+Quinn, et al. Expires January 4, 2015 [Page 15]
+
+Internet-Draft Generic Protocol Extension for VXLAN July 2014
+
+
+ Navindra Yadav
+ Cisco Systems, Inc.
+
+ Email: nyadav@cisco.com
+
+
+ Lucy Yong
+ Huawei USA
+
+ Email: lucy.yong@huawei.com
+
+
+ Xiaohu Xu
+ Huawei Technologies
+
+ Email: xuxiaohu@huawei.com
+
+
+ Uri Elzur
+ Intel
+
+ Email: uri.elzur@intel.com
+
+
+ Pankaj Garg
+ Microsoft
+
+ Email: Garg.Pankaj@microsoft.com
diff --git a/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h b/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h
new file mode 100644
index 00000000000..efc85c4bb54
--- /dev/null
+++ b/vnet/vnet/nsh-vxlan-gpe/vxlan_gpe_packet.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_packet_h
+#define included_vxlan_gpe_packet_h
+
+/*
+ * From draft-quinn-vxlan-gpe-03.txt
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * I Bit: Flag bit 4 indicates that the VNI is valid.
+ *
+ * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
+ * MUST be set to 1 to indicate the presence of the 8 bit next
+ * protocol field.
+ *
+ * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1,
+ *
+ * the packet is an OAM packet and OAM processing MUST occur. The OAM
+ * protocol details are out of scope for this document. As with the
+ * P-bit, bit 7 is currently a reserved flag in VXLAN.
+ *
+ * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
+ * reserved in VXLAN. The version field is used to ensure backward
+ * compatibility going forward with future VXLAN-gpe updates.
+ *
+ * The initial version for VXLAN-gpe is 0.
+ *
+ * This draft defines the following Next Protocol values:
+ *
+ * 0x1 : IPv4
+ * 0x2 : IPv6
+ * 0x3 : Ethernet
+ * 0x4 : Network Service Header [NSH]
+ */
+
+typedef struct {
+ u8 flags;
+ u8 ver_res;
+ u8 res;
+ u8 next_protocol;
+ u32 vni_res;
+} vxlan_gpe_header_t;
+
+#define VXLAN_GPE_FLAGS_I 0x08
+#define VXLAN_GPE_FLAGS_P 0x04
+#define VXLAN_GPE_FLAGS_O 0x01
+
+#define VXLAN_GPE_VERSION 0x0
+
+#define VXLAN_NEXT_PROTOCOL_IP4 0x1
+#define VXLAN_NEXT_PROTOCOL_IP6 0x2
+#define VXLAN_NEXT_PROTOCOL_ETHERNET 0x3
+#define VXLAN_NEXT_PROTOCOL_NSH 0x4
+
+#endif /* included_vxlan_gpe_packet_h */