aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/CMakeLists.txt23
-rw-r--r--src/vnet/dpo/load_balance.c7
-rw-r--r--src/vnet/dpo/load_balance.h1
-rw-r--r--src/vnet/fib/fib_entry.c6
-rw-r--r--src/vnet/fib/fib_entry.h1
-rw-r--r--src/vnet/ip/ip4_mtrie.c10
-rw-r--r--src/vnet/ip/ip4_mtrie.h5
-rw-r--r--src/vnet/ip/ip_init.c36
-rw-r--r--src/vnet/ipsec/ah_decrypt.c44
-rw-r--r--src/vnet/ipsec/esp_decrypt.c57
-rw-r--r--src/vnet/ipsec/ipsec_api.c10
-rw-r--r--src/vnet/ipsec/ipsec_format.c5
-rw-r--r--src/vnet/ipsec/ipsec_input.c357
-rw-r--r--src/vnet/ipsec/ipsec_sa.c56
-rw-r--r--src/vnet/ipsec/ipsec_sa.h392
-rw-r--r--src/vnet/session/application_interface.c91
-rw-r--r--src/vnet/session/application_interface.h1
-rw-r--r--src/vnet/session/session.c316
-rw-r--r--src/vnet/session/session.h410
-rw-r--r--src/vnet/session/session_lookup.c65
-rw-r--r--src/vnet/session/session_lookup.h3
-rw-r--r--src/vnet/session/session_node.c48
-rw-r--r--src/vnet/session/transport.c18
-rw-r--r--src/vnet/session/transport.h1
-rw-r--r--src/vnet/srv6/sr_policy_rewrite.c5
-rw-r--r--src/vnet/tcp/tcp.c14
-rw-r--r--src/vnet/tcp/tcp.h6
-rw-r--r--src/vnet/tcp/tcp_cli.c4
-rw-r--r--src/vnet/tcp/tcp_inlines.h9
-rw-r--r--src/vnet/tcp/tcp_input.c60
-rw-r--r--src/vnet/tcp/tcp_output.c11
-rw-r--r--src/vnet/udp/udp_cli.c3
-rw-r--r--src/vnet/vxlan-gpe/FEATURE.yaml10
-rw-r--r--src/vnet/vxlan-gpe/decap.c1167
-rw-r--r--src/vnet/vxlan-gpe/dir.dox32
-rw-r--r--src/vnet/vxlan-gpe/encap.c433
-rw-r--r--src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt868
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.api140
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.c1257
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.h300
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_api.c360
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_error.def16
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_packet.h120
43 files changed, 1129 insertions, 5649 deletions
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index a071709542a..9e7734e20cb 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -750,29 +750,6 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES mpls/mpls.api)
##############################################################################
-# Tunnel protocol: vxlan-gpe
-##############################################################################
-
-list(APPEND VNET_SOURCES
- vxlan-gpe/vxlan_gpe.c
- vxlan-gpe/encap.c
- vxlan-gpe/decap.c
- vxlan-gpe/vxlan_gpe_api.c
-)
-
-list (APPEND VNET_MULTIARCH_SOURCES
- vxlan-gpe/decap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan-gpe/vxlan_gpe.h
- vxlan-gpe/vxlan_gpe_packet.h
- vxlan-gpe/vxlan_gpe_error.def
-)
-
-list(APPEND VNET_API_FILES vxlan-gpe/vxlan_gpe.api)
-
-##############################################################################
# ipv6 segment routing
##############################################################################
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index 8f2a0de6ea8..f6f9392a42b 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -1030,6 +1030,7 @@ load_balance_module_init (void)
* This should never be used, but just in case, stack it on a drop.
*/
lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+ ASSERT(0 == lbi);
load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
load_balance_logger =
@@ -1038,6 +1039,12 @@ load_balance_module_init (void)
load_balance_map_module_init();
}
+void
+load_balance_pool_alloc (uword size)
+{
+ pool_alloc_aligned(load_balance_pool, size, CLIB_CACHE_LINE_BYTES);
+}
+
static clib_error_t *
load_balance_show (vlib_main_t * vm,
unformat_input_t * input,
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index eee073f5892..76aa7982401 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -260,5 +260,6 @@ load_balance_get_bucket_i (const load_balance_t *lb,
}
extern void load_balance_module_init(void);
+extern void load_balance_pool_alloc (uword size);
#endif
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index adf880b8bbb..c86941fce9a 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -1772,6 +1772,12 @@ fib_entry_module_init (void)
fib_entry_track_module_init();
}
+void
+fib_entry_pool_alloc (uword size)
+{
+ pool_alloc(fib_entry_pool, size);
+}
+
fib_route_path_t *
fib_entry_encode (fib_node_index_t fib_entry_index)
{
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 7331f803ec4..2c88d1e5f6a 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -480,6 +480,7 @@ extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index,
flow_hash_config_t hash_config);
extern void fib_entry_module_init(void);
+extern void fib_entry_pool_alloc(uword size);
extern u32 fib_entry_get_stats_index(fib_node_index_t fib_entry_index);
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 00855f7db43..df70dc9edca 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -190,7 +190,7 @@ ip4_mtrie_8_init (ip4_mtrie_8_t *m)
{
ip4_mtrie_8_ply_t *root;
- pool_get (ip4_ply_pool, root);
+ pool_get_aligned (ip4_ply_pool, root, CLIB_CACHE_LINE_BYTES);
m->root_ply = root - ip4_ply_pool;
ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0);
@@ -853,13 +853,19 @@ ip4_mtrie_module_init (vlib_main_t * vm)
clib_error_t *error = NULL;
/* Burn one ply so index 0 is taken */
- pool_get (ip4_ply_pool, p);
+ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
return (error);
}
VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+void
+ip4_mtrie_pool_alloc (uword size)
+{
+ pool_alloc_aligned (ip4_ply_pool, size, CLIB_CACHE_LINE_BYTES);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 16c524745be..2631f07eb2b 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -179,6 +179,11 @@ format_function_t format_ip4_mtrie_8;
extern ip4_mtrie_8_ply_t *ip4_ply_pool;
/**
+ * @brief Pre-allocate the pool of plys
+ */
+extern void ip4_mtrie_pool_alloc (uword size);
+
+/**
* Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)
*/
always_inline u32
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index c2490f196ef..cfc3644a1bf 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -38,6 +38,9 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/dpo/load_balance.h>
ip_main_t ip_main;
@@ -112,6 +115,39 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"flow_classify_init"),
};
+static clib_error_t *
+ip_config_init (vlib_main_t *vm, unformat_input_t *input)
+{
+ uword lbsz = 0, fibentrysz = 0, mtriesz = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "load-balance-pool-size %U", unformat_memory_size,
+ &lbsz))
+ ;
+ else if (unformat (input, "fib-entry-pool-size %U", unformat_memory_size,
+ &fibentrysz))
+ ;
+ else if (unformat (input, "ip4-mtrie-pool-size %U", unformat_memory_size,
+ &mtriesz))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (lbsz)
+ load_balance_pool_alloc (lbsz);
+ if (fibentrysz)
+ fib_entry_pool_alloc (fibentrysz);
+ if (mtriesz)
+ ip4_mtrie_pool_alloc (mtriesz);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (ip_config_init, "l3fib");
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index 6b62ff7f05c..30ea4429a75 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -202,16 +202,8 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->seq = clib_host_to_net_u32 (ah0->seq_no);
/* anti-replay check */
- if (PREDICT_FALSE (irt->anti_reply_huge))
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, ~0, false, &pd->seq_hi, true);
- }
- else
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, ~0, false, &pd->seq_hi, false);
- }
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ irt, pd->seq, ~0, false, &pd->seq_hi);
if (anti_replay_result)
{
ah_decrypt_set_next_index (b[0], node, vm->thread_index,
@@ -317,32 +309,16 @@ ah_decrypt_inline (vlib_main_t * vm,
if (PREDICT_TRUE (irt->integ_icv_size))
{
/* redo the anti-reply check. see esp_decrypt for details */
- if (PREDICT_FALSE (irt->anti_reply_huge))
+ if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi,
+ true, NULL))
{
- if (ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, pd->seq_hi, true, NULL, true))
- {
- ah_decrypt_set_next_index (
- b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
- next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
- goto trace;
- }
- n_lost = ipsec_sa_anti_replay_advance (
- irt, thread_index, pd->seq, pd->seq_hi, true);
- }
- else
- {
- if (ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, pd->seq_hi, true, NULL, false))
- {
- ah_decrypt_set_next_index (
- b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
- next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
- goto trace;
- }
- n_lost = ipsec_sa_anti_replay_advance (
- irt, thread_index, pd->seq, pd->seq_hi, false);
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_REPLAY, 0, next,
+ AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
}
+ n_lost = ipsec_sa_anti_replay_advance (irt, thread_index, pd->seq,
+ pd->seq_hi);
vlib_prefetch_simple_counter (
&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
pd->sa_index);
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index 345a60a7fdd..928f1b06f9b 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -60,8 +60,7 @@ typedef enum
typedef struct
{
u32 seq;
- u32 sa_seq;
- u32 sa_seq_hi;
+ u64 sa_seq64;
u32 pkt_seq_hi;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
@@ -81,10 +80,10 @@ format_esp_decrypt_trace (u8 * s, va_list * args)
esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *);
s = format (s,
- "esp: crypto %U integrity %U pkt-seq %d sa-seq %u sa-seq-hi %u "
+ "esp: crypto %U integrity %U pkt-seq %d sa-seq %lu "
"pkt-seq-hi %u",
format_ipsec_crypto_alg, t->crypto_alg, format_ipsec_integ_alg,
- t->integ_alg, t->seq, t->sa_seq, t->sa_seq_hi, t->pkt_seq_hi);
+ t->integ_alg, t->seq, t->sa_seq64, t->pkt_seq_hi);
return s;
}
@@ -810,32 +809,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
* a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any
* implementation, sequential or batching, from decrypting these.
*/
- if (PREDICT_FALSE (irt->anti_reply_huge))
+ if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true,
+ NULL))
{
- if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true,
- NULL, true))
- {
- esp_decrypt_set_next_index (b, node, vm->thread_index,
- ESP_DECRYPT_ERROR_REPLAY, 0, next,
- ESP_DECRYPT_NEXT_DROP, pd->sa_index);
- return;
- }
- n_lost = ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq,
- pd->seq_hi, true);
- }
- else
- {
- if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true,
- NULL, false))
- {
- esp_decrypt_set_next_index (b, node, vm->thread_index,
- ESP_DECRYPT_ERROR_REPLAY, 0, next,
- ESP_DECRYPT_NEXT_DROP, pd->sa_index);
- return;
- }
- n_lost = ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq,
- pd->seq_hi, false);
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
}
+ n_lost =
+ ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, pd->seq_hi);
vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
vm->thread_index, pd->sa_index);
@@ -1205,16 +1188,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
pd->current_length = b[0]->current_length;
/* anti-reply check */
- if (PREDICT_FALSE (irt->anti_reply_huge))
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, ~0, false, &pd->seq_hi, true);
- }
- else
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- irt, pd->seq, ~0, false, &pd->seq_hi, false);
- }
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ irt, pd->seq, ~0, false, &pd->seq_hi);
if (anti_replay_result)
{
@@ -1393,8 +1368,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tr->crypto_alg = sa->crypto_alg;
tr->integ_alg = sa->integ_alg;
tr->seq = pd->seq;
- tr->sa_seq = irt->seq;
- tr->sa_seq_hi = irt->seq_hi;
+ tr->sa_seq64 = irt->seq64;
tr->pkt_seq_hi = pd->seq_hi;
}
@@ -1466,8 +1440,7 @@ esp_decrypt_post_inline (vlib_main_t * vm,
tr->crypto_alg = sa->crypto_alg;
tr->integ_alg = sa->integ_alg;
tr->seq = pd->seq;
- tr->sa_seq = irt->seq;
- tr->sa_seq_hi = irt->seq_hi;
+ tr->sa_seq64 = irt->seq64;
}
n_left--;
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 262a8cb8c88..d27e9827074 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -44,11 +44,9 @@ static inline u64
ipsec_sa_get_inb_seq (ipsec_sa_t *sa)
{
ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
- u64 seq;
-
- seq = irt->seq;
- if (ipsec_sa_is_set_USE_ESN (sa))
- seq |= (u64) irt->seq_hi << 32;
+ u64 seq = irt->seq64;
+ if (!ipsec_sa_is_set_USE_ESN (sa))
+ seq = (u32) seq;
return seq;
}
@@ -1361,7 +1359,7 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
mp->replay_window =
clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
mp->entry.anti_replay_window_size =
- clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt));
+ clib_host_to_net_u32 (irt->anti_replay_window_size);
}
if (ort)
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index 0bbdc85aaed..e27892185e7 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -474,13 +474,12 @@ format_ipsec_sa (u8 * s, va_list * args)
if (ort)
s = format (s, "\n outbound thread-index:%d", ort->thread_index);
if (irt)
- s = format (s, "\n inbound seq %u seq-hi %u", irt->seq, irt->seq_hi);
+ s = format (s, "\n inbound seq %lu", irt->seq64);
if (ort)
s = format (s, "\n outbound seq %lu", ort->seq64);
if (irt)
{
- s = format (s, "\n window-size: %llu",
- IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt));
+ s = format (s, "\n window-size: %llu", irt->anti_replay_window_size);
s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
ipsec_sa_anti_replay_get_64b_window (irt));
}
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 6a25f6c583c..a7c7482a125 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -211,6 +211,39 @@ ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da,
return 0;
}
+always_inline uword
+ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua)
+{
+ if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
+ (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
+ return 1;
+
+ return 0;
+}
+
+always_inline ipsec_policy_t *
+ipsec6_input_policy_match (ipsec_spd_t *spd, ip6_address_t *sa,
+ ip6_address_t *da,
+ ipsec_spd_policy_type_t policy_type)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ u32 *i;
+
+ vec_foreach (i, spd->policies[policy_type])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+
+ if (!ip6_addr_match_range (sa, &p->raddr.start.ip6, &p->raddr.stop.ip6))
+ continue;
+
+ if (!ip6_addr_match_range (da, &p->laddr.start.ip6, &p->laddr.stop.ip6))
+ continue;
+ return p;
+ }
+ return 0;
+}
+
always_inline ipsec_policy_t *
ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
{
@@ -263,16 +296,6 @@ ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
return 0;
}
-always_inline uword
-ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
- ip6_address_t * ua)
-{
- if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
- (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
- return 1;
- return 0;
-}
-
always_inline void
ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node,
vlib_main_t *vm, ip4_header_t *ip0, ah_header_t *ah0,
@@ -514,7 +537,7 @@ udp_or_esp:
has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
(clib_address_t) ip0);
- if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ if (PREDICT_TRUE ((p0 != NULL) && (has_space0)))
{
*ipsec_matched += 1;
@@ -740,8 +763,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
spd0, b, node, &ipsec_bypassed,
&ipsec_dropped, &ipsec_matched,
&ipsec_unprocessed, next);
- if (ipsec_bypassed > 0)
- goto ipsec_bypassed;
}
}
else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
@@ -751,8 +772,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
spd0, b, node, &ipsec_bypassed,
&ipsec_dropped, &ipsec_matched,
&ipsec_unprocessed, next);
- if (ipsec_bypassed > 0)
- goto ipsec_bypassed;
}
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
{
@@ -764,7 +783,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
}
else
{
- ipsec_bypassed:
ipsec_unprocessed += 1;
}
n_left_from -= 1;
@@ -813,6 +831,142 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = {
extern vlib_node_registration_t ipsec6_input_node;
+always_inline void
+ipsec6_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im,
+ ip6_header_t *ip0, esp_header_t *esp0,
+ u32 thread_index, ipsec_spd_t *spd0,
+ vlib_buffer_t **b, vlib_node_runtime_t *node,
+ u64 *ipsec_bypassed, u64 *ipsec_dropped,
+ u64 *ipsec_matched, u64 *ipsec_unprocessed,
+ u32 *next)
+
+{
+ ipsec_policy_t *p0 = NULL;
+ u32 pi0 = ~0;
+ u8 has_space0 = 0;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
+
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ ipsec_fp_in_5tuple_from_ip6_range (
+ &tuples[0], &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi), IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
+
+ if (esp0->spi != 0)
+ {
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies,
+ 1);
+ p0 = policies[0];
+ }
+ else /* linear search if fast path is not enabled */
+ {
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi));
+ }
+ has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
+ (clib_address_t) ip0);
+
+ if (PREDICT_TRUE ((p0 != NULL) && (has_space0)))
+ {
+ *ipsec_matched += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+
+ vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
+ next[0] = im->esp6_decrypt_next_index;
+ vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
+ goto trace0;
+ }
+ }
+
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+ {
+ p0 =
+ ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address,
+ IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_bypassed += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+ goto trace0;
+ }
+ else
+ {
+ p0 = NULL;
+ pi0 = ~0;
+ }
+
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+ {
+ p0 =
+ ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address,
+ IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_dropped += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ }
+
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+
+trace0:
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+
+ tr->proto = ip0->protocol;
+ tr->sa_id = p0 ? p0->sa_id : ~0;
+ tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
+ tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+}
VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -822,9 +976,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
ipsec_main_t *im = &ipsec_main;
u32 ipsec_unprocessed = 0;
u32 ipsec_matched = 0;
- ipsec_policy_t *policies[1];
- ipsec_fp_5tuple_t tuples[1];
- bool ip_v6 = true;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -843,12 +994,13 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
u32 bi0, next0, pi0 = ~0;
vlib_buffer_t *b0;
ip6_header_t *ip0;
- esp_header_t *esp0;
+ esp_header_t *esp0 = NULL;
ip4_ipsec_config_t *c0;
ipsec_spd_t *spd0;
ipsec_policy_t *p0 = 0;
- ah_header_t *ah0;
u32 header_size = sizeof (ip0[0]);
+ u64 ipsec_unprocessed = 0, ipsec_matched = 0;
+ u64 ipsec_dropped = 0, ipsec_bypassed = 0;
bi0 = to_next[0] = from[0];
from += 1;
@@ -864,113 +1016,76 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
spd0 = pool_elt_at_index (im->spds, c0->spd_index);
ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ {
+ udp_header_t *udp0 = (udp_header_t *) ((u8 *) ip0 + header_size);
+
+ /* RFC5996 Section 2.23: "Port 4500 is reserved for
+ * UDP-encapsulated ESP and IKE."
+ * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or
+ 4500"
+ */
+ if ((clib_host_to_net_u16 (500) == udp0->dst_port) ||
+ (clib_host_to_net_u16 (4500) == udp0->dst_port))
+ esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t));
+ }
+ else if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)
esp0 = (esp_header_t *) ((u8 *) ip0 + header_size);
- ah0 = (ah_header_t *) ((u8 *) ip0 + header_size);
- if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
- {
-#if 0
- clib_warning
- ("packet received from %U to %U spi %u size %u spd_id %u",
- format_ip6_address, &ip0->src_address, format_ip6_address,
- &ip0->dst_address, clib_net_to_host_u32 (esp0->spi),
- clib_net_to_host_u16 (ip0->payload_length) + header_size,
- spd0->id);
-#endif
- if (im->fp_spd_ipv6_in_is_enabled &&
- PREDICT_TRUE (INDEX_INVALID !=
- spd0->fp_spd.ip6_in_lookup_hash_idx))
- {
- ipsec_fp_in_5tuple_from_ip6_range (
- &tuples[0], &ip0->src_address, &ip0->dst_address,
- clib_net_to_host_u32 (esp0->spi),
- IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
- ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples,
- policies, 1);
- p0 = policies[0];
- }
- else
- p0 = ipsec6_input_protect_policy_match (
- spd0, &ip0->src_address, &ip0->dst_address,
- clib_net_to_host_u32 (esp0->spi));
-
- if (PREDICT_TRUE (p0 != 0))
- {
- ipsec_matched += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->payload_length) +
- header_size);
-
- vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
- next0 = im->esp6_decrypt_next_index;
- vlib_buffer_advance (b0, header_size);
- /* TODO Add policy matching for bypass and discard policy
- * type */
- goto trace0;
- }
- else
- {
- pi0 = ~0;
- ipsec_unprocessed += 1;
- next0 = IPSEC_INPUT_NEXT_DROP;
- }
- }
+ if (esp0 != NULL)
+ {
+ ipsec6_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0,
+ &b0, node, &ipsec_bypassed,
+ &ipsec_dropped, &ipsec_matched,
+ &ipsec_unprocessed, &next0);
+ }
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
{
- p0 = ipsec6_input_protect_policy_match (spd0,
- &ip0->src_address,
- &ip0->dst_address,
- clib_net_to_host_u32
- (ah0->spi));
-
- if (PREDICT_TRUE (p0 != 0))
- {
- ipsec_matched += 1;
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->payload_length) +
- header_size);
-
- vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
- next0 = im->ah6_decrypt_next_index;
- goto trace0;
- }
- else
- {
- pi0 = ~0;
- ipsec_unprocessed += 1;
- next0 = IPSEC_INPUT_NEXT_DROP;
- }
+ ah_header_t *ah0 = (ah_header_t *) ((u8 *) ip0 + header_size);
+
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (ah0->spi));
+
+ if (PREDICT_TRUE (p0 != 0))
+ {
+ ipsec_matched += 1;
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length) + header_size);
+
+ vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+ next0 = im->ah6_decrypt_next_index;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+ if (p0)
+ {
+ tr->sa_id = p0->sa_id;
+ tr->policy_type = p0->type;
+ }
+
+ tr->proto = ip0->protocol;
+ tr->spi = clib_net_to_host_u32 (ah0->spi);
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+ }
+ else
+ {
+ pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
+ }
}
else
{
- ipsec_unprocessed += 1;
- }
-
- trace0:
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ipsec_input_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
-
- if (p0)
- {
- tr->sa_id = p0->sa_id;
- tr->policy_type = p0->type;
- }
-
- tr->proto = ip0->protocol;
- tr->spi = clib_net_to_host_u32 (esp0->spi);
- tr->seq = clib_net_to_host_u32 (esp0->seq);
- tr->spd = spd0->id;
- tr->policy_index = pi0;
+ ipsec_unprocessed += 1;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index eb4270ac2b4..ec281cd9a11 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -214,7 +214,6 @@ ipsec_sa_init_runtime (ipsec_sa_t *sa)
if (ipsec_sa_get_inb_rt (sa))
{
ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
- irt->anti_reply_huge = ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa);
irt->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa);
irt->use_esn = ipsec_sa_is_set_USE_ESN (sa);
irt->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa);
@@ -426,7 +425,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_outb_rt_t *ort;
clib_error_t *err;
ipsec_sa_t *sa;
- u32 sa_index;
+ u32 sa_index, irt_sz;
+ u16 thread_index = (vlib_num_workers ()) ? ~0 : 0;
u64 rand[2];
uword *p;
int rv;
@@ -440,17 +440,35 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
pool_get_aligned_zero (im->sa_pool, sa, CLIB_CACHE_LINE_BYTES);
sa_index = sa - im->sa_pool;
+ sa->flags = flags;
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
+ /* window size rounded up to next power of 2 */
+ anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
+ else
+ anti_replay_window_size = 64;
+
vec_validate (im->inb_sa_runtimes, sa_index);
vec_validate (im->outb_sa_runtimes, sa_index);
- irt = clib_mem_alloc_aligned (sizeof (ipsec_sa_inb_rt_t),
- _Alignof (ipsec_sa_inb_rt_t));
+ irt_sz = sizeof (ipsec_sa_inb_rt_t);
+ irt_sz += anti_replay_window_size / 8;
+ irt_sz = round_pow2 (irt_sz, CLIB_CACHE_LINE_BYTES);
+
+ irt = clib_mem_alloc_aligned (irt_sz, alignof (ipsec_sa_inb_rt_t));
ort = clib_mem_alloc_aligned (sizeof (ipsec_sa_outb_rt_t),
- _Alignof (ipsec_sa_outb_rt_t));
+ alignof (ipsec_sa_outb_rt_t));
im->inb_sa_runtimes[sa_index] = irt;
im->outb_sa_runtimes[sa_index] = ort;
- clib_memset (irt, 0, sizeof (ipsec_sa_inb_rt_t));
- clib_memset (ort, 0, sizeof (ipsec_sa_outb_rt_t));
+
+ *irt = (ipsec_sa_inb_rt_t){
+ .thread_index = thread_index,
+ .anti_replay_window_size = anti_replay_window_size,
+ };
+
+ *ort = (ipsec_sa_outb_rt_t){
+ .thread_index = thread_index,
+ };
clib_pcg64i_srandom_r (&ort->iv_prng, rand[0], rand[1]);
@@ -470,12 +488,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
sa->spi = spi;
sa->stat_index = sa_index;
sa->protocol = proto;
- sa->flags = flags;
sa->salt = salt;
- if (irt)
- irt->thread_index = (vlib_num_workers ()) ? ~0 : 0;
- if (ort)
- ort->thread_index = (vlib_num_workers ()) ? ~0 : 0;
if (integ_alg != IPSEC_INTEG_ALG_NONE)
{
@@ -485,9 +498,6 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_set_crypto_alg (sa, crypto_alg);
ipsec_sa_set_async_op_ids (sa);
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
- ipsec_sa_set_ANTI_REPLAY_HUGE (sa);
-
clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key));
if (crypto_alg != IPSEC_CRYPTO_ALG_NONE)
@@ -600,17 +610,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_register_udp_port (dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
}
- /* window size rounded up to next power of 2 */
- if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- {
- anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
- irt->replay_window_huge =
- clib_bitmap_set_region (0, 0, 1, anti_replay_window_size);
- }
- else
- {
- irt->replay_window = ~0;
- }
+ for (u32 i = 0; i < anti_replay_window_size / uword_bits; i++)
+ irt->replay_window[i] = ~0ULL;
hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
@@ -651,9 +652,6 @@ ipsec_sa_del (ipsec_sa_t * sa)
vnet_crypto_key_del (vm, sa->crypto_sync_key_index);
if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
vnet_crypto_key_del (vm, sa->integ_sync_key_index);
- if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- if (irt && irt->replay_window_huge)
- clib_bitmap_free (irt->replay_window_huge);
foreach_pointer (p, irt, ort)
if (p)
clib_mem_free (p);
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index ce2964a9493..9d8c90bab41 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -100,8 +100,7 @@ typedef struct ipsec_key_t_
_ (32, IS_PROTECT, "Protect") \
_ (64, IS_INBOUND, "inbound") \
_ (512, IS_ASYNC, "async") \
- _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \
- _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge")
+ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop")
typedef enum ipsec_sad_flags_t_
{
@@ -149,7 +148,6 @@ typedef struct
u16 is_null_gmac : 1;
u16 use_esn : 1;
u16 use_anti_replay : 1;
- u16 anti_reply_huge : 1;
u16 is_protect : 1;
u16 is_tunnel : 1;
u16 is_transport : 1;
@@ -161,16 +159,12 @@ typedef struct
u8 udp_sz;
u16 thread_index;
u32 salt;
- u32 seq;
- u32 seq_hi;
+ u64 seq64;
u16 async_op_id;
vnet_crypto_key_index_t cipher_key_index;
vnet_crypto_key_index_t integ_key_index;
- union
- {
- u64 replay_window;
- clib_bitmap_t *replay_window_huge;
- };
+ u32 anti_replay_window_size;
+ uword replay_window[];
} ipsec_sa_inb_rt_t;
typedef struct
@@ -186,20 +180,20 @@ typedef struct
u16 use_anti_replay : 1;
u16 drop_no_crypto : 1;
u16 is_async : 1;
- clib_pcg64i_random_t iv_prng;
u16 cipher_op_id;
u16 integ_op_id;
u8 cipher_iv_size;
u8 esp_block_align;
u8 integ_icv_size;
+ ip_dscp_t t_dscp;
+ tunnel_encap_decap_flags_t tunnel_flags;
u16 thread_index;
+ u16 async_op_id;
u32 salt;
- u64 seq64;
u32 spi_be;
- ip_dscp_t t_dscp;
+ u64 seq64;
dpo_id_t dpo;
- tunnel_encap_decap_flags_t tunnel_flags;
- u16 async_op_id;
+ clib_pcg64i_random_t iv_prng;
vnet_crypto_key_index_t cipher_key_index;
vnet_crypto_key_index_t integ_key_index;
union
@@ -332,86 +326,35 @@ extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args);
#define IPSEC_UDP_PORT_NONE ((u16) ~0)
-/*
- * Anti Replay definitions
- */
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_irt) \
- (u32) (PREDICT_FALSE (_irt->anti_reply_huge) ? \
- clib_bitmap_bytes (_irt->replay_window_huge) * 8 : \
- BITS (_irt->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_irt, _is_huge) \
- (u32) (_is_huge ? clib_bitmap_bytes (_irt->replay_window_huge) * 8 : \
- BITS (_irt->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_irt) \
- (u64) (PREDICT_FALSE (_irt->anti_reply_huge) ? \
- clib_bitmap_count_set_bits (_irt->replay_window_huge) : \
- count_set_bits (_irt->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_irt, _is_huge) \
- (u64) (_is_huge ? clib_bitmap_count_set_bits (_irt->replay_window_huge) : \
- count_set_bits (_irt->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_irt) \
- (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_irt) - 1)
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_irt, _is_huge) \
- (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_irt, _is_huge) - 1)
-
-/*
- * sequence number less than the lower bound are outside of the window
- * From RFC4303 Appendix A:
- * Bl = Tl - W + 1
- */
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \
- (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1)
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \
- (u32) (_sa->seq - \
- IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1)
-
always_inline u64
ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_inb_rt_t *irt)
{
- if (!irt->anti_reply_huge)
- return irt->replay_window;
-
u64 w;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt);
- u32 tl_win_index = irt->seq & (window_size - 1);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 tl_win_index = irt->seq64 & (window_size - 1);
+ uword *bmp = (uword *) irt->replay_window;
if (PREDICT_TRUE (tl_win_index >= 63))
- return clib_bitmap_get_multiple (irt->replay_window_huge,
- tl_win_index - 63, 64);
+ return uword_bitmap_get_multiple (bmp, tl_win_index - 63, 64);
- w = clib_bitmap_get_multiple_no_check (irt->replay_window_huge, 0,
- tl_win_index + 1)
+ w = uword_bitmap_get_multiple_no_check (bmp, 0, tl_win_index + 1)
<< (63 - tl_win_index);
- w |= clib_bitmap_get_multiple_no_check (irt->replay_window_huge,
- window_size - 63 + tl_win_index,
- 63 - tl_win_index);
+ w |= uword_bitmap_get_multiple_no_check (
+ bmp, window_size - 63 + tl_win_index, 63 - tl_win_index);
return w;
}
always_inline int
-ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 seq,
- bool ar_huge)
+ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 window_size,
+ u32 seq)
{
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge);
-
/* we assume that the packet is in the window.
* if the packet falls left (sa->seq - seq >= window size),
* the result is wrong */
- if (ar_huge)
- return clib_bitmap_get (irt->replay_window_huge, seq & (window_size - 1));
- else
- return (irt->replay_window >> (window_size + seq - irt->seq - 1)) & 1;
-
- return 0;
+ return uword_bitmap_is_bit_set ((uword *) irt->replay_window,
+ seq & (window_size - 1));
}
/*
@@ -431,13 +374,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 seq,
always_inline int
ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
u32 hi_seq_used, bool post_decrypt,
- u32 *hi_seq_req, bool ar_huge)
+ u32 *hi_seq_req)
{
ASSERT ((post_decrypt == false) == (hi_seq_req != 0));
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge);
- u32 window_lower_bound =
- IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (irt, ar_huge);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 exp_lo = irt->seq64;
+ u32 exp_hi = irt->seq64 >> 32;
+ u32 window_lower_bound = exp_lo - window_size + 1;
if (!irt->use_esn)
{
@@ -448,14 +392,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
if (!irt->use_anti_replay)
return 0;
- if (PREDICT_TRUE (seq > irt->seq))
+ if (PREDICT_TRUE (seq > exp_lo))
return 0;
/* does the packet fall out on the left of the window */
- if (irt->seq >= seq + window_size)
+ if (exp_lo >= seq + window_size)
return 1;
- return ipsec_sa_anti_replay_check (irt, seq, ar_huge);
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
if (!irt->use_anti_replay)
@@ -473,20 +417,20 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
*/
if (hi_seq_req)
{
- if (seq >= irt->seq)
+ if (seq >= exp_lo)
/* The packet's sequence number is larger that the SA's.
* that can't be a warp - unless we lost more than
* 2^32 packets ... how could we know? */
- *hi_seq_req = irt->seq_hi;
+ *hi_seq_req = exp_hi;
else
{
/* The packet's SN is less than the SAs, so either the SN has
* wrapped or the SN is just old. */
- if (irt->seq - seq > (1 << 30))
+ if (exp_lo - seq > (1 << 30))
/* It's really really really old => it wrapped */
- *hi_seq_req = irt->seq_hi + 1;
+ *hi_seq_req = exp_hi + 1;
else
- *hi_seq_req = irt->seq_hi;
+ *hi_seq_req = exp_hi;
}
}
/*
@@ -496,7 +440,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
return 0;
}
- if (PREDICT_TRUE (window_size > 0 && irt->seq >= window_size - 1))
+ if (PREDICT_TRUE (exp_lo >= window_size - 1))
{
/*
* the last sequence number VPP received is more than one
@@ -513,7 +457,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
*/
if (post_decrypt)
{
- if (hi_seq_used == irt->seq_hi)
+ if (hi_seq_used == exp_hi)
/* the high sequence number used to succesfully decrypt this
* packet is the same as the last-sequence number of the SA.
* that means this packet did not cause a wrap.
@@ -530,7 +474,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
/* pre-decrypt it might be the packet that causes a wrap, we
* need to decrypt it to find out */
if (hi_seq_req)
- *hi_seq_req = irt->seq_hi + 1;
+ *hi_seq_req = exp_hi + 1;
return 0;
}
}
@@ -541,13 +485,13 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
* end of the window.
*/
if (hi_seq_req)
- *hi_seq_req = irt->seq_hi;
- if (seq <= irt->seq)
+ *hi_seq_req = exp_hi;
+ if (seq <= exp_lo)
/*
* The received seq number is within bounds of the window
* check if it's a duplicate
*/
- return ipsec_sa_anti_replay_check (irt, seq, ar_huge);
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
else
/*
* The received sequence number is greater than the window
@@ -572,15 +516,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
/*
* the sequence number is less than the lower bound.
*/
- if (seq <= irt->seq)
+ if (seq <= exp_lo)
{
/*
* the packet is within the window upper bound.
* check for duplicates.
*/
if (hi_seq_req)
- *hi_seq_req = irt->seq_hi;
- return ipsec_sa_anti_replay_check (irt, seq, ar_huge);
+ *hi_seq_req = exp_hi;
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
else
{
@@ -594,7 +538,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
* we've lost close to 2^32 packets.
*/
if (hi_seq_req)
- *hi_seq_req = irt->seq_hi;
+ *hi_seq_req = exp_hi;
return 0;
}
}
@@ -607,8 +551,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
* received packet, the SA has moved on to a higher sequence number.
*/
if (hi_seq_req)
- *hi_seq_req = irt->seq_hi - 1;
- return ipsec_sa_anti_replay_check (irt, seq, ar_huge);
+ *hi_seq_req = exp_hi - 1;
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
}
@@ -618,121 +562,97 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
}
always_inline u32
-ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc,
- bool ar_huge)
+ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 window_size,
+ u32 inc)
{
+ uword *window = irt->replay_window;
+ u32 window_mask = window_size - 1;
u32 n_lost = 0;
u32 seen = 0;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge);
if (inc < window_size)
{
- if (ar_huge)
- {
- /* the number of packets we saw in this section of the window */
- clib_bitmap_t *window = irt->replay_window_huge;
- u32 window_lower_bound = (irt->seq + 1) & (window_size - 1);
- u32 window_next_lower_bound =
- (window_lower_bound + inc) & (window_size - 1);
+ /* the number of packets we saw in this section of the window */
+ u32 window_lower_bound = (irt->seq64 + 1) & window_mask;
+ u32 window_next_lower_bound = (window_lower_bound + inc) & window_mask;
- uword i_block, i_word_start, i_word_end, full_words;
- uword n_blocks = window_size >> log2_uword_bits;
- uword mask;
+ uword i_block, i_word_start, i_word_end, full_words;
+ uword n_blocks = window_size >> log2_uword_bits;
+ uword mask;
- i_block = window_lower_bound >> log2_uword_bits;
+ i_block = window_lower_bound >> log2_uword_bits;
- i_word_start = window_lower_bound & (uword_bits - 1);
- i_word_end = window_next_lower_bound & (uword_bits - 1);
+ i_word_start = window_lower_bound & (uword_bits - 1);
+ i_word_end = window_next_lower_bound & (uword_bits - 1);
- /* We stay in the same word */
- if (i_word_start + inc <= uword_bits)
- {
- mask = pow2_mask (inc) << i_word_start;
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
- }
- else
+ /* We stay in the same word */
+ if (i_word_start + inc <= uword_bits)
+ {
+ mask = pow2_mask (inc) << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+ else
+ {
+ full_words =
+ (inc + i_word_start - uword_bits - i_word_end) >> log2_uword_bits;
+
+ /* count set bits in the first word */
+ mask = (uword) ~0 << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ i_block = (i_block + 1) & (n_blocks - 1);
+
+ /* count set bits in the next full words */
+ /* even if the last word needs to be fully counted, we treat it
+ * separately */
+ while (full_words >= 8)
{
- full_words = (inc + i_word_start - uword_bits - i_word_end) >>
- log2_uword_bits;
-
- /* count set bits in the first word */
- mask = (uword) ~0 << i_word_start;
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
- i_block = (i_block + 1) & (n_blocks - 1);
-
- /* count set bits in the next full words */
- /* even if the last word need to be fully counted, we treat it
- * apart */
- while (full_words >= 8)
+ if (full_words >= 16)
{
- if (full_words >= 16)
- {
- /* prefect the next 8 blocks (64 bytes) */
- clib_prefetch_store (
- &window[(i_block + 8) & (n_blocks - 1)]);
- }
-
- seen += count_set_bits (window[i_block]);
- seen +=
- count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
- window[i_block] = 0;
- window[(i_block + 1) & (n_blocks - 1)] = 0;
- window[(i_block + 2) & (n_blocks - 1)] = 0;
- window[(i_block + 3) & (n_blocks - 1)] = 0;
- window[(i_block + 4) & (n_blocks - 1)] = 0;
- window[(i_block + 5) & (n_blocks - 1)] = 0;
- window[(i_block + 6) & (n_blocks - 1)] = 0;
- window[(i_block + 7) & (n_blocks - 1)] = 0;
-
- i_block = (i_block + 8) & (n_blocks - 1);
- full_words -= 8;
- }
- while (full_words > 0)
- {
- // last word is treated after the loop
- seen += count_set_bits (window[i_block]);
- window[i_block] = 0;
- i_block = (i_block + 1) & (n_blocks - 1);
- full_words--;
+ /* prefetch the next 8 blocks (64 bytes) */
+ clib_prefetch_store (
+ &window[(i_block + 8) & (n_blocks - 1)]);
}
- /* the last word */
- mask = pow2_mask (i_word_end);
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
+ seen += count_set_bits (window[i_block]);
+ seen += count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
+ window[i_block] = 0;
+ window[(i_block + 1) & (n_blocks - 1)] = 0;
+ window[(i_block + 2) & (n_blocks - 1)] = 0;
+ window[(i_block + 3) & (n_blocks - 1)] = 0;
+ window[(i_block + 4) & (n_blocks - 1)] = 0;
+ window[(i_block + 5) & (n_blocks - 1)] = 0;
+ window[(i_block + 6) & (n_blocks - 1)] = 0;
+ window[(i_block + 7) & (n_blocks - 1)] = 0;
+
+ i_block = (i_block + 8) & (n_blocks - 1);
+ full_words -= 8;
+ }
+ while (full_words > 0)
+ {
+ // last word is treated after the loop
+ seen += count_set_bits (window[i_block]);
+ window[i_block] = 0;
+ i_block = (i_block + 1) & (n_blocks - 1);
+ full_words--;
}
- clib_bitmap_set_no_check (window,
- (irt->seq + inc) & (window_size - 1), 1);
- }
- else
- {
- /*
- * count how many holes there are in the portion
- * of the window that we will right shift of the end
- * as a result of this increments
- */
- u64 old = irt->replay_window & pow2_mask (inc);
- /* the number of packets we saw in this section of the window */
- seen = count_set_bits (old);
- irt->replay_window =
- ((irt->replay_window) >> inc) | (1ULL << (window_size - 1));
+ /* the last word */
+ mask = pow2_mask (i_word_end);
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
}
+ uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask,
+ 1);
+
/*
* the number we missed is the size of the window section
* minus the number we saw.
@@ -741,24 +661,17 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc,
}
else
{
+ u32 n_uwords = window_size / uword_bits;
/* holes in the replay window are lost packets */
- n_lost = window_size -
- IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (irt, ar_huge);
+ n_lost = window_size - uword_bitmap_count_set_bits (window, n_uwords);
/* any sequence numbers that now fall outside the window
* are forever lost */
n_lost += inc - window_size;
- if (PREDICT_FALSE (ar_huge))
- {
- clib_bitmap_zero (irt->replay_window_huge);
- clib_bitmap_set_no_check (irt->replay_window_huge,
- (irt->seq + inc) & (window_size - 1), 1);
- }
- else
- {
- irt->replay_window = 1ULL << (window_size - 1);
- }
+ uword_bitmap_clear (window, n_uwords);
+ uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask,
+ 1);
}
return n_lost;
@@ -775,65 +688,44 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc,
*/
always_inline u64
ipsec_sa_anti_replay_advance (ipsec_sa_inb_rt_t *irt, u32 thread_index,
- u32 seq, u32 hi_seq, bool ar_huge)
+ u32 seq, u32 hi_seq)
{
u64 n_lost = 0;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 masked_seq = seq & (window_size - 1);
+ u32 exp_lo = irt->seq64;
+ u32 exp_hi = irt->seq64 >> 32;
u32 pos;
if (irt->use_esn)
{
- int wrap = hi_seq - irt->seq_hi;
+ int wrap = hi_seq - exp_hi;
- if (wrap == 0 && seq > irt->seq)
+ if (wrap == 0 && seq > exp_lo)
{
- pos = seq - irt->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge);
- irt->seq = seq;
+ pos = seq - exp_lo;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) exp_hi << 32 | seq;
}
else if (wrap > 0)
{
- pos = seq + ~irt->seq + 1;
- n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge);
- irt->seq = seq;
- irt->seq_hi = hi_seq;
- }
- else if (wrap < 0)
- {
- pos = ~seq + irt->seq + 1;
- if (ar_huge)
- clib_bitmap_set_no_check (irt->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- irt->replay_window |= (1ULL << (window_size - 1 - pos));
+ pos = seq + ~exp_lo + 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) hi_seq << 32 | seq;
}
else
- {
- pos = irt->seq - seq;
- if (ar_huge)
- clib_bitmap_set_no_check (irt->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- irt->replay_window |= (1ULL << (window_size - 1 - pos));
- }
+ uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1);
}
else
{
- if (seq > irt->seq)
+ if (seq > exp_lo)
{
- pos = seq - irt->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge);
- irt->seq = seq;
+ pos = seq - exp_lo;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) exp_hi << 32 | seq;
}
else
- {
- pos = irt->seq - seq;
- if (ar_huge)
- clib_bitmap_set_no_check (irt->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- irt->replay_window |= (1ULL << (window_size - 1 - pos));
- }
+ uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1);
}
return n_lost;
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index a62f914d43a..e2f9a6883fe 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -38,12 +38,12 @@
*
*/
uword
-unformat_vnet_uri (unformat_input_t * input, va_list * args)
+unformat_vnet_uri (unformat_input_t *input, va_list *args)
{
session_endpoint_cfg_t *sep = va_arg (*args, session_endpoint_cfg_t *);
u32 transport_proto = 0, port;
- if (unformat (input, "%U://%U/%d", unformat_transport_proto,
+ if (unformat (input, "%U://%U:%d", unformat_transport_proto,
&transport_proto, unformat_ip4_address, &sep->ip.ip4, &port))
{
sep->transport_proto = transport_proto;
@@ -52,6 +52,54 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
return 1;
}
else if (unformat (input, "%U://%U/%d", unformat_transport_proto,
+ &transport_proto, unformat_ip4_address, &sep->ip.ip4,
+ &port))
+ {
+ sep->transport_proto = transport_proto;
+ sep->port = clib_host_to_net_u16 (port);
+ sep->is_ip4 = 1;
+ return 1;
+ }
+ else if (unformat (input, "%U://%U", unformat_transport_proto,
+ &transport_proto, unformat_ip4_address, &sep->ip.ip4))
+ {
+ sep->transport_proto = transport_proto;
+ if (sep->transport_proto == TRANSPORT_PROTO_HTTP)
+ port = 80;
+ else if (sep->transport_proto == TRANSPORT_PROTO_TLS)
+ port = 443;
+ else
+ return 0;
+
+ sep->port = clib_host_to_net_u16 (port);
+ sep->is_ip4 = 1;
+ return 1;
+ }
+ else if (unformat (input, "%U://[%U]:%d", unformat_transport_proto,
+ &transport_proto, unformat_ip6_address, &sep->ip.ip6,
+ &port))
+ {
+ sep->transport_proto = transport_proto;
+ sep->port = clib_host_to_net_u16 (port);
+ sep->is_ip4 = 0;
+ return 1;
+ }
+ else if (unformat (input, "%U://[%U]", unformat_transport_proto,
+ &transport_proto, unformat_ip6_address, &sep->ip.ip6))
+ {
+ sep->transport_proto = transport_proto;
+ if (sep->transport_proto == TRANSPORT_PROTO_HTTP)
+ port = 80;
+ else if (sep->transport_proto == TRANSPORT_PROTO_TLS)
+ port = 443;
+ else
+ return 0;
+
+ sep->port = clib_host_to_net_u16 (port);
+ sep->is_ip4 = 0;
+ return 1;
+ }
+ else if (unformat (input, "%U://%U/%d", unformat_transport_proto,
&transport_proto, unformat_ip6_address, &sep->ip.ip6,
&port))
{
@@ -106,6 +154,45 @@ parse_uri (char *uri, session_endpoint_cfg_t *sep)
return 0;
}
+/* Use before 'parse_uri()'. Removes the target from the URI and copies it
+ * into '*target'. The '*target' vector is resized automatically.
+ */
+session_error_t
+parse_target (char **uri, char **target)
+{
+ u8 counter = 0;
+
+ for (u32 i = 0; i < (u32) strlen (*uri); i++)
+ {
+ if ((*uri)[i] == '/')
+ counter++;
+
+ if (counter == 3)
+ {
+ /* resize and make space for NULL terminator */
+ if (vec_len (*target) < strlen (*uri) - i + 2)
+ vec_resize (*target, strlen (*uri) - i + 2);
+
+ strncpy (*target, *uri + i, strlen (*uri) - i);
+ (*uri)[i + 1] = '\0';
+ break;
+ }
+ }
+
+ if (!*target)
+ {
+ vec_resize (*target, 2);
+ **target = '/';
+ }
+
+ vec_terminate_c_string (*target);
+
+ if (!*target)
+ return SESSION_E_INVALID;
+
+ return 0;
+}
+
session_error_t
vnet_bind_uri (vnet_listen_args_t *a)
{
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 21ed97998f2..33b61187fe3 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -281,6 +281,7 @@ typedef enum session_fd_flag_
} session_fd_flag_t;
session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t parse_target (char **uri, char **target);
session_error_t vnet_bind_uri (vnet_listen_args_t *);
session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
session_error_t vnet_connect_uri (vnet_connect_args_t *a);
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 2a6ac283fb9..d65371e81e5 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -28,9 +28,17 @@
session_main_t session_main;
+typedef enum
+{
+ SESSION_EVT_RPC,
+ SESSION_EVT_IO,
+ SESSION_EVT_SESSION,
+} session_evt_family_t;
+
static inline int
session_send_evt_to_thread (void *data, void *args, u32 thread_index,
- session_evt_type_t evt_type)
+ session_evt_type_t evt_type,
+ session_evt_family_t family)
{
session_worker_t *wrk = session_main_get_worker (thread_index);
session_event_t *evt;
@@ -45,30 +53,33 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
svm_msg_q_unlock (mq);
return -2;
}
- switch (evt_type)
+ switch (family)
{
- case SESSION_CTRL_EVT_RPC:
+ case SESSION_EVT_RPC:
+ ASSERT (evt_type == SESSION_CTRL_EVT_RPC);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->rpc_args.fp = data;
evt->rpc_args.arg = args;
break;
- case SESSION_IO_EVT_RX:
- case SESSION_IO_EVT_TX:
- case SESSION_IO_EVT_TX_FLUSH:
- case SESSION_IO_EVT_BUILTIN_RX:
+ case SESSION_EVT_IO:
+ ASSERT (evt_type == SESSION_IO_EVT_RX || evt_type == SESSION_IO_EVT_TX ||
+ evt_type == SESSION_IO_EVT_TX_FLUSH ||
+ evt_type == SESSION_IO_EVT_BUILTIN_RX);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_TX_MAIN:
- case SESSION_CTRL_EVT_CLOSE:
- case SESSION_CTRL_EVT_RESET:
+ case SESSION_EVT_SESSION:
+ ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE ||
+ evt_type == SESSION_CTRL_EVT_HALF_CLOSE ||
+ evt_type == SESSION_CTRL_EVT_RESET);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_handle = session_handle ((session_t *) data);
break;
default:
+ ASSERT (0);
clib_warning ("evt unhandled!");
svm_msg_q_unlock (mq);
return -1;
@@ -88,7 +99,8 @@ int
session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type)
{
return session_send_evt_to_thread (&f->vpp_session_index, 0,
- f->master_thread_index, evt_type);
+ f->master_thread_index, evt_type,
+ SESSION_EVT_IO);
}
/* Deprecated, use session_program_* functions */
@@ -96,14 +108,16 @@ int
session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
session_evt_type_t evt_type)
{
- return session_send_evt_to_thread (data, 0, thread_index, evt_type);
+ return session_send_evt_to_thread (data, 0, thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
{
return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index, evt_type);
+ (u32) sh.thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
@@ -116,9 +130,9 @@ session_program_rx_io_evt (session_handle_tu_t sh)
}
else
{
- return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index,
- SESSION_IO_EVT_BUILTIN_RX);
+ return session_send_evt_to_thread (
+ (void *) &sh.session_index, 0, (u32) sh.thread_index,
+ SESSION_IO_EVT_BUILTIN_RX, SESSION_EVT_IO);
}
}
@@ -127,25 +141,24 @@ session_program_transport_io_evt (session_handle_tu_t sh,
session_evt_type_t evt_type)
{
return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index, evt_type);
+ (u32) sh.thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
- ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE ||
- evt_type == SESSION_CTRL_EVT_HALF_CLOSE ||
- evt_type == SESSION_CTRL_EVT_RESET);
- return session_send_evt_to_thread (s, 0, s->thread_index, evt_type);
+ return session_send_evt_to_thread (s, 0, s->thread_index, evt_type,
+ SESSION_EVT_SESSION);
}
void
session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
void *rpc_args)
{
- session_send_evt_to_thread (fp, rpc_args, thread_index,
- SESSION_CTRL_EVT_RPC);
+ session_send_evt_to_thread (fp, rpc_args, thread_index, SESSION_CTRL_EVT_RPC,
+ SESSION_EVT_RPC);
}
void
@@ -480,115 +493,6 @@ session_alloc_for_half_open (transport_connection_t *tc)
return s;
}
-/**
- * Discards bytes from buffer chain
- *
- * It discards n_bytes_to_drop starting at first buffer after chain_b
- */
-always_inline void
-session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b,
- vlib_buffer_t ** chain_b,
- u32 n_bytes_to_drop)
-{
- vlib_buffer_t *next = *chain_b;
- u32 to_drop = n_bytes_to_drop;
- ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
- while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- next = vlib_get_buffer (vm, next->next_buffer);
- if (next->current_length > to_drop)
- {
- vlib_buffer_advance (next, to_drop);
- to_drop = 0;
- }
- else
- {
- to_drop -= next->current_length;
- next->current_length = 0;
- }
- }
- *chain_b = next;
-
- if (to_drop == 0)
- b->total_length_not_including_first_buffer -= n_bytes_to_drop;
-}
-
-/**
- * Enqueue buffer chain tail
- */
-always_inline int
-session_enqueue_chain_tail (session_t * s, vlib_buffer_t * b,
- u32 offset, u8 is_in_order)
-{
- vlib_buffer_t *chain_b;
- u32 chain_bi, len, diff;
- vlib_main_t *vm = vlib_get_main ();
- u8 *data;
- u32 written = 0;
- int rv = 0;
-
- if (is_in_order && offset)
- {
- diff = offset - b->current_length;
- if (diff > b->total_length_not_including_first_buffer)
- return 0;
- chain_b = b;
- session_enqueue_discard_chain_bytes (vm, b, &chain_b, diff);
- chain_bi = vlib_get_buffer_index (vm, chain_b);
- }
- else
- chain_bi = b->next_buffer;
-
- do
- {
- chain_b = vlib_get_buffer (vm, chain_bi);
- data = vlib_buffer_get_current (chain_b);
- len = chain_b->current_length;
- if (!len)
- continue;
- if (is_in_order)
- {
- rv = svm_fifo_enqueue (s->rx_fifo, len, data);
- if (rv == len)
- {
- written += rv;
- }
- else if (rv < len)
- {
- return (rv > 0) ? (written + rv) : written;
- }
- else if (rv > len)
- {
- written += rv;
-
- /* written more than what was left in chain */
- if (written > b->total_length_not_including_first_buffer)
- return written;
-
- /* drop the bytes that have already been delivered */
- session_enqueue_discard_chain_bytes (vm, b, &chain_b, rv - len);
- }
- }
- else
- {
- rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data);
- if (rv)
- {
- clib_warning ("failed to enqueue multi-buffer seg");
- return -1;
- }
- offset += len;
- }
- }
- while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT)
- ? chain_b->next_buffer : 0));
-
- if (is_in_order)
- return written;
-
- return 0;
-}
-
void
session_fifo_tuning (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 len)
@@ -760,154 +664,6 @@ session_main_flush_enqueue_events (transport_proto_t transport_proto,
wrk->session_to_enqueue[transport_proto] = handles;
}
-/*
- * Enqueue data for delivery to app. If requested, it queues app notification
- * event for later delivery.
- *
- * @param tc Transport connection which is to be enqueued data
- * @param b Buffer to be enqueued
- * @param offset Offset at which to start enqueueing if out-of-order
- * @param queue_event Flag to indicate if peer is to be notified or if event
- * is to be queued. The former is useful when more data is
- * enqueued and only one event is to be generated.
- * @param is_in_order Flag to indicate if data is in order
- * @return Number of bytes enqueued or a negative value if enqueueing failed.
- */
-int
-session_enqueue_stream_connection (transport_connection_t * tc,
- vlib_buffer_t * b, u32 offset,
- u8 queue_event, u8 is_in_order)
-{
- session_t *s;
- int enqueued = 0, rv, in_order_off;
-
- s = session_get (tc->s_index, tc->thread_index);
-
- if (is_in_order)
- {
- enqueued = svm_fifo_enqueue (s->rx_fifo,
- b->current_length,
- vlib_buffer_get_current (b));
- if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT)
- && enqueued >= 0))
- {
- in_order_off = enqueued > b->current_length ? enqueued : 0;
- rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
- if (rv > 0)
- enqueued += rv;
- }
- }
- else
- {
- rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset,
- b->current_length,
- vlib_buffer_get_current (b));
- if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
- session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
- /* if something was enqueued, report even this as success for ooo
- * segment handling */
- return rv;
- }
-
- if (queue_event)
- {
- /* Queue RX event on this fifo. Eventually these will need to be
- * flushed by calling @ref session_main_flush_enqueue_events () */
- if (!(s->flags & SESSION_F_RX_EVT))
- {
- session_worker_t *wrk = session_main_get_worker (s->thread_index);
- ASSERT (s->thread_index == vlib_get_thread_index ());
- s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
- }
-
- return enqueued;
-}
-
-always_inline int
-session_enqueue_dgram_connection_inline (session_t *s,
- session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event, u32 is_cl)
-{
- int rv;
-
- ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo)
- >= b->current_length + sizeof (*hdr));
-
- if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
- {
- svm_fifo_seg_t segs[2] = {
- { (u8 *) hdr, sizeof (*hdr) },
- { vlib_buffer_get_current (b), b->current_length }
- };
-
- rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
- 0 /* allow_partial */ );
- }
- else
- {
- vlib_main_t *vm = vlib_get_main ();
- svm_fifo_seg_t *segs = 0, *seg;
- vlib_buffer_t *it = b;
- u32 n_segs = 1;
-
- vec_add2 (segs, seg, 1);
- seg->data = (u8 *) hdr;
- seg->len = sizeof (*hdr);
- while (it)
- {
- vec_add2 (segs, seg, 1);
- seg->data = vlib_buffer_get_current (it);
- seg->len = it->current_length;
- n_segs++;
- if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT))
- break;
- it = vlib_get_buffer (vm, it->next_buffer);
- }
- rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs,
- 0 /* allow partial */ );
- vec_free (segs);
- }
-
- if (queue_event && rv > 0)
- {
- /* Queue RX event on this fifo. Eventually these will need to be
- * flushed by calling @ref session_main_flush_enqueue_events () */
- if (!(s->flags & SESSION_F_RX_EVT))
- {
- u32 thread_index =
- is_cl ? vlib_get_thread_index () : s->thread_index;
- session_worker_t *wrk = session_main_get_worker (thread_index);
- ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
- s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
- }
- return rv > 0 ? rv : 0;
-}
-
-int
-session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto, u8 queue_event)
-{
- return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
- queue_event, 0 /* is_cl */);
-}
-
-int
-session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto, u8 queue_event)
-{
- return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
- queue_event, 1 /* is_cl */);
-}
-
int
session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
vlib_buffer_t *b, u8 proto,
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index daa3bf97f56..d6c5cbc6085 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -151,6 +151,8 @@ typedef struct session_worker_
/** Per-app-worker bitmap of pending notifications */
uword *app_wrks_pending_ntf;
+ svm_fifo_seg_t *rx_segs;
+
int config_index;
u8 dma_enabled;
session_dma_transfer *dma_trans;
@@ -323,6 +325,67 @@ typedef struct _session_enable_disable_args_t
#define TRANSPORT_PROTO_INVALID (session_main.last_transport_proto_type + 1)
#define TRANSPORT_N_PROTOS (session_main.last_transport_proto_type + 1)
+/*
+ * Session layer functions
+ */
+
+always_inline session_main_t *
+vnet_get_session_main ()
+{
+ return &session_main;
+}
+
+always_inline session_worker_t *
+session_main_get_worker (u32 thread_index)
+{
+ return vec_elt_at_index (session_main.wrk, thread_index);
+}
+
+static inline session_worker_t *
+session_main_get_worker_if_valid (u32 thread_index)
+{
+ if (thread_index > vec_len (session_main.wrk))
+ return 0;
+ return session_main_get_worker (thread_index);
+}
+
+always_inline svm_msg_q_t *
+session_main_get_vpp_event_queue (u32 thread_index)
+{
+ return session_main_get_worker (thread_index)->vpp_event_queue;
+}
+
+always_inline u8
+session_main_is_enabled ()
+{
+ return session_main.is_enabled == 1;
+}
+
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
+#define session_cli_return_if_not_enabled() \
+ do \
+ { \
+ if (!session_main.is_enabled) \
+ return clib_error_return (0, "session layer is not enabled"); \
+ } \
+ while (0)
+
static inline void
session_evt_add_old (session_worker_t * wrk, session_evt_elt_t * elt)
{
@@ -517,20 +580,6 @@ uword unformat_transport_connection (unformat_input_t * input,
* Interface to transport protos
*/
-int session_enqueue_stream_connection (transport_connection_t * tc,
- vlib_buffer_t * b, u32 offset,
- u8 queue_event, u8 is_in_order);
-int session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto,
- u8 queue_event);
-int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event);
-int session_enqueue_dgram_connection_cl (session_t *s,
- session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -566,9 +615,279 @@ void session_register_transport (transport_proto_t transport_proto,
u32 output_node);
transport_proto_t session_add_transport_proto (void);
void session_register_update_time_fn (session_update_time_fn fn, u8 is_add);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index);
+void session_queue_run_on_main_thread (vlib_main_t *vm);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+void session_fifo_tuning (session_t *s, svm_fifo_t *f, session_ft_action_t act,
+ u32 len);
+
+/**
+ * Discards bytes from buffer chain
+ *
+ * It discards n_bytes_to_drop starting at first buffer after chain_b
+ */
+always_inline void
+session_enqueue_discard_chain_bytes (vlib_main_t *vm, vlib_buffer_t *b,
+ vlib_buffer_t **chain_b,
+ u32 n_bytes_to_drop)
+{
+ vlib_buffer_t *next = *chain_b;
+ u32 to_drop = n_bytes_to_drop;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ next = vlib_get_buffer (vm, next->next_buffer);
+ if (next->current_length > to_drop)
+ {
+ vlib_buffer_advance (next, to_drop);
+ to_drop = 0;
+ }
+ else
+ {
+ to_drop -= next->current_length;
+ next->current_length = 0;
+ }
+ }
+ *chain_b = next;
+
+ if (to_drop == 0)
+ b->total_length_not_including_first_buffer -= n_bytes_to_drop;
+}
+
+/**
+ * Enqueue buffer chain tail
+ */
+always_inline int
+session_enqueue_chain_tail (session_t *s, vlib_buffer_t *b, u32 offset,
+ u8 is_in_order)
+{
+ vlib_buffer_t *chain_b;
+ u32 chain_bi;
+
+ if (is_in_order)
+ {
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ u32 diff, written = 0;
+
+ if (offset)
+ {
+ diff = offset - b->current_length;
+ if (diff > b->total_length_not_including_first_buffer)
+ return 0;
+ chain_b = b;
+ session_enqueue_discard_chain_bytes (wrk->vm, b, &chain_b, diff);
+ chain_bi = vlib_get_buffer_index (wrk->vm, chain_b);
+ }
+ else
+ {
+ chain_bi = b->next_buffer;
+ }
+
+ chain_b = vlib_get_buffer (wrk->vm, chain_bi);
+ svm_fifo_seg_t *seg;
+
+ while (chain_b)
+ {
+ vec_add2 (wrk->rx_segs, seg, 1);
+ seg->data = vlib_buffer_get_current (chain_b);
+ seg->len = chain_b->current_length;
+ chain_b = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ?
+ vlib_get_buffer (wrk->vm, chain_b->next_buffer) :
+ 0;
+ }
+
+ written = svm_fifo_enqueue_segments (s->rx_fifo, wrk->rx_segs,
+ vec_len (wrk->rx_segs),
+ 1 /* allow partial*/);
+
+ vec_reset_length (wrk->rx_segs);
+
+ return written;
+ }
+ else
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ u8 *data;
+ u32 len;
+
+ /* TODO svm_fifo_enqueue_segments with offset */
+ chain_bi = b->next_buffer;
+ do
+ {
+ chain_b = vlib_get_buffer (vm, chain_bi);
+ data = vlib_buffer_get_current (chain_b);
+ len = chain_b->current_length;
+ if (!len)
+ continue;
+
+ rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data);
+ if (rv)
+ {
+ clib_warning ("failed to enqueue multi-buffer seg");
+ return -1;
+ }
+ offset += len;
+ }
+ while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ?
+ chain_b->next_buffer :
+ 0));
+
+ return 0;
+ }
+}
+
+/*
+ * Enqueue data for delivery to app. If requested, it queues app notification
+ * event for later delivery.
+ *
+ * @param tc Transport connection to which the data is to be enqueued
+ * @param b Buffer to be enqueued
+ * @param offset Offset at which to start enqueueing if out-of-order
+ * @param queue_event Flag to indicate if peer is to be notified or if event
+ * is to be queued. The former is useful when more data is
+ * enqueued and only one event is to be generated.
+ * @param is_in_order Flag to indicate if data is in order
+ * @return Number of bytes enqueued or a negative value if enqueueing failed.
+ */
+always_inline int
+session_enqueue_stream_connection (transport_connection_t *tc,
+ vlib_buffer_t *b, u32 offset,
+ u8 queue_event, u8 is_in_order)
+{
+ session_t *s;
+ int enqueued = 0, rv, in_order_off;
+
+ s = session_get (tc->s_index, tc->thread_index);
+
+ if (is_in_order)
+ {
+ enqueued = svm_fifo_enqueue (s->rx_fifo, b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) &&
+ enqueued >= 0))
+ {
+ in_order_off = enqueued > b->current_length ? enqueued : 0;
+ rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
+ if (rv > 0)
+ enqueued += rv;
+ }
+ }
+ else
+ {
+ rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
+ session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
+ /* if something was enqueued, report even this as success for ooo
+ * segment handling */
+ return rv;
+ }
+
+ if (queue_event)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
+ if (!(s->flags & SESSION_F_RX_EVT))
+ {
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ s->flags |= SESSION_F_RX_EVT;
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
+ }
+
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
+ }
+
+ return enqueued;
+}
+
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
+{
+ int rv;
+
+ ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) >=
+ b->current_length + sizeof (*hdr));
+
+ if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ {
+ svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) },
+ { vlib_buffer_get_current (b),
+ b->current_length } };
+
+ rv =
+ svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */);
+ }
+ else
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ svm_fifo_seg_t *segs = 0, *seg;
+ vlib_buffer_t *it = b;
+ u32 n_segs = 1;
+
+ vec_add2 (segs, seg, 1);
+ seg->data = (u8 *) hdr;
+ seg->len = sizeof (*hdr);
+ while (it)
+ {
+ vec_add2 (segs, seg, 1);
+ seg->data = vlib_buffer_get_current (it);
+ seg->len = it->current_length;
+ n_segs++;
+ if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ it = vlib_get_buffer (vm, it->next_buffer);
+ }
+ rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs,
+ 0 /* allow partial */);
+ vec_free (segs);
+ }
+
+ if (queue_event && rv > 0)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
+ if (!(s->flags & SESSION_F_RX_EVT))
+ {
+ u32 thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
+ s->flags |= SESSION_F_RX_EVT;
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
+ }
+
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
+ }
+ return rv > 0 ? rv : 0;
+}
+
+always_inline int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+always_inline int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
always_inline void
session_set_state (session_t *s, session_state_t session_state)
@@ -753,69 +1072,6 @@ ho_session_free (session_t *s)
transport_connection_t *listen_session_get_transport (session_t * s);
-/*
- * Session layer functions
- */
-
-always_inline session_main_t *
-vnet_get_session_main ()
-{
- return &session_main;
-}
-
-always_inline session_worker_t *
-session_main_get_worker (u32 thread_index)
-{
- return vec_elt_at_index (session_main.wrk, thread_index);
-}
-
-static inline session_worker_t *
-session_main_get_worker_if_valid (u32 thread_index)
-{
- if (thread_index > vec_len (session_main.wrk))
- return 0;
- return session_main_get_worker (thread_index);
-}
-
-always_inline svm_msg_q_t *
-session_main_get_vpp_event_queue (u32 thread_index)
-{
- return session_main_get_worker (thread_index)->vpp_event_queue;
-}
-
-always_inline u8
-session_main_is_enabled ()
-{
- return session_main.is_enabled == 1;
-}
-
-always_inline void
-session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
-{
- if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
- wrk->stats.errors[-error] += value;
- else
- SESSION_DBG ("unknown session counter");
-}
-
-always_inline void
-session_stat_error_inc (int error, int value)
-{
- session_worker_t *wrk;
- wrk = session_main_get_worker (vlib_get_thread_index ());
- session_worker_stat_error_inc (wrk, error, value);
-}
-
-#define session_cli_return_if_not_enabled() \
-do { \
- if (!session_main.is_enabled) \
- return clib_error_return (0, "session layer is not enabled"); \
-} while (0)
-
-void session_main_flush_enqueue_events (transport_proto_t transport_proto,
- u32 thread_index);
-void session_queue_run_on_main_thread (vlib_main_t * vm);
-
/**
* Add session node pending buffer with custom node
*
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 28a1feb1ed8..7678b0e0761 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -1380,6 +1380,71 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
+/**
+ * Lookup exact match 6-tuple amongst established and half-open sessions
+ *
+ * Does not look into session rules table and does not try to find a listener.
+ */
+transport_connection_t *
+session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4)
+{
+ session_table_t *st;
+ session_t *s;
+ int rv;
+
+ if (is_ip4)
+ {
+ session_kv4_t kv4;
+
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (PREDICT_FALSE (!st))
+ return 0;
+
+ /*
+ * Lookup session amongst established ones
+ */
+ make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4);
+ if (rv == 0)
+ {
+ s = session_get_from_handle (kv4.value);
+ return transport_get_connection (proto, s->connection_index,
+ s->thread_index);
+ }
+
+ /*
+ * Try half-open connections
+ */
+ rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF);
+ }
+ else
+ {
+ session_kv6_t kv6;
+
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (PREDICT_FALSE (!st))
+ return 0;
+
+ make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
+ if (rv == 0)
+ {
+ s = session_get_from_handle (kv6.value);
+ return transport_get_connection (proto, s->connection_index,
+ s->thread_index);
+ }
+
+ /* Try half-open connections */
+ rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6);
+ if (rv == 0)
+ return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF);
+ }
+ return 0;
+}
+
session_error_t
vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index 9f56af20a87..8f9ff7ee9bc 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -72,6 +72,9 @@ transport_connection_t *session_lookup_connection (u32 fib_index,
ip46_address_t * rmt,
u16 lcl_port, u16 rmt_port,
u8 proto, u8 is_ip4);
+transport_connection_t *
+session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4);
session_t *session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl,
u16 lcl_port, u8 proto, u8 use_wildcard);
session_t *session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl,
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index c0ff1de39bc..655f7ada09e 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -836,8 +836,7 @@ vlib_node_registration_t session_queue_node;
typedef struct
{
- u32 session_index;
- u32 server_thread_index;
+ u32 thread_index;
} session_queue_trace_t;
/* packet trace format function */
@@ -848,8 +847,7 @@ format_session_queue_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *);
- s = format (s, "session index %d thread index %d",
- t->session_index, t->server_thread_index);
+ s = format (s, "thread index %d", t->thread_index);
return s;
}
@@ -880,25 +878,25 @@ enum
};
static void
-session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
- u32 next_index, vlib_buffer_t **bufs, u16 n_segs,
- session_t *s, u32 n_trace)
+session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *bis,
+ u16 *nexts, u16 n_bufs)
{
- vlib_buffer_t **b = bufs;
+ u32 n_trace = vlib_get_trace_count (vm, node), *bi = bis;
+ u16 *next = nexts;
+ vlib_buffer_t *b;
- while (n_trace && n_segs)
+ while (n_trace && n_bufs)
{
- if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0],
- 1 /* follow_chain */)))
+ b = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_TRUE (
+ vlib_trace_buffer (vm, node, next[0], b, 1 /* follow_chain */)))
{
- session_queue_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->session_index = s->session_index;
- t->server_thread_index = s->thread_index;
+ session_queue_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->thread_index = vm->thread_index;
n_trace--;
}
- b++;
- n_segs--;
+ bi++;
+ n_bufs--;
}
vlib_set_trace_count (vm, node, n_trace);
}
@@ -1194,7 +1192,7 @@ session_tx_not_ready (session_t * s, u8 peek_data)
}
else
{
- if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED || !s->tx_fifo)
return 2;
}
return 0;
@@ -1402,7 +1400,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
session_evt_elt_t * elt,
int *n_tx_packets, u8 peek_data)
{
- u32 n_trace, n_left, pbi, next_index, max_burst;
+ u32 n_left, pbi, next_index, max_burst;
session_tx_context_t *ctx = &wrk->ctx;
session_main_t *smm = &session_main;
session_event_t *e = &elt->evt;
@@ -1576,10 +1574,6 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs,
ctx->n_segs_per_evt);
- if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0))
- session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs,
- ctx->n_segs_per_evt, ctx->s, n_trace);
-
if (PREDICT_FALSE (n_bufs))
vlib_buffer_free (vm, ctx->tx_buffers, n_bufs);
@@ -2072,7 +2066,13 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))
- session_flush_pending_tx_buffers (wrk, node);
+ {
+ if (PREDICT_FALSE (vlib_get_trace_count (vm, node) > 0))
+ session_tx_trace_frame (vm, node, wrk->pending_tx_buffers,
+ wrk->pending_tx_nexts,
+ vec_len (wrk->pending_tx_nexts));
+ session_flush_pending_tx_buffers (wrk, node);
+ }
vlib_node_increment_counter (vm, session_queue_node.index,
SESSION_QUEUE_ERROR_TX, n_tx_packets);
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index ac9b54f333a..3bd12d82fd8 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -592,6 +592,7 @@ transport_endpoint_mark_used (u8 proto, u32 fib_index, ip46_address_t *ip,
/* Pool reallocs with worker barrier */
lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
+ lep->ep.fib_index = fib_index;
lep->ep.port = port;
lep->proto = proto;
lep->refcnt = 1;
@@ -661,8 +662,8 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
break;
/* IP:port pair already in use, check if 6-tuple available */
- if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
- rmt->port, proto, rmt->is_ip4))
+ if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
continue;
/* 6-tuple is available so increment lcl endpoint refcount */
@@ -683,6 +684,13 @@ transport_port_alloc_max_tries ()
return tm->port_alloc_max_tries;
}
+u32
+transport_port_local_in_use ()
+{
+ transport_main_t *tm = &tp_main;
+ return pool_elts (tm->local_endpoints) - vec_len (tm->lcl_endpts_freelist);
+}
+
void
transport_clear_stats ()
{
@@ -792,9 +800,9 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
return 0;
/* IP:port pair already in use, check if 6-tuple available */
- if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
- rmt_cfg->peer.port, rmt->port, proto,
- rmt->is_ip4))
+ if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
return SESSION_E_PORTINUSE;
/* 6-tuple is available so increment lcl endpoint refcount */
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index c864be139f9..de2a6becaae 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -253,6 +253,7 @@ void transport_share_local_endpoint (u8 proto, u32 fib_index,
int transport_release_local_endpoint (u8 proto, u32 fib_index,
ip46_address_t *lcl_ip, u16 port);
u16 transport_port_alloc_max_tries ();
+u32 transport_port_local_in_use ();
void transport_clear_stats ();
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index a9114628f95..92586669378 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -503,8 +503,9 @@ update_lb (ip6_sr_policy_t * sr_policy)
};
/* Add FIB entry for BSID */
- fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
- FIB_PROTOCOL_IP6);
+ fhc = fib_table_get_flow_hash_config (
+ fib_table_find (FIB_PROTOCOL_IP6, sr_policy->fib_table),
+ FIB_PROTOCOL_IP6);
dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
load_balance_create (0, DPO_PROTO_IP6, fhc));
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 8851fb9c77e..02239d991bd 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1467,7 +1467,7 @@ tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
tcp_wrk_stats_t acc = {};
tcp_worker_ctx_t *wrk;
- vec_foreach (wrk, tm->wrk_ctx)
+ vec_foreach (wrk, tm->wrk)
{
#define _(name, type, str) acc.name += wrk->stats.name;
foreach_tcp_wrk_stat
@@ -1515,7 +1515,7 @@ tcp_main_enable (vlib_main_t * vm)
int thread;
/* Already initialized */
- if (tm->wrk_ctx)
+ if (tm->wrk)
return 0;
if ((error = vlib_call_init_function (vm, ip_main_init)))
@@ -1537,11 +1537,11 @@ tcp_main_enable (vlib_main_t * vm)
*/
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (tm->wrk_ctx, num_threads - 1);
+ vec_validate (tm->wrk, num_threads - 1);
n_workers = num_threads == 1 ? 1 : vtm->n_threads;
prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
- wrk = &tm->wrk_ctx[0];
+ wrk = &tm->wrk[0];
wrk->tco_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
tcp4_output_node.index);
wrk->tco_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
@@ -1549,7 +1549,7 @@ tcp_main_enable (vlib_main_t * vm)
for (thread = 0; thread < num_threads; thread++)
{
- wrk = &tm->wrk_ctx[thread];
+ wrk = &tm->wrk[thread];
vec_validate (wrk->pending_deq_acked, 255);
vec_validate (wrk->pending_disconnects, 255);
@@ -1562,8 +1562,8 @@ tcp_main_enable (vlib_main_t * vm)
if (thread > 0)
{
- wrk->tco_next_node[0] = tm->wrk_ctx[0].tco_next_node[0];
- wrk->tco_next_node[1] = tm->wrk_ctx[0].tco_next_node[1];
+ wrk->tco_next_node[0] = tm->wrk[0].tco_next_node[0];
+ wrk->tco_next_node[1] = tm->wrk[0].tco_next_node[1];
}
/*
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 8feac807d59..830b81df9ee 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -220,7 +220,7 @@ typedef struct tcp_configuration_
typedef struct _tcp_main
{
/** per-worker context */
- tcp_worker_ctx_t *wrk_ctx;
+ tcp_worker_ctx_t *wrk;
/* Pool of listeners. */
tcp_connection_t *listener_pool;
@@ -301,8 +301,8 @@ vnet_get_tcp_main ()
always_inline tcp_worker_ctx_t *
tcp_get_worker (u32 thread_index)
{
- ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
- return &tcp_main.wrk_ctx[thread_index];
+ ASSERT (thread_index < vec_len (tcp_main.wrk));
+ return &tcp_main.wrk[thread_index];
}
tcp_connection_t *tcp_connection_alloc (u8 thread_index);
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index 55bc5764df2..c14994aa440 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -919,7 +919,7 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
return clib_error_return (0, "unknown input `%U'", format_unformat_error,
input);
- for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
+ for (thread = 0; thread < vec_len (tm->wrk); thread++)
{
wrk = tcp_get_worker (thread);
vlib_cli_output (vm, "Thread %u:\n", thread);
@@ -957,7 +957,7 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return clib_error_return (0, "unknown input `%U'", format_unformat_error,
input);
- for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
+ for (thread = 0; thread < vec_len (tm->wrk); thread++)
{
wrk = tcp_get_worker (thread);
clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index ccd0e3fe3ee..4c48f9ecfc5 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -68,7 +68,7 @@ always_inline tcp_connection_t *
tcp_connection_get_if_valid (u32 conn_index, u32 thread_index)
{
tcp_worker_ctx_t *wrk;
- if (thread_index >= vec_len (tcp_main.wrk_ctx))
+ if (thread_index >= vec_len (tcp_main.wrk))
return 0;
wrk = tcp_get_worker (thread_index);
if (pool_is_free_index (wrk->connections, conn_index))
@@ -217,7 +217,7 @@ tcp_is_lost_fin (tcp_connection_t * tc)
always_inline u32
tcp_time_tstamp (u32 thread_index)
{
- return tcp_main.wrk_ctx[thread_index].time_tstamp;
+ return tcp_main.wrk[thread_index].time_tstamp;
}
/**
@@ -226,14 +226,13 @@ tcp_time_tstamp (u32 thread_index)
always_inline u32
tcp_tstamp (tcp_connection_t * tc)
{
- return (tcp_main.wrk_ctx[tc->c_thread_index].time_tstamp -
- tc->timestamp_delta);
+ return (tcp_main.wrk[tc->c_thread_index].time_tstamp - tc->timestamp_delta);
}
always_inline f64
tcp_time_now_us (u32 thread_index)
{
- return tcp_main.wrk_ctx[thread_index].time_us;
+ return tcp_main.wrk[thread_index].time_us;
}
always_inline void
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index cd3e4b7700c..15b2c92dcf1 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -217,20 +217,6 @@ static int
tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0,
vlib_buffer_t * b0, tcp_header_t * th0, u32 * error0)
{
- /* We could get a burst of RSTs interleaved with acks */
- if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED))
- {
- tcp_send_reset (tc0);
- *error0 = TCP_ERROR_CONNECTION_CLOSED;
- goto error;
- }
-
- if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
- {
- *error0 = TCP_ERROR_SEGMENT_INVALID;
- goto error;
- }
-
if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts, 0)))
{
*error0 = TCP_ERROR_OPTIONS;
@@ -1372,6 +1358,42 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
+always_inline int
+tcp_segment_is_exception (tcp_connection_t *tc, tcp_header_t *th)
+{
+ /* TODO(fcoras): tcp-input should not allow segments without one of ack, rst,
+ * syn flags, so we shouldn't be checking for their presence. Leave the check
+ * in for now, remove in due time */
+ ASSERT (th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN));
+ return !tc || tc->state == TCP_STATE_CLOSED ||
+ !(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN));
+}
+
+always_inline void
+tcp_segment_handle_exception (tcp_connection_t *tc, tcp_header_t *th,
+ u32 *error)
+{
+ if (!tc)
+ {
+ *error = TCP_ERROR_INVALID_CONNECTION;
+ return;
+ }
+
+ /* We could get a burst of RSTs interleaved with acks */
+ if (tc->state == TCP_STATE_CLOSED)
+ {
+ tcp_send_reset (tc);
+ *error = TCP_ERROR_CONNECTION_CLOSED;
+ return;
+ }
+
+ if (!(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN)))
+ {
+ *error = TCP_ERROR_SEGMENT_INVALID;
+ return;
+ }
+}
+
always_inline uword
tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, int is_ip4)
@@ -1404,15 +1426,14 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
thread_index);
+ th = tcp_buffer_hdr (b[0]);
- if (PREDICT_FALSE (tc == 0))
+ if (PREDICT_FALSE (tcp_segment_is_exception (tc, th)))
{
- error = TCP_ERROR_INVALID_CONNECTION;
+ tcp_segment_handle_exception (tc, th, &error);
goto done;
}
- th = tcp_buffer_hdr (b[0]);
-
/* TODO header prediction fast path */
/* 1-4: check SEQ, RST, SYN */
@@ -2819,8 +2840,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
- next[0] = next[1] = TCP_INPUT_NEXT_DROP;
-
tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
is_nolookup);
tc1 = tcp_input_lookup_buffer (b[1], thread_index, &error1, is_ip4,
@@ -2881,7 +2900,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
- next[0] = TCP_INPUT_NEXT_DROP;
tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
is_nolookup);
if (PREDICT_TRUE (tc0 != 0))
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 2fd20acf241..2e8a10896eb 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -299,7 +299,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
void
tcp_update_burst_snd_vars (tcp_connection_t * tc)
{
- tcp_main_t *tm = &tcp_main;
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
/* Compute options to be used for connection. These may be reused when
* sending data or to compute the effective mss (snd_mss) */
@@ -310,8 +310,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
ASSERT (tc->snd_mss > 0);
- tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
- &tc->snd_opts);
+ tcp_options_write (wrk->cached_opts, &tc->snd_opts);
tcp_update_rcv_wnd (tc);
@@ -875,7 +874,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
{
u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
u32 advertise_wnd, data_len;
- tcp_main_t *tm = &tcp_main;
tcp_header_t *th;
data_len = b->current_length;
@@ -907,9 +905,8 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
if (maybe_burst)
{
- clib_memcpy_fast ((u8 *) (th + 1),
- tm->wrk_ctx[tc->c_thread_index].cached_opts,
- tc->snd_opts_len);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+ clib_memcpy_fast ((u8 *) (th + 1), wrk->cached_opts, tc->snd_opts_len);
}
else
{
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 6c8992cd0de..dd1da0a01d6 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -97,7 +97,8 @@ format_udp_vars (u8 * s, va_list * args)
s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
- s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index);
+ s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index,
+ uc->next_node_index, uc->next_node_opaque);
if (!(uc->flags & UDP_CONN_F_LISTEN))
s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
else
diff --git a/src/vnet/vxlan-gpe/FEATURE.yaml b/src/vnet/vxlan-gpe/FEATURE.yaml
deleted file mode 100644
index f4ec2f4c517..00000000000
--- a/src/vnet/vxlan-gpe/FEATURE.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: VxLAN-GPE
-maintainer: Hongjun Ni <hongjun.ni@intel.com>
-features:
- - VxLAN-GPE decapsulation
- - VxLAN-GPE encapsulation
-
-description: "VxLAN-GPE tunnel handling"
-state: production
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
deleted file mode 100644
index d4c7424630d..00000000000
--- a/src/vnet/vxlan-gpe/decap.c
+++ /dev/null
@@ -1,1167 +0,0 @@
-/*
- * decap.c - decapsulate VXLAN GPE
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief Functions for decapsulating VXLAN GPE tunnels
- *
-*/
-
-#include <vlib/vlib.h>
-#include <vnet/udp/udp_local.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-
-/**
- * @brief Struct for VXLAN GPE decap packet tracing
- *
- */
-typedef struct
-{
- u32 next_index;
- u32 tunnel_index;
- u32 error;
-} vxlan_gpe_rx_trace_t;
-
-/**
- * @brief Tracing function for VXLAN GPE packet decapsulation
- *
- * @param *s
- * @param *args
- *
- * @return *s
- *
- */
-static u8 *
-format_vxlan_gpe_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gpe_rx_trace_t *t = va_arg (*args, vxlan_gpe_rx_trace_t *);
-
- if (t->tunnel_index != ~0)
- {
- s = format (s, "VXLAN-GPE: tunnel %d next %d error %d", t->tunnel_index,
- t->next_index, t->error);
- }
- else
- {
- s = format (s, "VXLAN-GPE: no tunnel next %d error %d\n", t->next_index,
- t->error);
- }
- return s;
-}
-
-/**
- * @brief Tracing function for VXLAN GPE packet decapsulation including length
- *
- * @param *s
- * @param *args
- *
- * @return *s
- *
- */
-static u8 *
-format_vxlan_gpe_with_length (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-
- return s;
-}
-
-typedef struct
-{
- vxlan4_gpe_tunnel_key_t key;
- vxlan_gpe_decap_info_t val;
-} vxlan4_gpe_tunnel_cache_t;
-
-static const vxlan_gpe_decap_info_t decap_not_found = {
- .tunnel_index = ~0,
- .next_index = VXLAN_GPE_INPUT_NEXT_DROP,
- .error = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL
-};
-
-always_inline vxlan_gpe_decap_info_t
-vxlan4_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
- vxlan4_gpe_tunnel_cache_t *cache,
- ip4_vxlan_gpe_header_t *iuvn4_0)
-{
- /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
- * VNI */
- vxlan4_gpe_tunnel_key_t key4 = {
- .local = iuvn4_0->ip4.dst_address.as_u32,
- .remote = iuvn4_0->ip4.src_address.as_u32,
- .vni = iuvn4_0->vxlan.vni_res,
- .port = (u32) iuvn4_0->udp.dst_port,
- };
-
- if (PREDICT_TRUE (key4.as_u64[0] == cache->key.as_u64[0] &&
- key4.as_u64[1] == cache->key.as_u64[1]))
- {
- /* cache hit */
- return cache->val;
- }
-
- uword *p = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4);
- if (PREDICT_TRUE (p != 0))
- {
- u32 next = (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn4_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- cache->key.as_u64[0] = key4.as_u64[0];
- cache->key.as_u64[1] = key4.as_u64[1];
-
- cache->val.error = 0;
- cache->val.tunnel_index = p[0];
- cache->val.next_index = next;
-
- return cache->val;
- }
-
- return decap_not_found;
-}
-
-typedef struct
-{
- vxlan6_gpe_tunnel_key_t key;
- vxlan_gpe_decap_info_t val;
-} vxlan6_gpe_tunnel_cache_t;
-
-always_inline vxlan_gpe_decap_info_t
-vxlan6_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
- vxlan6_gpe_tunnel_cache_t *cache,
- ip6_vxlan_gpe_header_t *iuvn6_0)
-{
- /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
- * VNI */
- vxlan6_gpe_tunnel_key_t key6;
-
- ip6_address_copy (&key6.local, &iuvn6_0->ip6.dst_address);
- ip6_address_copy (&key6.remote, &iuvn6_0->ip6.src_address);
- key6.vni = iuvn6_0->vxlan.vni_res;
- key6.port = iuvn6_0->udp.dst_port;
-
- if (PREDICT_TRUE (memcmp (&key6, &cache->key, sizeof (cache->key)) == 0))
- {
- /* cache hit */
- return cache->val;
- }
-
- uword *p = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6);
- if (PREDICT_TRUE (p != 0))
- {
- u32 next = (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn6_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- clib_memcpy_fast (&cache->key, &key6, sizeof (key6));
- cache->val.error = 0;
- cache->val.tunnel_index = p[0];
- cache->val.next_index = next;
-
- return cache->val;
- }
-
- return decap_not_found;
-}
-
-/**
- * @brief Common processing for IPv4 and IPv6 VXLAN GPE decap dispatch functions
- *
- * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
- * tunnels are "terminate local". This means that there is no "TX" interface for this
- * decap case, so that field in the buffer_metadata can be "used for something else".
- * The something else in this case is, for the IPv4/IPv6 inner-packet type case, the
- * FIB index used to look up the inner-packet's adjacency.
- *
- * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
- *
- * @param *vm
- * @param *node
- * @param *from_frame
- * @param is_ip4
- *
- * @return from_frame->n_vectors
- *
- */
-always_inline uword
-vxlan_gpe_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_gpe_main_t *nngm = &vxlan_gpe_main;
- vnet_main_t *vnm = nngm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vxlan4_gpe_tunnel_cache_t last4;
- vxlan6_gpe_tunnel_cache_t last6;
- u32 pkts_decapsulated = 0;
- u32 thread_index = vm->thread_index;
- u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
-
- if (is_ip4)
- clib_memset (&last4, 0xff, sizeof (last4));
- else
- clib_memset (&last6, 0xff, sizeof (last6));
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
- stats_sw_if_index = node->runtime_data[0];
- stats_n_packets = stats_n_bytes = 0;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
- u32 next0, next1;
- ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
- ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
- vxlan_gpe_decap_info_t di0, di1;
- vxlan_gpe_tunnel_t *t0, *t1;
- u32 error0, error1;
- u32 sw_if_index0, sw_if_index1, len0, len1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- if (is_ip4)
- {
- /* udp leaves current_data pointing at the vxlan-gpe header */
- vlib_buffer_advance (b0,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip4_header_t)));
- vlib_buffer_advance (b1,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip4_header_t)));
-
- iuvn4_0 = vlib_buffer_get_current (b0);
- iuvn4_1 = vlib_buffer_get_current (b1);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b0, sizeof (*iuvn4_0));
- vlib_buffer_advance (b1, sizeof (*iuvn4_1));
-
- di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
- di1 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_1);
- }
- else
- {
- /* udp leaves current_data pointing at the vxlan-gpe header */
- vlib_buffer_advance (b0,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip6_header_t)));
- vlib_buffer_advance (b1,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip6_header_t)));
-
- iuvn6_0 = vlib_buffer_get_current (b0);
- iuvn6_1 = vlib_buffer_get_current (b1);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b0, sizeof (*iuvn6_0));
- vlib_buffer_advance (b1, sizeof (*iuvn6_1));
-
- di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
- di1 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_1);
- }
-
- /* Process packet 0 */
- next0 = di0.next_index;
- error0 = di0.error;
- if (error0 != 0)
- {
- goto trace0;
- }
-
- t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);
-
- sw_if_index0 = t0->sw_if_index;
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b0);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
-
- /**
- * ip[46] lookup in the configured FIB
- */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
-
- pkts_decapsulated++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
-
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- thread_index,
- stats_sw_if_index,
- stats_n_packets,
- stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
-
- trace0:b0->error = error0 ? node->errors[error0] : 0;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = di0.tunnel_index;
- }
-
- /* Process packet 1 */
- next1 = di1.next_index;
- error1 = di1.error;
- if (error1 != 0)
- {
- goto trace1;
- }
-
- t1 = pool_elt_at_index (nngm->tunnels, di1.tunnel_index);
-
- sw_if_index1 = t1->sw_if_index;
- len1 = vlib_buffer_length_in_chain (vm, b1);
-
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b1);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;
-
- /*
- * ip[46] lookup in the configured FIB
- */
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
-
- pkts_decapsulated++;
- stats_n_packets += 1;
- stats_n_bytes += len1;
-
- /* Batch stats increment on the same vxlan tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len1;
- if (stats_n_packets)
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- thread_index,
- stats_sw_if_index,
- stats_n_packets,
- stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len1;
- stats_sw_if_index = sw_if_index1;
- }
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->decap_fib_index;
-
- trace1:b1->error = error1 ? node->errors[error1] : 0;
-
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_rx_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->next_index = next1;
- tr->error = error1;
- tr->tunnel_index = di1.tunnel_index;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_vxlan_gpe_header_t *iuvn4_0;
- ip6_vxlan_gpe_header_t *iuvn6_0;
- vxlan_gpe_decap_info_t di0;
- vxlan_gpe_tunnel_t *t0;
- u32 error0;
- u32 sw_if_index0, len0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- if (is_ip4)
- {
- /* udp leaves current_data pointing at the vxlan-gpe header */
- vlib_buffer_advance (b0,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip4_header_t)));
-
- iuvn4_0 = vlib_buffer_get_current (b0);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b0, sizeof (*iuvn4_0));
-
- di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
- }
- else
- {
- /* udp leaves current_data pointing at the vxlan-gpe header */
- vlib_buffer_advance (b0,
- -(word) (sizeof (udp_header_t) +
- sizeof (ip6_header_t)));
-
- iuvn6_0 = vlib_buffer_get_current (b0);
-
- /* pop (ip, udp, vxlan) */
- vlib_buffer_advance (b0, sizeof (*iuvn6_0));
-
- di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
- }
-
- next0 = di0.next_index;
- error0 = di0.error;
- if (error0 != 0)
- {
- goto trace00;
- }
-
- t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);
-
- sw_if_index0 = t0->sw_if_index;
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b0);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
-
- /*
- * ip[46] lookup in the configured FIB
- */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->decap_fib_index;
-
- pkts_decapsulated++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
-
- /* Batch stats increment on the same vxlan-gpe tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- thread_index,
- stats_sw_if_index,
- stats_n_packets,
- stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
-
- trace00:b0->error = error0 ? node->errors[error0] : 0;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = di0.tunnel_index;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm,
- is_ip4 ? vxlan4_gpe_input_node.index :
- vxlan6_gpe_input_node.index,
- VXLAN_GPE_ERROR_DECAPSULATED,
- pkts_decapsulated);
-
- /* Increment any remaining batch stats */
- if (stats_n_packets)
- {
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- node->runtime_data[0] = stats_sw_if_index;
- }
- return from_frame->n_vectors;
-}
-
-/**
- * @brief Graph processing dispatch function for IPv4 VXLAN GPE
- *
- * @node vxlan4-gpe-input
- * @param *vm
- * @param *node
- * @param *from_frame
- *
- * @return from_frame->n_vectors
- *
- */
-VLIB_NODE_FN (vxlan4_gpe_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-#ifndef CLIB_MARCH_VARIANT
-void
-vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index)
-{
- vxlan_gpe_main_t *hm = &vxlan_gpe_main;
- hm->decap_next_node_list[protocol_id] = next_node_index;
- return;
-}
-
-void
-vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index)
-{
- vxlan_gpe_main_t *hm = &vxlan_gpe_main;
- hm->decap_next_node_list[protocol_id] = VXLAN_GPE_INPUT_NEXT_DROP;
- return;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-/**
- * @brief Graph processing dispatch function for IPv6 VXLAN GPE
- *
- * @node vxlan6-gpe-input
- * @param *vm
- * @param *node
- * @param *from_frame
- *
- * @return from_frame->n_vectors - uword
- *
- */
-VLIB_NODE_FN (vxlan6_gpe_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gpe_input (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-/**
- * @brief VXLAN GPE error strings
- */
-static char *vxlan_gpe_error_strings[] = {
-#define vxlan_gpe_error(n,s) s,
-#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
-#undef vxlan_gpe_error
-#undef _
-};
-
-VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
- .name = "vxlan4-gpe-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
- .error_strings = vxlan_gpe_error_strings,
-
- .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
- foreach_vxlan_gpe_input_next
-#undef _
- },
-
- .format_buffer = format_vxlan_gpe_with_length,
- .format_trace = format_vxlan_gpe_rx_trace,
- // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
-};
-
-VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
- .name = "vxlan6-gpe-input",
- /* Takes a vector of packets. */
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(vxlan_gpe_error_strings),
- .error_strings = vxlan_gpe_error_strings,
-
- .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
- foreach_vxlan_gpe_input_next
-#undef _
- },
-
- .format_buffer = format_vxlan_gpe_with_length,
- .format_trace = format_vxlan_gpe_rx_trace,
- // $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
-};
-
-typedef enum
-{
- IP_VXLAN_BYPASS_NEXT_DROP,
- IP_VXLAN_BYPASS_NEXT_VXLAN,
- IP_VXLAN_BYPASS_N_NEXT,
-} ip_vxlan_bypass_next_t;
-
-always_inline uword
-ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 is_ip4)
-{
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
- u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
- vtep4_key_t last_vtep4; /* last IPv4 address / fib index
- matching a local VTEP address */
- vtep6_key_t last_vtep6; /* last IPv6 address / fib index
- matching a local VTEP address */
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- vxlan4_gpe_tunnel_cache_t last4;
- vxlan6_gpe_tunnel_cache_t last6;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
- if (is_ip4)
- {
- vtep4_key_init (&last_vtep4);
- clib_memset (&last4, 0xff, sizeof last4);
- }
- else
- {
- vtep6_key_init (&last_vtep6);
- clib_memset (&last6, 0xff, sizeof last6);
- }
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip40, *ip41;
- ip6_header_t *ip60, *ip61;
- udp_header_t *udp0, *udp1;
- ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
- ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
- vxlan_gpe_decap_info_t di0, di1;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- u32 bi1, ip_len1, udp_len1, flags1, next1;
- i32 len_diff0, len_diff1;
- u8 error0, good_udp0, proto0;
- u8 error1, good_udp1, proto1;
-
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = b[0];
- b1 = b[1];
- b += 2;
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- ip41 = vlib_buffer_get_current (b1);
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- ip61 = vlib_buffer_get_current (b1);
- }
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
- vnet_feature_next (&next1, b1);
-
- if (is_ip4)
- {
- proto0 = ip40->protocol;
- proto1 = ip41->protocol;
- }
- else
- {
- proto0 = ip60->protocol;
- proto1 = ip61->protocol;
- }
-
- /* Process packet 0 */
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit0; /* not UDP packet */
-
- if (is_ip4)
- {
- udp0 = ip4_next_header (ip40);
- iuvn4_0 = vlib_buffer_get_current (b0);
- di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
- }
- else
- {
- udp0 = ip6_next_header (ip60);
- iuvn6_0 = vlib_buffer_get_current (b0);
- di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
- }
-
- if (PREDICT_FALSE (di0.tunnel_index == ~0))
- goto exit0; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&ngm->vtep_table, b0, ip40, &last_vtep4,
- &ngm->vtep4_u512))
-#else
- if (!vtep4_check (&ngm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit0; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&ngm->vtep_table, b0, ip60, &last_vtep6))
- goto exit0; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan_gpe-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit0:
- /* Process packet 1 */
- if (proto1 != IP_PROTOCOL_UDP)
- goto exit1; /* not UDP packet */
-
- if (is_ip4)
- {
- udp1 = ip4_next_header (ip41);
- iuvn4_1 = vlib_buffer_get_current (b1);
- di1 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_1);
- }
- else
- {
- udp1 = ip6_next_header (ip61);
- iuvn6_1 = vlib_buffer_get_current (b1);
- di1 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_1);
- }
-
- if (PREDICT_FALSE (di1.tunnel_index == ~0))
- goto exit1; /* unknown interface */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&ngm->vtep_table, b1, ip41, &last_vtep4,
- &ngm->vtep4_u512))
-#else
- if (!vtep4_check (&ngm->vtep_table, b1, ip41, &last_vtep4))
-#endif
- goto exit1; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&ngm->vtep_table, b1, ip61, &last_vtep6))
- goto exit1; /* no local VTEP for VXLAN packet */
- }
-
- flags1 = b1->flags;
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp1 |= udp1->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len1 = clib_net_to_host_u16 (ip41->length);
- else
- ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
- len_diff1 = ip_len1 - udp_len1;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp1))
- {
- if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
- else
- flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
- good_udp1 =
- (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
- }
-
- next1 = error1 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b1->error = error1 ? error_node->errors[error1] : 0;
-
- /* vxlan_gpe-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b1,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b1,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit1:
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- udp_header_t *udp0;
- ip4_vxlan_gpe_header_t *iuvn4_0;
- ip6_vxlan_gpe_header_t *iuvn6_0;
- vxlan_gpe_decap_info_t di0;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- i32 len_diff0;
- u8 error0, good_udp0, proto0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = b[0];
- b++;
- if (is_ip4)
- ip40 = vlib_buffer_get_current (b0);
- else
- ip60 = vlib_buffer_get_current (b0);
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
-
- if (is_ip4)
- proto0 = ip40->protocol;
- else
- proto0 = ip60->protocol;
-
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit; /* not UDP packet */
-
- if (is_ip4)
- {
- udp0 = ip4_next_header (ip40);
- iuvn4_0 = vlib_buffer_get_current (b0);
- di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
- }
- else
- {
- udp0 = ip6_next_header (ip60);
- iuvn6_0 = vlib_buffer_get_current (b0);
- di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
- }
-
- if (PREDICT_FALSE (di0.tunnel_index == ~0))
- goto exit; /* unknown interface */
-
- /* Validate DIP against VTEPs */
-
- if (is_ip4)
- {
-#ifdef CLIB_HAVE_VEC512
- if (!vtep4_check_vector (&ngm->vtep_table, b0, ip40, &last_vtep4,
- &ngm->vtep4_u512))
-#else
- if (!vtep4_check (&ngm->vtep_table, b0, ip40, &last_vtep4))
-#endif
- goto exit; /* no local VTEP for VXLAN packet */
- }
- else
- {
- if (!vtep6_check (&ngm->vtep_table, b0, ip60, &last_vtep6))
- goto exit; /* no local VTEP for VXLAN packet */
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan_gpe-input node expect current at VXLAN header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
-}
-
-VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
- .name = "ip4-vxlan-gpe-bypass",
- .vector_size = sizeof (u32),
-
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-gpe-input",
- },
-
- .format_buffer = format_ip4_header,
- .format_trace = format_ip4_forward_next_trace,
-};
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip4_vxlan_gpe_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip4_vxlan_gpe_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
-}
-
-VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
- .name = "ip6-vxlan-gpe-bypass",
- .vector_size = sizeof (u32),
-
- .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-gpe-input",
- },
-
- .format_buffer = format_ip6_header,
- .format_trace = format_ip6_forward_next_trace,
-};
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip6_vxlan_gpe_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip6_vxlan_gpe_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/vnet/vxlan-gpe/dir.dox
deleted file mode 100644
index c154733b21f..00000000000
--- a/src/vnet/vxlan-gpe/dir.dox
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- @dir
- @brief VXLAN GPE
-
- Based on IETF: draft-quinn-vxlan-gpe-03.txt
-
-Abstract
-
- This draft describes extending Virtual eXtensible Local Area Network
- (VXLAN), via changes to the VXLAN header, with three new
- capabilities: support for multi-protocol encapsulation, operations,
- administration and management (OAM) signaling and explicit
- versioning.
-
- See file: vxlan-gpe-rfc.txt
-
-*/ \ No newline at end of file
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
deleted file mode 100644
index a769861577d..00000000000
--- a/src/vnet/vxlan-gpe/encap.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief Functions for encapsulating VXLAN GPE tunnels
- *
-*/
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/udp/udp_inlines.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-
-/** Statistics (not really errors) */
-#define foreach_vxlan_gpe_encap_error \
-_(ENCAPSULATED, "good packets encapsulated")
-
-/**
- * @brief VXLAN GPE encap error strings
- */
-static char *vxlan_gpe_encap_error_strings[] = {
-#define _(sym,string) string,
- foreach_vxlan_gpe_encap_error
-#undef _
-};
-
-/**
- * @brief Struct for VXLAN GPE errors/counters
- */
-typedef enum
-{
-#define _(sym,str) VXLAN_GPE_ENCAP_ERROR_##sym,
- foreach_vxlan_gpe_encap_error
-#undef _
- VXLAN_GPE_ENCAP_N_ERROR,
-} vxlan_gpe_encap_error_t;
-
-/**
- * @brief Struct for tracing VXLAN GPE encapsulated packets
- */
-typedef struct
-{
- u32 tunnel_index;
-} vxlan_gpe_encap_trace_t;
-
-/**
- * @brief Trace of packets encapsulated in VXLAN GPE
- *
- * @param *s
- * @param *args
- *
- * @return *s
- *
- */
-u8 *
-format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gpe_encap_trace_t *t = va_arg (*args, vxlan_gpe_encap_trace_t *);
-
- s = format (s, "VXLAN-GPE-ENCAP: tunnel %d", t->tunnel_index);
- return s;
-}
-
-/**
- * @brief Instantiates UDP + VXLAN-GPE header then set next node to IP4|6 lookup
- *
- * @param *ngm
- * @param *b0
- * @param *t0 contains rewrite header
- * @param *next0 relative index of next dispatch function (next node)
- * @param is_v4 Is this IPv4? (or IPv6)
- *
- */
-always_inline void
-vxlan_gpe_encap_one_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
- vxlan_gpe_tunnel_t *t0, u32 *next0,
- ip_address_family_t af)
-{
- ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
- ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
-
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
- N_AF, UDP_ENCAP_FIXUP_NONE);
- next0[0] = t0->encap_next_node;
-}
-
-/**
- * @brief Instantiates UDP + VXLAN-GPE header then set next node to IP4|6 lookup for two packets
- *
- * @param *ngm
- * @param *b0 Packet0
- * @param *b1 Packet1
- * @param *t0 contains rewrite header for Packet0
- * @param *t1 contains rewrite header for Packet1
- * @param *next0 relative index of next dispatch function (next node) for Packet0
- * @param *next1 relative index of next dispatch function (next node) for Packet1
- * @param is_v4 Is this IPv4? (or IPv6)
- *
- */
-always_inline void
-vxlan_gpe_encap_two_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
- vlib_buffer_t *b1, vxlan_gpe_tunnel_t *t0,
- vxlan_gpe_tunnel_t *t1, u32 *next0, u32 *next1,
- ip_address_family_t af)
-{
- ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
- ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
-
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
- N_AF, UDP_ENCAP_FIXUP_NONE);
- ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, af,
- N_AF, UDP_ENCAP_FIXUP_NONE);
- next0[0] = next1[0] = t0->encap_next_node;
-}
-
-/**
- * @brief Common processing for IPv4 and IPv6 VXLAN GPE encap dispatch functions
- *
- * It is worth noting that other than trivial UDP forwarding (transit), VXLAN GPE
- * tunnels are "establish local". This means that we don't have a TX interface as yet
- * as we need to look up where the outer-header dest is. By setting the TX index in the
- * buffer metadata to the encap FIB, we can do a lookup to get the adjacency and real TX.
- *
- * vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- *
- * @node vxlan-gpe-input
- * @param *vm
- * @param *node
- * @param *from_frame
- *
- * @return from_frame->n_vectors
- *
- */
-static uword
-vxlan_gpe_encap (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * from_frame)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
- vnet_main_t *vnm = ngm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- u32 pkts_encapsulated = 0;
- u32 thread_index = vm->thread_index;
- u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
- stats_sw_if_index = node->runtime_data[0];
- stats_n_packets = stats_n_bytes = 0;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
- u32 sw_if_index0 = ~0, sw_if_index1 = ~0, len0, len1;
- vnet_hw_interface_t *hi0, *hi1;
- vxlan_gpe_tunnel_t *t0 = NULL, *t1 = NULL;
- ip_address_family_t af_0 = AF_IP4, af_1 = AF_IP4;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- u32 next0, next1;
-
- next0 = next1 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
-
- /* Prefetch next iteration. */
- {
- vlib_prefetch_buffer_header (b[2], LOAD);
- vlib_prefetch_buffer_header (b[3], LOAD);
-
- CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- /* get "af_0" */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi0 =
- vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_TX]);
- t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
- af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
- }
-
- /* get "af_1" */
- if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- sw_if_index1 = sw_if_index0;
- hi1 = hi0;
- t1 = t0;
- af_1 = af_0;
- }
- else
- {
- sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- hi1 =
- vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b[1])->sw_if_index
- [VLIB_TX]);
- t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
- af_1 =
- (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
- }
- }
-
- if (PREDICT_TRUE (af_0 == af_1))
- {
- vxlan_gpe_encap_two_inline (ngm, b[0], b[1], t0, t1, &next0,
- &next1, af_0);
- }
- else
- {
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
- vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, af_1);
- }
-
- /* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = t1->encap_fib_index;
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
- vnet_buffer (b[1])->sw_if_index[VLIB_RX] = sw_if_index1;
- pkts_encapsulated += 2;
-
- len0 = vlib_buffer_length_in_chain (vm, b[0]);
- len1 = vlib_buffer_length_in_chain (vm, b[1]);
- stats_n_packets += 2;
- stats_n_bytes += len0 + len1;
-
- /* Batch stats increment on the same vxlan tunnel so counter is not
- incremented per packet. Note stats are still incremented for deleted
- and admin-down tunnel where packets are dropped. It is not worthwhile
- to check for this rare case and affect normal path performance. */
- if (PREDICT_FALSE ((sw_if_index0 != stats_sw_if_index)
- || (sw_if_index1 != stats_sw_if_index)))
- {
- stats_n_packets -= 2;
- stats_n_bytes -= len0 + len1;
- if (sw_if_index0 == sw_if_index1)
- {
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_TX, thread_index,
- stats_sw_if_index, stats_n_packets, stats_n_bytes);
- stats_sw_if_index = sw_if_index0;
- stats_n_packets = 2;
- stats_n_bytes = len0 + len1;
- }
- else
- {
- vlib_increment_combined_counter (im->combined_sw_if_counters
- +
- VNET_INTERFACE_COUNTER_TX,
- thread_index, sw_if_index0,
- 1, len0);
- vlib_increment_combined_counter (im->combined_sw_if_counters
- +
- VNET_INTERFACE_COUNTER_TX,
- thread_index, sw_if_index1,
- 1, len1);
- }
- }
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->tunnel_index = t0 - ngm->tunnels;
- }
-
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b[1],
- sizeof (*tr));
- tr->tunnel_index = t1 - ngm->tunnels;
- }
- b += 2;
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- u32 next0 = VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- /* get "af_0" */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- hi0 =
- vnet_get_sup_hw_interface (vnm,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_TX]);
-
- t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
-
- af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
- }
-
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
-
- /* Reset to look up tunnel partner in the configured FIB */
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0;
- pkts_encapsulated++;
-
- len0 = vlib_buffer_length_in_chain (vm, b[0]);
- stats_n_packets += 1;
- stats_n_bytes += len0;
-
- /* Batch stats increment on the same vxlan tunnel so counter is not
- * incremented per packet. Note stats are still incremented for deleted
- * and admin-down tunnel where packets are dropped. It is not worthwhile
- * to check for this rare case and affect normal path performance. */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_TX,
- thread_index,
- stats_sw_if_index,
- stats_n_packets,
- stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gpe_encap_trace_t *tr = vlib_add_trace (vm, node, b[0],
- sizeof (*tr));
- tr->tunnel_index = t0 - ngm->tunnels;
- }
- b += 1;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_node_increment_counter (vm, node->node_index,
- VXLAN_GPE_ENCAP_ERROR_ENCAPSULATED,
- pkts_encapsulated);
- /* Increment any remaining batch stats */
- if (stats_n_packets)
- {
- vlib_increment_combined_counter (im->combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_TX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- node->runtime_data[0] = stats_sw_if_index;
- }
-
- return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
- .function = vxlan_gpe_encap,
- .name = "vxlan-gpe-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_gpe_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(vxlan_gpe_encap_error_strings),
- .error_strings = vxlan_gpe_encap_error_strings,
-
- .n_next_nodes = VXLAN_GPE_ENCAP_N_NEXT,
-
- .next_nodes = {
- [VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
deleted file mode 100644
index 35cee50f573..00000000000
--- a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
+++ /dev/null
@@ -1,868 +0,0 @@
-Network Working Group P. Quinn
-Internet-Draft Cisco Systems, Inc.
-Intended status: Experimental P. Agarwal
-Expires: January 4, 2015 Broadcom
- R. Fernando
- L. Kreeger
- D. Lewis
- F. Maino
- M. Smith
- N. Yadav
- Cisco Systems, Inc.
- L. Yong
- Huawei USA
- X. Xu
- Huawei Technologies
- U. Elzur
- Intel
- P. Garg
- Microsoft
- July 3, 2014
-
-
- Generic Protocol Extension for VXLAN
- draft-quinn-vxlan-gpe-03.txt
-
-Abstract
-
- This draft describes extending Virtual eXtensible Local Area Network
- (VXLAN), via changes to the VXLAN header, with three new
- capabilities: support for multi-protocol encapsulation, operations,
- administration and management (OAM) signaling and explicit
- versioning.
-
-Status of this Memo
-
- This Internet-Draft is submitted in full conformance with the
- provisions of BCP 78 and BCP 79.
-
- Internet-Drafts are working documents of the Internet Engineering
- Task Force (IETF). Note that other groups may also distribute
- working documents as Internet-Drafts. The list of current Internet-
- Drafts is at http://datatracker.ietf.org/drafts/current/.
-
- Internet-Drafts are draft documents valid for a maximum of six months
- and may be updated, replaced, or obsoleted by other documents at any
- time. It is inappropriate to use Internet-Drafts as reference
- material or to cite them other than as "work in progress."
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 1]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
- This Internet-Draft will expire on January 4, 2015.
-
-Copyright Notice
-
- Copyright (c) 2014 IETF Trust and the persons identified as the
- document authors. All rights reserved.
-
- This document is subject to BCP 78 and the IETF Trust's Legal
- Provisions Relating to IETF Documents
- (http://trustee.ietf.org/license-info) in effect on the date of
- publication of this document. Please review these documents
- carefully, as they describe your rights and restrictions with respect
- to this document. Code Components extracted from this document must
- include Simplified BSD License text as described in Section 4.e of
- the Trust Legal Provisions and are provided without warranty as
- described in the Simplified BSD License.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 2]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-Table of Contents
-
- 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4
- 2. VXLAN Without Protocol Extension . . . . . . . . . . . . . . . 5
- 3. Generic Protocol Extension VXLAN (VXLAN-gpe) . . . . . . . . . 6
- 3.1. Multi Protocol Support . . . . . . . . . . . . . . . . . . 6
- 3.2. OAM Support . . . . . . . . . . . . . . . . . . . . . . . 7
- 3.3. Version Bits . . . . . . . . . . . . . . . . . . . . . . . 7
- 4. Backward Compatibility . . . . . . . . . . . . . . . . . . . . 8
- 4.1. VXLAN VTEP to VXLAN-gpe VTEP . . . . . . . . . . . . . . . 8
- 4.2. VXLAN-gpe VTEP to VXLAN VTEP . . . . . . . . . . . . . . . 8
- 4.3. VXLAN-gpe UDP Ports . . . . . . . . . . . . . . . . . . . 8
- 4.4. VXLAN-gpe and Encapsulated IP Header Fields . . . . . . . 8
- 5. VXLAN-gpe Examples . . . . . . . . . . . . . . . . . . . . . . 9
- 6. Security Considerations . . . . . . . . . . . . . . . . . . . 11
- 7. Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . 12
- 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 13
- 8.1. UDP Port . . . . . . . . . . . . . . . . . . . . . . . . . 13
- 8.2. VXLAN-gpe Next Protocol . . . . . . . . . . . . . . . . . 13
- 8.3. VXLAN-gpe Reserved Bits . . . . . . . . . . . . . . . . . 13
- 9. References . . . . . . . . . . . . . . . . . . . . . . . . . . 14
- 9.1. Normative References . . . . . . . . . . . . . . . . . . . 14
- 9.2. Informative References . . . . . . . . . . . . . . . . . . 14
- Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 15
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 3]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-1. Introduction
-
- Virtual eXtensible Local Area Network [VXLAN] defines an
- encapsulation format that encapsulates Ethernet frames in an outer
- UDP/IP transport. As data centers evolve, the need to carry other
- protocols encapsulated in an IP packet is required, as well as the
- need to provide increased visibility and diagnostic capabilities
- within the overlay. The VXLAN header does not specify the protocol
- being encapsulated and therefore is currently limited to
- encapsulating only Ethernet frame payload, nor does it provide the
- ability to define OAM protocols. Rather than defining yet another
- encapsulation, VXLAN is extended to provide protocol typing and OAM
- capabilities.
-
- This document describes extending VXLAN via the following changes:
-
- Next Protocol Bit (P bit): A reserved flag bit is allocated, and set
- in the VXLAN-gpe header to indicate that a next protocol field is
- present.
-
- OAM Flag Bit (O bit): A reserved flag bit is allocated, and set in
- the VXLAN-gpe header, to indicate that the packet is an OAM
- packet.
-
- Version: Two reserved bits are allocated, and set in the VXLAN-gpe
- header, to indicate VXLAN-gpe protocol version.
-
- Next Protocol: A 8 bit next protocol field is present in the VXLAN-
- gpe header.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 4]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-2. VXLAN Without Protocol Extension
-
- As described in the introduction, the VXLAN header has no protocol
- identifier that indicates the type of payload being carried by VXLAN.
- Because of this, VXLAN is limited to an Ethernet payload.
- Furthermore, the VXLAN header has no mechanism to signal OAM packets.
-
- The VXLAN header defines bits 0-7 as flags (some defined, some
- reserved), the VXLAN network identifier (VNI) field and several
- reserved bits. The flags provide flexibility to define how the
- reserved bits can be used to change the definition of the VXLAN
- header.
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|R|R|R| Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
- Figure 1: VXLAN Header
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 5]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-3. Generic Protocol Extension VXLAN (VXLAN-gpe)
-
-3.1. Multi Protocol Support
-
- This draft defines the following two changes to the VXLAN header in
- order to support multi-protocol encapsulation:
-
- P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
- MUST be set to 1 to indicate the presence of the 8 bit next
- protocol field.
-
- P = 0 indicates that the payload MUST conform to VXLAN as defined
- in [VXLAN].
-
- Flag bit 5 was chosen as the P bit because this flag bit is
- currently reserved in VXLAN.
-
- Next Protocol Field: The lower 8 bits of the first word are used to
- carry a next protocol. This next protocol field contains the
- protocol of the encapsulated payload packet. A new protocol
- registry will be requested from IANA.
-
- This draft defines the following Next Protocol values:
-
- 0x1 : IPv4
- 0x2 : IPv6
- 0x3 : Ethernet
- 0x4 : Network Service Header [NSH]
-
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|P|R|R| Reserved |Next Protocol |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
- Figure 2: VXLAN-gpe Next Protocol
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 6]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-3.2. OAM Support
-
- Flag bit 7 is defined as the O bit. When the O bit is set to 1, the
- packet is an OAM packet and OAM processing MUST occur. The OAM
- protocol details are out of scope for this document. As with the
- P-bit, bit 7 is currently a reserved flag in VXLAN.
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|P|R|O| Reserved |Next Protocol |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
- Figure 3: VXLAN-gpe OAM Bit
-
-3.3. Version Bits
-
- VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
- reserved in VXLAN. The version field is used to ensure backward
- compatibility going forward with future VXLAN-gpe updates.
-
- The initial version for VXLAN-gpe is 0.
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
-
-
- Figure 4: VXLAN-gpe Version Bits
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 7]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-4. Backward Compatibility
-
-4.1. VXLAN VTEP to VXLAN-gpe VTEP
-
- As per VXLAN, reserved bits 5 and 7, VXLAN-gpe P and O-bits
- respectively must be set to zero. The remaining reserved bits must
- be zero, including the VXLAN-gpe version field, bits 8 and 9. The
- encapsulated payload MUST be Ethernet.
-
-4.2. VXLAN-gpe VTEP to VXLAN VTEP
-
- A VXLAN-gpe VTEP MUST NOT encapsulate non-Ethernet frames to a VXLAN
- VTEP. When encapsulating Ethernet frames to a VXLAN VTEP, the VXLAN-
- gpe VTEP will set the P bit to 0, the Next Protocol to 0 and use UDP
- destination port 4789. A VXLAN-gpe VTEP MUST also set O = 0 and Ver
- = 0 when encapsulating Ethernet frames to VXLAN VTEP. The receiving
- VXLAN VTEP will threat this packet as a VXLAN packet.
-
- A method for determining the capabilities of a VXLAN VTEP (gpe or
- non-gpe) is out of the scope of this draft.
-
-4.3. VXLAN-gpe UDP Ports
-
- VXLAN-gpe uses a new UDP destination port (to be assigned by IANA)
- when sending traffic to VXLAN-gpe VTEPs.
-
-4.4. VXLAN-gpe and Encapsulated IP Header Fields
-
- When encapsulating and decapsulating IPv4 and IPv6 packets, certain
- fields, such as IPv4 Time to Live (TTL) from the inner IP header need
- to be considered. VXLAN-gpe IP encapsulation and decapsulation
- utilizes the techniques described in [RFC6830], section 5.3.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 8]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-5. VXLAN-gpe Examples
-
- This section provides three examples of protocols encapsulated using
- the Generic Protocol Extension for VXLAN described in this document.
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv4 |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Original IPv4 Packet |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
- Figure 5: IPv4 and VXLAN-gpe
-
-
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|1|R|0|0|0| Reserved | NP = IPv6 |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Original IPv6 Packet |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
- Figure 6: IPv6 and VXLAN-gpe
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 9]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
- 0 1 2 3
- 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |R|R|R|R|I|1|R|0|0|0| Reserved |NP = Ethernet |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | VXLAN Network Identifier (VNI) | Reserved |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Original Ethernet Frame |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-
- Figure 7: Ethernet and VXLAN-gpe
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 10]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-6. Security Considerations
-
- VXLAN's security is focused on issues around L2 encapsulation into
- L3. With VXLAN-gpe, issues such as spoofing, flooding, and traffic
- redirection are dependent on the particular protocol payload
- encapsulated.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 11]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-7. Acknowledgments
-
- A special thank you goes to Dino Farinacci for his guidance and
- detailed review.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 12]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-8. IANA Considerations
-
-8.1. UDP Port
-
- A new UDP port will be requested from IANA.
-
-8.2. VXLAN-gpe Next Protocol
-
- IANA is requested to set up a registry of "Next Protocol". These are
- 8-bit values. Next Protocol values 0, 1, 2, 3 and 4 are defined in
- this draft. New values are assigned via Standards Action [RFC5226].
-
- +---------------+-------------+---------------+
- | Next Protocol | Description | Reference |
- +---------------+-------------+---------------+
- | 0 | Reserved | This document |
- | | | |
- | 1 | IPv4 | This document |
- | | | |
- | 2 | IPv6 | This document |
- | | | |
- | 3 | Ethernet | This document |
- | | | |
- | 4 | NSH | This document |
- | | | |
- | 5..253 | Unassigned | |
- +---------------+-------------+---------------+
-
- Table 1
-
-8.3. VXLAN-gpe Reserved Bits
-
- There are ten bits at the beginning of the VXLAN-gpe header. New
- bits are assigned via Standards Action [RFC5226].
-
- Bits 0-3 - Reserved
- Bit 4 - Instance ID (I bit)
- Bit 5 - Next Protocol (P bit)
- Bit 6 - Reserved
- Bit 7 - OAM (O bit)
- Bits 8-9 - Version
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 13]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-9. References
-
-9.1. Normative References
-
- [RFC0768] Postel, J., "User Datagram Protocol", STD 6, RFC 768,
- August 1980.
-
- [RFC0791] Postel, J., "Internet Protocol", STD 5, RFC 791,
- September 1981.
-
- [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
- Requirement Levels", BCP 14, RFC 2119, March 1997.
-
- [RFC5226] Narten, T. and H. Alvestrand, "Guidelines for Writing an
- IANA Considerations Section in RFCs", BCP 26, RFC 5226,
- May 2008.
-
-9.2. Informative References
-
- [NSH] Quinn, P. and et al. , "Network Service Header", 2014.
-
- [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", RFC 1700,
- October 1994.
-
- [RFC6830] Farinacci, D., Fuller, V., Meyer, D., and D. Lewis, "The
- Locator/ID Separation Protocol (LISP)", RFC 6830,
- January 2013.
-
- [VXLAN] Dutt, D., Mahalingam, M., Duda, K., Agarwal, P., Kreeger,
- L., Sridhar, T., Bursell, M., and C. Wright, "VXLAN: A
- Framework for Overlaying Virtualized Layer 2 Networks over
- Layer 3 Networks", 2013.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 14]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
-Authors' Addresses
-
- Paul Quinn
- Cisco Systems, Inc.
-
- Email: paulq@cisco.com
-
-
- Puneet Agarwal
- Broadcom
-
- Email: pagarwal@broadcom.com
-
-
- Rex Fernando
- Cisco Systems, Inc.
-
- Email: rex@cisco.com
-
-
- Larry Kreeger
- Cisco Systems, Inc.
-
- Email: kreeger@cisco.com
-
-
- Darrel Lewis
- Cisco Systems, Inc.
-
- Email: darlewis@cisco.com
-
-
- Fabio Maino
- Cisco Systems, Inc.
-
- Email: kreeger@cisco.com
-
-
- Michael Smith
- Cisco Systems, Inc.
-
- Email: michsmit@cisco.com
-
-
-
-
-
-
-
-
-
-Quinn, et al. Expires January 4, 2015 [Page 15]
-
-Internet-Draft Generic Protocol Extension for VXLAN July 2014
-
-
- Navindra Yadav
- Cisco Systems, Inc.
-
- Email: nyadav@cisco.com
-
-
- Lucy Yong
- Huawei USA
-
- Email: lucy.yong@huawei.com
-
-
- Xiaohu Xu
- Huawei Technologies
-
- Email: xuxiaohu@huawei.com
-
-
- Uri Elzur
- Intel
-
- Email: uri.elzur@intel.com
-
-
- Pankaj Garg
- Microsoft
-
- Email: Garg.Pankaj@microsoft.com
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/vnet/vxlan-gpe/vxlan_gpe.api
deleted file mode 100644
index 3cbd7ab7f71..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe.api
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.1.0";
-
-import "vnet/interface_types.api";
-import "vnet/ip/ip_types.api";
-
-define vxlan_gpe_add_del_tunnel
-{
- u32 client_index;
- u32 context;
- vl_api_address_t local;
- vl_api_address_t remote;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_vrf_id;
- vl_api_ip_proto_t protocol;
- u32 vni;
- bool is_add [default=true];
-};
-
-/** \brief Create or delete a VXLAN-GPE tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param local - Source IP address
- @param remote - Destination IP address, can be multicast
- @param local_port - Source UDP port. It is not included in sent packets. Used only for port registration
- @param remote_port - Destination UDP port
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_vrf_id - Encap route table FIB index
- @param decap_vrf_id - Decap route table FIB index
- @param protocol - Encapsulated protocol
- @param vni - The VXLAN Network Identifier, uint24
- @param is_add - Use 1 to create the tunnel, 0 to remove it
-*/
-define vxlan_gpe_add_del_tunnel_v2
-{
- u32 client_index;
- u32 context;
- vl_api_address_t local;
- vl_api_address_t remote;
- u16 local_port;
- u16 remote_port;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_vrf_id;
- vl_api_ip_proto_t protocol;
- u32 vni;
- bool is_add [default=true];
-};
-
-define vxlan_gpe_add_del_tunnel_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_gpe_add_del_tunnel_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_gpe_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-define vxlan_gpe_tunnel_v2_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
-};
-
-define vxlan_gpe_tunnel_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- vl_api_address_t local;
- vl_api_address_t remote;
- u32 vni;
- vl_api_ip_proto_t protocol;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_vrf_id;
- bool is_ipv6;
-};
-define vxlan_gpe_tunnel_v2_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- vl_api_address_t local;
- vl_api_address_t remote;
- u16 local_port;
- u16 remote_port;
- u32 vni;
- vl_api_ip_proto_t protocol;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_vrf_id;
- u32 decap_vrf_id;
- bool is_ipv6;
-};
-
-/** \brief Interface set vxlan-gpe-bypass request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface used to reach neighbor
- @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
- @param enable - if non-zero enable, else disable
-*/
-autoreply define sw_interface_set_vxlan_gpe_bypass
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_ipv6;
- bool enable [default=true];
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
deleted file mode 100644
index 5a5262ea9db..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ /dev/null
@@ -1,1257 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels
- *
-*/
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/fib/fib.h>
-#include <vnet/ip/format.h>
-#include <vnet/fib/fib_entry.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/fib/fib_entry_track.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/adj/adj_mcast.h>
-#include <vnet/interface.h>
-#include <vnet/udp/udp_local.h>
-#include <vlib/vlib.h>
-
-/**
- * @file
- * @brief VXLAN-GPE.
- *
- * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN-GPE tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
- */
-
-vxlan_gpe_main_t vxlan_gpe_main;
-
-static u8 *
-format_decap_next (u8 * s, va_list * args)
-{
- vxlan_gpe_tunnel_t *t = va_arg (*args, vxlan_gpe_tunnel_t *);
-
- switch (t->protocol)
- {
- case VXLAN_GPE_PROTOCOL_IP4:
- s = format (s, "protocol ip4 fib-idx %d", t->decap_fib_index);
- break;
- case VXLAN_GPE_PROTOCOL_IP6:
- s = format (s, "protocol ip6 fib-idx %d", t->decap_fib_index);
- break;
- case VXLAN_GPE_PROTOCOL_ETHERNET:
- s = format (s, "protocol ethernet");
- break;
- case VXLAN_GPE_PROTOCOL_NSH:
- s = format (s, "protocol nsh");
- break;
- default:
- s = format (s, "protocol unknown %d", t->protocol);
- }
-
- return s;
-}
-
-/**
- * @brief Format function for VXLAN GPE tunnel
- *
- * @param *s formatting string
- * @param *args
- *
- * @return *s formatted string
- *
- */
-u8 *
-format_vxlan_gpe_tunnel (u8 * s, va_list * args)
-{
- vxlan_gpe_tunnel_t *t = va_arg (*args, vxlan_gpe_tunnel_t *);
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
-
- s = format (s,
- "[%d] lcl %U rmt %U lcl_port %d rmt_port %d vni %d "
- "fib-idx %d sw-if-idx %d ",
- t - ngm->tunnels, format_ip46_address, &t->local, IP46_TYPE_ANY,
- format_ip46_address, &t->remote, IP46_TYPE_ANY, t->local_port,
- t->remote_port, t->vni, t->encap_fib_index, t->sw_if_index);
-
-#if 0
- /* next_dpo not yet used by vxlan-gpe-encap node */
- s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
- */
-#endif
- s = format (s, "decap-next-%U ", format_decap_next, t);
-
- if (PREDICT_FALSE (ip46_address_is_multicast (&t->remote)))
- s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
-
- return s;
-}
-
-/**
- * @brief Naming for VXLAN GPE tunnel
- *
- * @param *s formatting string
- * @param *args
- *
- * @return *s formatted string
- *
- */
-static u8 *
-format_vxlan_gpe_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- return format (s, "vxlan_gpe_tunnel%d", dev_instance);
-}
-
-/**
- * @brief CLI function for VXLAN GPE admin up/down
- *
- * @param *vnm
- * @param hw_if_index
- * @param flag
- *
- * @return *rc
- *
- */
-static clib_error_t *
-vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
- return 0;
-}
-
-VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
- .name = "VXLAN_GPE",
- .format_device_name = format_vxlan_gpe_name,
- .format_tx_trace = format_vxlan_gpe_encap_trace,
- .admin_up_down_function = vxlan_gpe_interface_admin_up_down,
-};
-
-
-/**
- * @brief Formatting function for tracing VXLAN GPE with length
- *
- * @param *s
- * @param *args
- *
- * @return *s
- *
- */
-static u8 *
-format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- s = format (s, "unimplemented dev %u", dev_instance);
- return s;
-}
-
-VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
- .name = "VXLAN_GPE",
- .format_header = format_vxlan_gpe_header_with_length,
- .build_rewrite = default_build_rewrite,
-};
-
-static void
-vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_t * t)
-{
- dpo_id_t dpo = DPO_INVALID;
- u32 encap_index = vxlan_gpe_encap_node.index;
- fib_forward_chain_type_t forw_type = ip46_address_is_ip4 (&t->remote) ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
-
- fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
-}
-
-static vxlan_gpe_tunnel_t *
-vxlan_gpe_tunnel_from_fib_node (fib_node_t * node)
-{
- ASSERT (FIB_NODE_TYPE_VXLAN_GPE_TUNNEL == node->fn_type);
- return ((vxlan_gpe_tunnel_t *) (((char *) node) -
- STRUCT_OFFSET_OF (vxlan_gpe_tunnel_t,
- node)));
-}
-
-/**
- * Function definition to backwalk a FIB node -
- * Here we will restack the new dpo of VXLAN_GPE DIP to encap node.
- */
-static fib_node_back_walk_rc_t
-vxlan_gpe_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
-{
- vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_from_fib_node (node));
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/**
- * Function definition to get a FIB node from its index
- */
-static fib_node_t *
-vxlan_gpe_tunnel_fib_node_get (fib_node_index_t index)
-{
- vxlan_gpe_tunnel_t *t;
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
-
- t = pool_elt_at_index (ngm->tunnels, index);
-
- return (&t->node);
-}
-
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-vxlan_gpe_tunnel_last_lock_gone (fib_node_t * node)
-{
- /*
- * The VXLAN_GPE tunnel is a root of the graph. As such
- * it never has children and thus is never locked.
- */
- ASSERT (0);
-}
-
-/*
- * Virtual function table registered by VXLAN_GPE tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t vxlan_gpe_vft = {
- .fnv_get = vxlan_gpe_tunnel_fib_node_get,
- .fnv_last_lock = vxlan_gpe_tunnel_last_lock_gone,
- .fnv_back_walk = vxlan_gpe_tunnel_back_walk,
-};
-
-#define foreach_gpe_copy_field \
- _ (vni) \
- _ (protocol) \
- _ (mcast_sw_if_index) \
- _ (encap_fib_index) \
- _ (decap_fib_index) \
- _ (local_port) \
- _ (remote_port)
-
-#define foreach_copy_ipv4 { \
- _(local.ip4.as_u32) \
- _(remote.ip4.as_u32) \
-}
-
-#define foreach_copy_ipv6 { \
- _(local.ip6.as_u64[0]) \
- _(local.ip6.as_u64[1]) \
- _(remote.ip6.as_u64[0]) \
- _(remote.ip6.as_u64[1]) \
-}
-
-
-/**
- * @brief Calculate IPv4 VXLAN GPE rewrite header
- *
- * @param *t
- *
- * @return rc
- *
- */
-int
-vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
- u8 protocol_override, uword encap_next_node)
-{
- u8 *rw = 0;
- ip4_header_t *ip0;
- ip4_vxlan_gpe_header_t *h0;
- int len;
-
- len = sizeof (*h0) + extension_size;
-
- vec_free (t->rewrite);
- vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
-
- h0 = (ip4_vxlan_gpe_header_t *) rw;
-
- /* Fixed portion of the (outer) ip4 header */
- ip0 = &h0->ip4;
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip0->src_address.as_u32 = t->local.ip4.as_u32;
- ip0->dst_address.as_u32 = t->remote.ip4.as_u32;
- ip0->checksum = ip4_header_checksum (ip0);
-
- /* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (t->local_port);
- h0->udp.dst_port = clib_host_to_net_u16 (t->remote_port);
-
- /* VXLAN header. Are we having fun yet? */
- h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
- h0->vxlan.ver_res = VXLAN_GPE_VERSION;
- if (protocol_override)
- {
- h0->vxlan.protocol = protocol_override;
- }
- else
- {
- h0->vxlan.protocol = t->protocol;
- }
- t->rewrite_size = sizeof (ip4_vxlan_gpe_header_t) + extension_size;
- h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni << 8);
-
- t->rewrite = rw;
- t->encap_next_node = encap_next_node;
- return (0);
-}
-
-/**
- * @brief Calculate IPv6 VXLAN GPE rewrite header
- *
- * @param *t
- *
- * @return rc
- *
- */
-int
-vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
- u8 protocol_override, uword encap_next_node)
-{
- u8 *rw = 0;
- ip6_header_t *ip0;
- ip6_vxlan_gpe_header_t *h0;
- int len;
-
- len = sizeof (*h0) + extension_size;
-
- vec_free (t->rewrite);
- vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES);
-
- h0 = (ip6_vxlan_gpe_header_t *) rw;
-
- /* Fixed portion of the (outer) ip4 header */
- ip0 = &h0->ip6;
- ip0->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- ip0->hop_limit = 255;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- ip0->src_address.as_u64[0] = t->local.ip6.as_u64[0];
- ip0->src_address.as_u64[1] = t->local.ip6.as_u64[1];
- ip0->dst_address.as_u64[0] = t->remote.ip6.as_u64[0];
- ip0->dst_address.as_u64[1] = t->remote.ip6.as_u64[1];
-
- /* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (t->local_port);
- h0->udp.dst_port = clib_host_to_net_u16 (t->remote_port);
-
- /* VXLAN header. Are we having fun yet? */
- h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
- h0->vxlan.ver_res = VXLAN_GPE_VERSION;
- if (protocol_override)
- {
- h0->vxlan.protocol = t->protocol;
- }
- else
- {
- h0->vxlan.protocol = protocol_override;
- }
- t->rewrite_size = sizeof (ip4_vxlan_gpe_header_t) + extension_size;
- h0->vxlan.vni_res = clib_host_to_net_u32 (t->vni << 8);
-
- t->rewrite = rw;
- t->encap_next_node = encap_next_node;
- return (0);
-}
-
-typedef CLIB_PACKED(union {
- struct {
- fib_node_index_t mfib_entry_index;
- adj_index_t mcast_adj_index;
- };
- u64 as_u64;
-}) mcast_shared_t;
-
-static inline mcast_shared_t
-mcast_shared_get (ip46_address_t * ip)
-{
- ASSERT (ip46_address_is_multicast (ip));
- uword *p = hash_get_mem (vxlan_gpe_main.mcast_shared, ip);
- ALWAYS_ASSERT (p);
- return (mcast_shared_t)
- {
- .as_u64 = *p};
-}
-
-static inline void
-mcast_shared_add (ip46_address_t * remote,
- fib_node_index_t mfei, adj_index_t ai)
-{
- mcast_shared_t new_ep = {
- .mcast_adj_index = ai,
- .mfib_entry_index = mfei,
- };
-
- hash_set_mem_alloc (&vxlan_gpe_main.mcast_shared, remote, new_ep.as_u64);
-}
-
-static inline void
-mcast_shared_remove (ip46_address_t * remote)
-{
- mcast_shared_t ep = mcast_shared_get (remote);
-
- adj_unlock (ep.mcast_adj_index);
- mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN_GPE);
-
- hash_unset_mem_free (&vxlan_gpe_main.mcast_shared, remote);
-}
-
-/**
- * @brief Add or Del a VXLAN GPE tunnel
- *
- * @param *a
- * @param *sw_if_index
- *
- * @return rc
- *
- */
-int vnet_vxlan_gpe_add_del_tunnel
- (vnet_vxlan_gpe_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
-{
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
- vxlan_gpe_tunnel_t *t = 0;
- vnet_main_t *vnm = ngm->vnet_main;
- vnet_hw_interface_t *hi;
- uword *p;
- u32 hw_if_index = ~0;
- u32 sw_if_index = ~0;
- int rv;
- vxlan4_gpe_tunnel_key_t key4, *key4_copy;
- vxlan6_gpe_tunnel_key_t key6, *key6_copy;
- u32 is_ip6 = a->is_ip6;
-
- /* Set udp-ports */
- if (a->local_port == 0)
- a->local_port = is_ip6 ? UDP_DST_PORT_VXLAN6_GPE : UDP_DST_PORT_VXLAN_GPE;
-
- if (a->remote_port == 0)
- a->remote_port = is_ip6 ? UDP_DST_PORT_VXLAN6_GPE : UDP_DST_PORT_VXLAN_GPE;
-
- if (!is_ip6)
- {
- key4.local = a->local.ip4.as_u32;
- key4.remote = a->remote.ip4.as_u32;
- key4.vni = clib_host_to_net_u32 (a->vni << 8);
- key4.port = (u32) clib_host_to_net_u16 (a->local_port);
-
- p = hash_get_mem (ngm->vxlan4_gpe_tunnel_by_key, &key4);
- }
- else
- {
- key6.local.as_u64[0] = a->local.ip6.as_u64[0];
- key6.local.as_u64[1] = a->local.ip6.as_u64[1];
- key6.remote.as_u64[0] = a->remote.ip6.as_u64[0];
- key6.remote.as_u64[1] = a->remote.ip6.as_u64[1];
- key6.vni = clib_host_to_net_u32 (a->vni << 8);
- key6.port = (u32) clib_host_to_net_u16 (a->local_port);
-
- p = hash_get_mem (ngm->vxlan6_gpe_tunnel_by_key, &key6);
- }
-
- if (a->is_add)
- {
- l2input_main_t *l2im = &l2input_main;
-
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- return VNET_API_ERROR_TUNNEL_EXIST;
-
- pool_get_aligned (ngm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
-
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_gpe_copy_field;
- if (!a->is_ip6)
- foreach_copy_ipv4
- else
- foreach_copy_ipv6
-#undef _
-
- if (!a->is_ip6)
- t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
-
- if (!a->is_ip6)
- {
- rv = vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP);
- }
- else
- {
- rv = vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
- }
-
- if (rv)
- {
- pool_put (ngm->tunnels, t);
- return rv;
- }
-
- if (!is_ip6)
- {
- key4_copy = clib_mem_alloc (sizeof (*key4_copy));
- clib_memcpy_fast (key4_copy, &key4, sizeof (*key4_copy));
- hash_set_mem (ngm->vxlan4_gpe_tunnel_by_key, key4_copy,
- t - ngm->tunnels);
- }
- else
- {
- key6_copy = clib_mem_alloc (sizeof (*key6_copy));
- clib_memcpy_fast (key6_copy, &key6, sizeof (*key6_copy));
- hash_set_mem (ngm->vxlan6_gpe_tunnel_by_key, key6_copy,
- t - ngm->tunnels);
- }
-
- if (vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) > 0)
- {
- vnet_interface_main_t *im = &vnm->interface_main;
- hw_if_index = ngm->free_vxlan_gpe_tunnel_hw_if_indices
- [vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) - 1];
- vec_dec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices, 1);
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- hi->dev_instance = t - ngm->tunnels;
- hi->hw_instance = hi->dev_instance;
- /* clear old stats of freed tunnel before reuse */
- sw_if_index = hi->sw_if_index;
- vnet_interface_counter_lock (im);
- vlib_zero_combined_counter
- (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
- sw_if_index);
- vlib_zero_combined_counter (&im->combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- sw_if_index);
- vlib_zero_simple_counter (&im->sw_if_counters
- [VNET_INTERFACE_COUNTER_DROP],
- sw_if_index);
- vnet_interface_counter_unlock (im);
- }
- else
- {
- hw_if_index = vnet_register_interface
- (vnm, vxlan_gpe_device_class.index, t - ngm->tunnels,
- vxlan_gpe_hw_class.index, t - ngm->tunnels);
- hi = vnet_get_hw_interface (vnm, hw_if_index);
- }
-
- /* Set vxlan-gpe tunnel output node */
- u32 encap_index = vxlan_gpe_encap_node.index;
- vnet_set_interface_output_node (vnm, hw_if_index, encap_index);
-
- t->hw_if_index = hw_if_index;
- t->sw_if_index = sw_if_index = hi->sw_if_index;
- vec_validate_init_empty (ngm->tunnel_index_by_sw_if_index, sw_if_index,
- ~0);
- ngm->tunnel_index_by_sw_if_index[sw_if_index] = t - ngm->tunnels;
-
- /* setup l2 input config with l2 feature and bd 0 to drop packet */
- vec_validate (l2im->configs, sw_if_index);
- l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
- l2im->configs[sw_if_index].bd_index = 0;
-
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
- vnet_sw_interface_set_flags (vnm, hi->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
- fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_GPE_TUNNEL);
- fib_prefix_t tun_remote_pfx;
- vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
-
- fib_protocol_t fp = fib_ip_proto (is_ip6);
- fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
- if (!ip46_address_is_multicast (&t->remote))
- {
- /* Unicast tunnel -
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
- vtep_addr_ref (&ngm->vtep_table, t->encap_fib_index, &t->local);
- t->fib_entry_index = fib_entry_track (t->encap_fib_index,
- &tun_remote_pfx,
- FIB_NODE_TYPE_VXLAN_GPE_TUNNEL,
- t - ngm->tunnels,
- &t->sibling_index);
- vxlan_gpe_tunnel_restack_dpo (t);
- }
- else
- {
- /* Multicast tunnel -
- * as the same mcast group can be used for multiple mcast tunnels
- * with different VNIs, create the output fib adjacency only if
- * it does not already exist
- */
- if (vtep_addr_ref (&ngm->vtep_table,
- t->encap_fib_index, &t->remote) == 1)
- {
- fib_node_index_t mfei;
- adj_index_t ai;
- fib_route_path_t path = {
- .frp_proto = fib_proto_to_dpo (fp),
- .frp_addr = zero_addr,
- .frp_sw_if_index = 0xffffffff,
- .frp_fib_index = ~0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
- const mfib_prefix_t mpfx = {
- .fp_proto = fp,
- .fp_len = (is_ip6 ? 128 : 32),
- .fp_grp_addr = tun_remote_pfx.fp_addr,
- };
-
- /*
- * Setup the (*,G) to receive traffic on the mcast group
- * - the forwarding interface is for-us
- * - the accepting interface is that from the API
- */
- mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
- MFIB_SOURCE_VXLAN_GPE,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- path.frp_sw_if_index = a->mcast_sw_if_index;
- path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
- path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (
- t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN_GPE,
- MFIB_ENTRY_FLAG_NONE, &path);
-
- /*
- * Create the mcast adjacency to send traffic to the group
- */
- ai = adj_mcast_add_or_lock (fp,
- fib_proto_to_link (fp),
- a->mcast_sw_if_index);
-
- /*
- * create a new end-point
- */
- mcast_shared_add (&t->remote, mfei, ai);
- }
-
- dpo_id_t dpo = DPO_INVALID;
- mcast_shared_t ep = mcast_shared_get (&t->remote);
-
- /* Stack shared mcast remote mac addr rewrite on encap */
- dpo_set (&dpo, DPO_ADJACENCY_MCAST,
- fib_proto_to_dpo (fp), ep.mcast_adj_index);
-
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
- }
-
- vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
- flood_class;
- }
- else
- {
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- t = pool_elt_at_index (ngm->tunnels, p[0]);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, t->sw_if_index);
- si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
- set_int_l2_mode (ngm->vlib_main, vnm, MODE_L3, t->sw_if_index, 0,
- L2_BD_PORT_TYPE_NORMAL, 0, 0);
- vec_add1 (ngm->free_vxlan_gpe_tunnel_hw_if_indices, t->hw_if_index);
-
- ngm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
-
- if (!is_ip6)
- hash_unset (ngm->vxlan4_gpe_tunnel_by_key, key4.as_u64);
- else
- hash_unset_mem_free (&ngm->vxlan6_gpe_tunnel_by_key, &key6);
-
- if (!ip46_address_is_multicast (&t->remote))
- {
- vtep_addr_unref (&ngm->vtep_table, t->encap_fib_index, &t->local);
- fib_entry_untrack (t->fib_entry_index, t->sibling_index);
- }
- else if (vtep_addr_unref (&ngm->vtep_table,
- t->encap_fib_index, &t->remote) == 0)
- {
- mcast_shared_remove (&t->remote);
- }
-
- fib_node_deinit (&t->node);
- vec_free (t->rewrite);
- pool_put (ngm->tunnels, t);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- if (a->is_add)
- {
- /* register udp ports */
- if (!is_ip6 && !udp_is_valid_dst_port (a->local_port, 1))
- udp_register_dst_port (ngm->vlib_main, a->local_port,
- vxlan4_gpe_input_node.index, 1 /* is_ip4 */);
- if (is_ip6 && !udp_is_valid_dst_port (a->remote_port, 0))
- udp_register_dst_port (ngm->vlib_main, a->remote_port,
- vxlan6_gpe_input_node.index, 0 /* is_ip4 */);
- }
-
- return 0;
-}
-
-static clib_error_t *
-vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_add = 1;
- ip46_address_t local, remote;
- u8 local_set = 0;
- u8 remote_set = 0;
- u8 grp_set = 0;
- u8 ipv4_set = 0;
- u8 ipv6_set = 0;
- u32 mcast_sw_if_index = ~0;
- u32 encap_fib_index = 0;
- u32 decap_fib_index = 0;
- u8 protocol = VXLAN_GPE_PROTOCOL_IP4;
- u32 vni;
- u8 vni_set = 0;
- u32 local_port = 0;
- u32 remote_port = 0;
- int rv;
- u32 tmp;
- vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
- u32 sw_if_index;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "local %U",
- unformat_ip4_address, &local.ip4))
- {
- local_set = 1;
- ipv4_set = 1;
- }
- else if (unformat (line_input, "remote %U",
- unformat_ip4_address, &remote.ip4))
- {
- remote_set = 1;
- ipv4_set = 1;
- }
- else if (unformat (line_input, "local %U",
- unformat_ip6_address, &local.ip6))
- {
- local_set = 1;
- ipv6_set = 1;
- }
- else if (unformat (line_input, "remote %U",
- unformat_ip6_address, &remote.ip6))
- {
- remote_set = 1;
- ipv6_set = 1;
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip4_address, &remote.ip4,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = remote_set = 1;
- ipv4_set = 1;
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip6_address, &remote.ip6,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = remote_set = 1;
- ipv6_set = 1;
- }
- else if (unformat (line_input, "encap-vrf-id %d", &tmp))
- {
- if (ipv6_set)
- encap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
- else
- encap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
-
- if (encap_fib_index == ~0)
- {
- error =
- clib_error_return (0, "nonexistent encap fib id %d", tmp);
- goto done;
- }
- }
- else if (unformat (line_input, "decap-vrf-id %d", &tmp))
- {
- if (ipv6_set)
- decap_fib_index = fib_table_find (FIB_PROTOCOL_IP6, tmp);
- else
- decap_fib_index = fib_table_find (FIB_PROTOCOL_IP4, tmp);
-
- if (decap_fib_index == ~0)
- {
- error =
- clib_error_return (0, "nonexistent decap fib id %d", tmp);
- goto done;
- }
- }
- else if (unformat (line_input, "vni %d", &vni))
- vni_set = 1;
- else if (unformat (line_input, "local_port %d", &local_port))
- ;
- else if (unformat (line_input, "remote_port %d", &remote_port))
- ;
- else if (unformat (line_input, "next-ip4"))
- protocol = VXLAN_GPE_PROTOCOL_IP4;
- else if (unformat (line_input, "next-ip6"))
- protocol = VXLAN_GPE_PROTOCOL_IP6;
- else if (unformat (line_input, "next-ethernet"))
- protocol = VXLAN_GPE_PROTOCOL_ETHERNET;
- else if (unformat (line_input, "next-nsh"))
- protocol = VXLAN_GPE_PROTOCOL_NSH;
- else
- {
- error = clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- goto done;
- }
- }
-
- if (local_set == 0)
- {
- error = clib_error_return (0, "tunnel local address not specified");
- goto done;
- }
-
- if (remote_set == 0)
- {
- error = clib_error_return (0, "tunnel remote address not specified");
- goto done;
- }
-
- if (grp_set && !ip46_address_is_multicast (&remote))
- {
- error = clib_error_return (0, "tunnel group address not multicast");
- goto done;
- }
-
- if (grp_set == 0 && ip46_address_is_multicast (&remote))
- {
- error = clib_error_return (0, "remote address must be unicast");
- goto done;
- }
-
- if (grp_set && mcast_sw_if_index == ~0)
- {
- error = clib_error_return (0, "tunnel nonexistent multicast device");
- goto done;
- }
- if (ipv4_set && ipv6_set)
- {
- error = clib_error_return (0, "both IPv4 and IPv6 addresses specified");
- goto done;
- }
-
- if ((ipv4_set && memcmp (&local.ip4, &remote.ip4, sizeof (local.ip4)) == 0)
- || (ipv6_set
- && memcmp (&local.ip6, &remote.ip6, sizeof (local.ip6)) == 0))
- {
- error = clib_error_return (0, "src and remote addresses are identical");
- goto done;
- }
-
- if (vni_set == 0)
- {
- error = clib_error_return (0, "vni not specified");
- goto done;
- }
-
- clib_memset (a, 0, sizeof (*a));
-
- a->is_add = is_add;
- a->is_ip6 = ipv6_set;
-
-#define _(x) a->x = x;
- foreach_gpe_copy_field;
- if (ipv4_set)
- foreach_copy_ipv4
- else
- foreach_copy_ipv6
-#undef _
-
- rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
-
- switch (rv)
- {
- case 0:
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), sw_if_index);
- break;
- case VNET_API_ERROR_INVALID_DECAP_NEXT:
- error = clib_error_return (0, "invalid decap-next...");
- goto done;
-
- case VNET_API_ERROR_TUNNEL_EXIST:
- error = clib_error_return (0, "tunnel already exists...");
- goto done;
-
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- error = clib_error_return (0, "tunnel does not exist...");
- goto done;
-
- default:
- error = clib_error_return
- (0, "vnet_vxlan_gpe_add_del_tunnel returned %d", rv);
- goto done;
- }
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-/*?
- * Add or delete a VXLAN-GPE Tunnel.
- *
- * VXLAN-GPE provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN-GPE tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
- *
- * @cliexpar
- * Example of how to create a VXLAN-GPE Tunnel:
- * @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 encap-vrf-id 7}
- * Example of how to delete a VXLAN-GPE Tunnel:
- * @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
- ?*/
-VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
- .path = "create vxlan-gpe tunnel",
- .short_help =
- "create vxlan-gpe tunnel local <local-addr> "
- " {remote <remote-addr>|group <mcast-addr> <intf-name>}"
- " vni <nn> [next-ip4][next-ip6][next-ethernet][next-nsh]"
- " [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
- .function = vxlan_gpe_add_del_tunnel_command_fn,
-};
-
-/**
- * @brief CLI function for showing VXLAN GPE tunnels
- *
- * @param *vm
- * @param *input
- * @param *cmd
- *
- * @return error
- *
- */
-static clib_error_t *
-show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
- vxlan_gpe_tunnel_t *t;
-
- if (pool_elts (ngm->tunnels) == 0)
- vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
-
- pool_foreach (t, ngm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
- }
-
- return 0;
-}
-
-/*?
- * Display all the VXLAN-GPE Tunnel entries.
- *
- * @cliexpar
- * Example of how to display the VXLAN-GPE Tunnel entries:
- * @cliexstart{show vxlan-gpe tunnel}
- * [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
- * @cliexend
- ?*/
-VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
- .path = "show vxlan-gpe",
- .function = show_vxlan_gpe_tunnel_command_fn,
-};
-
-void
-vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
-{
- if (is_ip6)
- vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gpe-bypass",
- sw_if_index, is_enable, 0, 0);
- else
- vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gpe-bypass",
- sw_if_index, is_enable, 0, 0);
-}
-
-
-static clib_error_t *
-set_ip_vxlan_gpe_bypass (u32 is_ip6,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = 0;
- u32 sw_if_index, is_enable;
-
- sw_if_index = ~0;
- is_enable = 1;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "del"))
- is_enable = 0;
- else
- {
- error = unformat_parse_error (line_input);
- goto done;
- }
- }
-
- if (~0 == sw_if_index)
- {
- error = clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- vnet_int_vxlan_gpe_bypass_mode (sw_if_index, is_ip6, is_enable);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-static clib_error_t *
-set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gpe_bypass (0, input, cmd);
-}
-
-/*?
- * This command adds the 'ip4-vxlan-gpe-bypass' graph node for a given
- * interface. By adding the IPv4 vxlan-gpe-bypass graph node to an interface,
- * the node checks for and validate input vxlan_gpe packet and bypass
- * ip4-lookup, ip4-local, ip4-udp-lookup nodes to speedup vxlan_gpe packet
- * forwarding. This node will cause extra overhead to for non-vxlan_gpe
- * packets which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip4-vxlan-gpe-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
- * Name Next Previous
- * ip4-vxlan-gpe-bypass error-drop [0]
- * vxlan4-gpe-input [1]
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip4-vxlan-gpe-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip4-vxlan-gpe-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
- * Name Next Previous
- * ip4-vxlan-gpe-bypass error-drop [0] ip4-input
- * vxlan4-gpe-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv4 unicast:
- * ip4-vxlan-gpe-bypass
- * ip4-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip4-vxlan-gpe-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
- .path = "set interface ip vxlan-gpe-bypass",
- .function = set_ip4_vxlan_gpe_bypass,
- .short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
-};
-
-static clib_error_t *
-set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gpe_bypass (1, input, cmd);
-}
-
-/*?
- * This command adds the 'ip6-vxlan-gpe-bypass' graph node for a given
- * interface. By adding the IPv6 vxlan-gpe-bypass graph node to an interface,
- * the node checks for and validate input vxlan_gpe packet and bypass
- * ip6-lookup, ip6-local, ip6-udp-lookup nodes to speedup vxlan_gpe packet
- * forwarding. This node will cause extra overhead to for non-vxlan_gpe packets
- * which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip6-vxlan-gpe-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
- * Name Next Previous
- * ip6-vxlan-gpe-bypass error-drop [0]
- * vxlan6-gpe-input [1]
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip6-vxlan-gpe-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip6-vxlan-gpe-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
- * Name Next Previous
- * ip6-vxlan-gpe-bypass error-drop [0] ip6-input
- * vxlan6-gpe-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv6 unicast:
- * ip6-vxlan-gpe-bypass
- * ip6-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip6-vxlan-gpe-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
- .path = "set interface ip6 vxlan-gpe-bypass",
- .function = set_ip6_vxlan_gpe_bypass,
- .short_help = "set interface ip6 vxlan-gpe-bypass <interface> [del]",
-};
-
-VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "ip4-vxlan-gpe-bypass",
- .runs_before = VNET_FEATURES ("ip4-lookup"),
-};
-
-VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "ip6-vxlan-gpe-bypass",
- .runs_before = VNET_FEATURES ("ip6-lookup"),
-};
-
-/**
- * @brief Feature init function for VXLAN GPE
- *
- * @param *vm
- *
- * @return error
- *
- */
-clib_error_t *
-vxlan_gpe_init (vlib_main_t * vm)
-{
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
-
- ngm->vnet_main = vnet_get_main ();
- ngm->vlib_main = vm;
-
- ngm->vxlan4_gpe_tunnel_by_key
- = hash_create_mem (0, sizeof (vxlan4_gpe_tunnel_key_t), sizeof (uword));
-
- ngm->vxlan6_gpe_tunnel_by_key
- = hash_create_mem (0, sizeof (vxlan6_gpe_tunnel_key_t), sizeof (uword));
-
-
- ngm->mcast_shared = hash_create_mem (0,
- sizeof (ip46_address_t),
- sizeof (mcast_shared_t));
- ngm->vtep_table = vtep_table_create ();
-
- /* Register the list of standard decap protocols supported */
- vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP4,
- VXLAN_GPE_INPUT_NEXT_IP4_INPUT);
- vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IP6,
- VXLAN_GPE_INPUT_NEXT_IP6_INPUT);
- vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_ETHERNET,
- VXLAN_GPE_INPUT_NEXT_L2_INPUT);
-
- fib_node_register_type (FIB_NODE_TYPE_VXLAN_GPE_TUNNEL, &vxlan_gpe_vft);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (vxlan_gpe_init);
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
deleted file mode 100644
index aabaafeee6f..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe.h
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief VXLAN GPE definitions
- *
-*/
-#ifndef included_vnet_vxlan_gpe_h
-#define included_vnet_vxlan_gpe_h
-
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip/vtep.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/l2_bd.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj_types.h>
-
-/**
- * @brief VXLAN GPE header struct
- *
- */
-typedef CLIB_PACKED (struct {
- /** 20 bytes */
- ip4_header_t ip4;
- /** 8 bytes */
- udp_header_t udp;
- /** 8 bytes */
- vxlan_gpe_header_t vxlan;
-}) ip4_vxlan_gpe_header_t;
-
-typedef CLIB_PACKED (struct {
- /** 40 bytes */
- ip6_header_t ip6;
- /** 8 bytes */
- udp_header_t udp;
- /** 8 bytes */
- vxlan_gpe_header_t vxlan;
-}) ip6_vxlan_gpe_header_t;
-
-/**
- * @brief Key struct for IPv4 VXLAN GPE tunnel.
- * Key fields: local remote, vni, udp-port
- * all fields in NET byte order
- * VNI shifted 8 bits
- */
-typedef CLIB_PACKED(struct {
- union {
- struct {
- u32 local;
- u32 remote;
-
- u32 vni;
- u32 port;
- };
- u64 as_u64[2];
- };
-}) vxlan4_gpe_tunnel_key_t;
-
-/**
- * @brief Key struct for IPv6 VXLAN GPE tunnel.
- * Key fields: local remote, vni, udp-port
- * all fields in NET byte order
- * VNI shifted 8 bits
- */
-typedef CLIB_PACKED(struct {
- ip6_address_t local;
- ip6_address_t remote;
- u32 vni;
- u32 port;
-}) vxlan6_gpe_tunnel_key_t;
-
-typedef union
-{
- struct
- {
- u32 tunnel_index;
- u16 next_index;
- u8 error;
- };
- u64 as_u64;
-} vxlan_gpe_decap_info_t;
-
-/**
- * @brief Struct for VXLAN GPE tunnel
- */
-typedef struct
-{
- /* Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /** Rewrite string. $$$$ embed vnet_rewrite header */
- u8 *rewrite;
-
- /** encapsulated protocol */
- u8 protocol;
-
- /* FIB DPO for IP forwarding of VXLAN-GPE encap packet */
- dpo_id_t next_dpo;
- /** tunnel local address */
- ip46_address_t local;
- /** tunnel remote address */
- ip46_address_t remote;
- /** local udp-port **/
- u16 local_port;
- /** remote udp-port **/
- u16 remote_port;
-
- /* mcast packet output intfc index (used only if dst is mcast) */
- u32 mcast_sw_if_index;
-
- /** FIB indices - tunnel partner lookup here */
- u32 encap_fib_index;
- /** FIB indices - inner IP packet lookup here */
- u32 decap_fib_index;
-
- /** VXLAN GPE VNI in HOST byte order, shifted left 8 bits */
- u32 vni;
-
- /** vnet intfc hw_if_index */
- u32 hw_if_index;
- /** vnet intfc sw_if_index */
- u32 sw_if_index;
-
- /** flags */
- u32 flags;
-
- /** rewrite size for dynamic plugins like iOAM */
- u8 rewrite_size;
-
- /** Next node after VxLAN-GPE encap */
- uword encap_next_node;
-
- /**
- * Linkage into the FIB object graph
- */
- fib_node_t node;
-
- /*
- * The FIB entry for (depending on VXLAN-GPE tunnel is unicast or mcast)
- * sending unicast VXLAN-GPE encap packets or receiving mcast VXLAN-GPE packets
- */
- fib_node_index_t fib_entry_index;
- adj_index_t mcast_adj_index;
-
- /**
- * The tunnel is a child of the FIB entry for its destination. This is
- * so it receives updates when the forwarding information for that entry
- * changes.
- * The tunnels sibling index on the FIB entry's dependency list.
- */
- u32 sibling_index;
-
-} vxlan_gpe_tunnel_t;
-
-/** Flags for vxlan_gpe_tunnel_t */
-#define VXLAN_GPE_TUNNEL_IS_IPV4 1
-
-/** next nodes for VXLAN GPE input */
-#define foreach_vxlan_gpe_input_next \
-_(DROP, "error-drop") \
-_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input") \
-_(L2_INPUT, "l2-input")
-
-/** struct for next nodes for VXLAN GPE input */
-typedef enum
-{
-#define _(s,n) VXLAN_GPE_INPUT_NEXT_##s,
- foreach_vxlan_gpe_input_next
-#undef _
- VXLAN_GPE_INPUT_N_NEXT,
-} vxlan_gpe_input_next_t;
-
-/** struct for VXLAN GPE errors */
-typedef enum
-{
-#define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n,
-#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
-#undef vxlan_gpe_error
- VXLAN_GPE_N_ERROR,
-} vxlan_gpe_input_error_t;
-
-/** Struct for VXLAN GPE node state */
-typedef struct
-{
- /** vector of encap tunnel instances */
- vxlan_gpe_tunnel_t *tunnels;
-
- /** lookup IPv4 VXLAN GPE tunnel by key */
- uword *vxlan4_gpe_tunnel_by_key;
- /** lookup IPv6 VXLAN GPE tunnel by key */
- uword *vxlan6_gpe_tunnel_by_key;
-
- /* local VTEP IPs ref count used by vxlan-bypass node to check if
- received VXLAN packet DIP matches any local VTEP address */
- vtep_table_t vtep_table;
- /* mcast shared info */
- uword *mcast_shared; /* keyed on mcast ip46 addr */
- /** Free vlib hw_if_indices */
- u32 *free_vxlan_gpe_tunnel_hw_if_indices;
-
- /** Mapping from sw_if_index to tunnel index */
- u32 *tunnel_index_by_sw_if_index;
-
- /** State convenience vlib_main_t */
- vlib_main_t *vlib_main;
- /** State convenience vnet_main_t */
- vnet_main_t *vnet_main;
-
- /* cache for last 8 vxlan_gpe tunnel */
- vtep4_cache_t vtep4_u512;
-
- /** List of next nodes for the decap indexed on protocol */
- uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
-} vxlan_gpe_main_t;
-
-extern vxlan_gpe_main_t vxlan_gpe_main;
-
-extern vlib_node_registration_t vxlan_gpe_encap_node;
-extern vlib_node_registration_t vxlan4_gpe_input_node;
-extern vlib_node_registration_t vxlan6_gpe_input_node;
-
-u8 *format_vxlan_gpe_encap_trace (u8 * s, va_list * args);
-
-/** Struct for VXLAN GPE add/del args */
-typedef struct
-{
- u8 is_add;
- u8 is_ip6;
- ip46_address_t local, remote;
- u8 protocol;
- u32 mcast_sw_if_index;
- u32 encap_fib_index;
- u32 decap_fib_index;
- u32 vni;
- u16 local_port;
- u16 remote_port;
-} vnet_vxlan_gpe_add_del_tunnel_args_t;
-
-
-int vnet_vxlan_gpe_add_del_tunnel
- (vnet_vxlan_gpe_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
-
-
-int vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
- u8 protocol_override, uword encap_next_node);
-int vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
- u8 protocol_override, uword encap_next_node);
-
-/**
- * @brief Struct for defining VXLAN GPE next nodes
- */
-typedef enum
-{
- VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP,
- VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP,
- VXLAN_GPE_ENCAP_NEXT_DROP,
- VXLAN_GPE_ENCAP_N_NEXT
-} vxlan_gpe_encap_next_t;
-
-
-void vxlan_gpe_unregister_decap_protocol (u8 protocol_id,
- uword next_node_index);
-
-void vxlan_gpe_register_decap_protocol (u8 protocol_id,
- uword next_node_index);
-
-void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6,
- u8 is_enable);
-
-#endif /* included_vnet_vxlan_gpe_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
deleted file mode 100644
index cc74e1f58d4..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vxlan_gpe_api.c - vxlan_gpe api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/format_fns.h>
-
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.api_enum.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
- vl_api_sw_interface_set_vxlan_gpe_bypass_t_handler
- (vl_api_sw_interface_set_vxlan_gpe_bypass_t * mp)
-{
- vl_api_sw_interface_set_vxlan_gpe_bypass_reply_t *rmp;
- int rv = 0;
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- VALIDATE_SW_IF_INDEX (mp);
-
- vnet_int_vxlan_gpe_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GPE_BYPASS_REPLY);
-}
-
-static void
- vl_api_vxlan_gpe_add_del_tunnel_t_handler
- (vl_api_vxlan_gpe_add_del_tunnel_t * mp)
-{
- vl_api_vxlan_gpe_add_del_tunnel_reply_t *rmp;
- int rv = 0;
- vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
- u32 encap_fib_index, decap_fib_index;
- u8 protocol;
- uword *p;
- ip4_main_t *im = &ip4_main;
- u32 sw_if_index = ~0;
-
- p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
- if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
- encap_fib_index = p[0];
-
- protocol = mp->protocol;
-
- /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */
- if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT)
- {
- p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
- if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
- goto out;
- }
- decap_fib_index = p[0];
- }
- else
- {
- decap_fib_index = ntohl (mp->decap_vrf_id);
- }
-
-
- clib_memset (a, 0, sizeof (*a));
-
- a->is_add = mp->is_add;
- ip_address_decode (&mp->local, &a->local);
- ip_address_decode (&mp->remote, &a->remote);
-
- /* Check src & dst are different */
- if (ip46_address_is_equal (&a->local, &a->remote))
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
-
- a->is_ip6 = !ip46_address_is_ip4 (&a->local);
- a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index);
- a->encap_fib_index = encap_fib_index;
- a->decap_fib_index = decap_fib_index;
- a->protocol = protocol;
- a->vni = ntohl (mp->vni);
- rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
-
-out:
- REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
-}
-
-static void
-vl_api_vxlan_gpe_add_del_tunnel_v2_t_handler (
- vl_api_vxlan_gpe_add_del_tunnel_v2_t *mp)
-{
- vl_api_vxlan_gpe_add_del_tunnel_v2_reply_t *rmp;
- int rv = 0;
- vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
- u32 encap_fib_index, decap_fib_index;
- u8 protocol;
- uword *p;
- ip4_main_t *im = &ip4_main;
- u32 sw_if_index = ~0;
-
- p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
- if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
- encap_fib_index = p[0];
-
- protocol = mp->protocol;
-
- /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */
- if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT)
- {
- p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
- if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
- goto out;
- }
- decap_fib_index = p[0];
- }
- else
- {
- decap_fib_index = ntohl (mp->decap_vrf_id);
- }
-
- clib_memset (a, 0, sizeof (*a));
-
- a->is_add = mp->is_add;
- ip_address_decode (&mp->local, &a->local);
- ip_address_decode (&mp->remote, &a->remote);
-
- /* Check src & dst are different */
- if (ip46_address_is_equal (&a->local, &a->remote))
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
-
- a->local_port = ntohs (mp->local_port);
- a->remote_port = ntohs (mp->remote_port);
- a->is_ip6 = !ip46_address_is_ip4 (&a->local);
- a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index);
- a->encap_fib_index = encap_fib_index;
- a->decap_fib_index = decap_fib_index;
- a->protocol = protocol;
- a->vni = ntohl (mp->vni);
- rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
-
-out:
- REPLY_MACRO2 (VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_V2_REPLY,
- ({ rmp->sw_if_index = ntohl (sw_if_index); }));
-}
-
-static void send_vxlan_gpe_tunnel_details
- (vxlan_gpe_tunnel_t * t, vl_api_registration_t * reg, u32 context)
-{
- vl_api_vxlan_gpe_tunnel_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
- u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_GPE_TUNNEL_DETAILS);
-
- ip_address_encode (&t->local, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
- &rmp->local);
- ip_address_encode (&t->remote, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
- &rmp->remote);
-
- if (ip46_address_is_ip4 (&t->local))
- {
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
- }
- else
- {
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
- rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
- }
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->protocol = t->protocol;
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void vl_api_vxlan_gpe_tunnel_dump_t_handler
- (vl_api_vxlan_gpe_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- vxlan_gpe_main_t *vgm = &vxlan_gpe_main;
- vxlan_gpe_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vgm->tunnels)
- {
- send_vxlan_gpe_tunnel_details (t, reg, mp->context);
- }
- }
- else
- {
- if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) ||
- (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_gpe_tunnel_details (t, reg, mp->context);
- }
-}
-
-static void
-send_vxlan_gpe_tunnel_v2_details (vxlan_gpe_tunnel_t *t,
- vl_api_registration_t *reg, u32 context)
-{
- vl_api_vxlan_gpe_tunnel_v2_details_t *rmp;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
- u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_GPE_TUNNEL_V2_DETAILS);
-
- ip_address_encode (&t->local, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
- &rmp->local);
- ip_address_encode (&t->remote, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
- &rmp->remote);
- rmp->local_port = htons (t->local_port);
- rmp->remote_port = htons (t->remote_port);
-
- if (ip46_address_is_ip4 (&t->local))
- {
- rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
- rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
- }
- else
- {
- rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
- rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
- }
- rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->vni = htonl (t->vni);
- rmp->protocol = t->protocol;
- rmp->sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void
-vl_api_vxlan_gpe_tunnel_v2_dump_t_handler (
- vl_api_vxlan_gpe_tunnel_v2_dump_t *mp)
-{
- vl_api_registration_t *reg;
- vxlan_gpe_main_t *vgm = &vxlan_gpe_main;
- vxlan_gpe_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- pool_foreach (t, vgm->tunnels)
- {
- send_vxlan_gpe_tunnel_v2_details (t, reg, mp->context);
- }
- }
- else
- {
- if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) ||
- (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_gpe_tunnel_v2_details (t, reg, mp->context);
- }
-}
-
-#include <vxlan-gpe/vxlan_gpe.api.c>
-
-static clib_error_t *
-vxlan_gpe_api_hookup (vlib_main_t * vm)
-{
- api_main_t *am = vlibapi_get_main ();
-
- vl_api_increase_msg_trace_size (am, VL_API_VXLAN_GPE_ADD_DEL_TUNNEL,
- 17 * sizeof (u32));
-
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vxlan_gpe_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/vnet/vxlan-gpe/vxlan_gpe_error.def
deleted file mode 100644
index 9cf1b1cb656..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe_error.def
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-vxlan_gpe_error (DECAPSULATED, "good packets decapsulated")
-vxlan_gpe_error (NO_SUCH_TUNNEL, "no such tunnel packets")
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
deleted file mode 100644
index f5e5ddc2347..00000000000
--- a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * @file
- * @brief VXLAN GPE packet header structure
- *
-*/
-#ifndef included_vxlan_gpe_packet_h
-#define included_vxlan_gpe_packet_h
-
-/**
- * From draft-quinn-vxlan-gpe-03.txt
- *
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |R|R|R|R|I|P|R|O|Ver| Reserved |Next Protocol |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * I Bit: Flag bit 4 indicates that the VNI is valid.
- *
- * P Bit: Flag bit 5 is defined as the Next Protocol bit. The P bit
- * MUST be set to 1 to indicate the presence of the 8 bit next
- * protocol field.
- *
- * O Bit: Flag bit 7 is defined as the O bit. When the O bit is set to 1,
- *
- * the packet is an OAM packet and OAM processing MUST occur. The OAM
- * protocol details are out of scope for this document. As with the
- * P-bit, bit 7 is currently a reserved flag in VXLAN.
- *
- * VXLAN-gpe bits 8 and 9 are defined as version bits. These bits are
- * reserved in VXLAN. The version field is used to ensure backward
- * compatibility going forward with future VXLAN-gpe updates.
- *
- * The initial version for VXLAN-gpe is 0.
- *
- * This draft defines the following Next Protocol values:
- *
- * 0x1 : IPv4
- * 0x2 : IPv6
- * 0x3 : Ethernet
- * 0x4 : Network Service Header [NSH]
- */
-
-/**
- * @brief VXLAN GPE support inner protocol definition.
- * 1 - IP4
- * 2 - IP6
- * 3 - ETHERNET
- * 4 - NSH
- */
-#define foreach_vxlan_gpe_protocol \
-_ (0x01, IP4) \
-_ (0x02, IP6) \
-_ (0x03, ETHERNET) \
-_ (0x04, NSH) \
-_ (0x05, IOAM)
-
-
-/**
- * @brief Struct for VXLAN GPE support inner protocol definition.
- * 1 - IP4
- * 2 - IP6
- * 3 - ETHERNET
- * 4 - NSH
- * 5 - IOAM
- */
-typedef enum
-{
-#define _(n,f) VXLAN_GPE_PROTOCOL_##f = n,
- foreach_vxlan_gpe_protocol
-#undef _
- VXLAN_GPE_PROTOCOL_MAX,
-} vxlan_gpe_protocol_t;
-
-/**
- * @brief VXLAN GPE Header definition
- */
-typedef struct
-{
- u8 flags;
- /** Version and Reserved */
- u8 ver_res;
- /** Reserved */
- u8 res;
- /** see vxlan_gpe_protocol_t */
- u8 protocol;
- /** VNI and Reserved */
- u32 vni_res;
-} vxlan_gpe_header_t;
-
-#define VXLAN_GPE_FLAGS_I 0x08
-#define VXLAN_GPE_FLAGS_P 0x04
-#define VXLAN_GPE_FLAGS_O 0x01
-#define VXLAN_GPE_VERSION 0x0
-
-#endif /* included_vxlan_gpe_packet_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */