diff options
author | Neale Ranns <nranns@cisco.com> | 2017-02-02 02:18:18 -0800 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-02-02 14:27:13 +0000 |
commit | a9374df5f351d25e968f5f90a827796203cbafdd (patch) | |
tree | 7876e45e388ebc739722777c1cadcf666dda6439 | |
parent | ed0e49c51841e84c72a5bb2c6d538ee779b734d4 (diff) |
Fix SR multicast post mfib commit
1 - use the SR policy to construct the replicate DPO. Each bucket therein is an SR tunnel.
2 - install a special mfib entry that links via this replicate
3 - forwarding is now mfib-lookup -> replicate -> sr_rewrite (per-tunnel),
    so there is no need for a separate sr_replicate node.
4 - Stack the SR tunnel on the forwarding DPO of the first-hop FIB entry,
    so there is no need for a second lookup post SR encap.
5 - fix some path-list lock leaks in the MFIB entry.
Change-Id: I20de96ea4c4be4fae252625bde159d9c435c8315
Signed-off-by: Neale Ranns <nranns@cisco.com>
-rw-r--r-- | src/scripts/vnet/sr/mcast | 58 | ||||
-rw-r--r-- | src/vnet.am | 1 | ||||
-rw-r--r-- | src/vnet/dpo/replicate_dpo.c | 14 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_entry.c | 90 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_entry.h | 5 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_table.c | 35 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_table.h | 32 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_test.c | 56 | ||||
-rw-r--r-- | src/vnet/mfib/mfib_types.h | 14 | ||||
-rw-r--r-- | src/vnet/sr/sr.c | 327 | ||||
-rw-r--r-- | src/vnet/sr/sr.h | 30 | ||||
-rw-r--r-- | src/vnet/sr/sr_api.c | 5 | ||||
-rw-r--r-- | src/vnet/sr/sr_replicate.c | 491 |
13 files changed, 508 insertions, 650 deletions
diff --git a/src/scripts/vnet/sr/mcast b/src/scripts/vnet/sr/mcast new file mode 100644 index 00000000000..50e73efabb4 --- /dev/null +++ b/src/scripts/vnet/sr/mcast @@ -0,0 +1,58 @@ + +loop create +loop create +loop create +loop create + +set int state loop0 up +set int state loop1 up +set int state loop2 up +set int state loop3 up + +set int ip address loop0 2001::1/64 +set int ip address loop1 2001:1::1/64 +set int ip address loop2 2001:2::1/64 +set int ip address loop3 2001:3::1/64 + +set ip6 neighbor loop1 2001:1::2 00:00:dd:ee:cc:d1 +set ip6 neighbor loop2 2001:2::2 00:00:dd:ee:cc:d2 +set ip6 neighbor loop3 2001:3::2 00:00:dd:ee:cc:d3 + +ip route 3001::1/128 via 2001:1::2 loop1 +ip route 3001::2/128 via 2001:2::2 loop2 +ip route 3001::3/128 via 2001:3::2 loop3 + +sr tunnel name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean +sr tunnel name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean +sr tunnel name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean + +sr policy name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3 + +sr multicast-map address ff18::1 sr-policy MCAST1 + +packet-generator new { + name x + limit 1 + node ethernet-input + size 64-64 + no-recycle + data { + IP6: 1.2.3 -> 4.5.6 + ICMP: 3002::2 -> ff18::1 + ICMP echo_request + incrementing 100 + } +} +trace add pg-input 100 + +sr multicast-map del address ff18::1 sr-policy MCAST1 +sr policy del name MCAST1 tunnel SR1 tunnel SR2 tunnel SR3 + +ip route del 3001::1/128 via 2001:1::2 loop1 +ip route del 3001::2/128 via 2001:2::2 loop2 +ip route del 3001::3/128 via 2001:3::2 loop3 + +sr tunnel del name SR1 src aaaa::2:1 dst ff19::1/128 next 3001::1 clean +sr tunnel del name SR2 src aaaa::2:2 dst ff19::2/128 next 3001::2 clean +sr tunnel del name SR3 src aaaa::2:3 dst ff19::3/128 next 3001::3 clean + diff --git a/src/vnet.am b/src/vnet.am index 78d864dcede..9b148f69ffa 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -694,7 +694,6 @@ nobase_include_HEADERS += \ if WITH_LIBSSL libvnet_la_SOURCES += \ 
vnet/sr/sr.c \ - vnet/sr/sr_replicate.c \ vnet/sr/sr_api.c endif diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index a2d5fdb68bd..8bad75ee5ed 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -601,8 +601,7 @@ VLIB_CLI_COMMAND (replicate_show_command, static) = { typedef struct replicate_trace_t_ { index_t rep_index; - index_t dpo_index; - dpo_type_t dpo_type; + dpo_id_t dpo; } replicate_trace_t; static uword @@ -656,8 +655,7 @@ replicate_inline (vlib_main_t * vm, { replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->rep_index = repi0; - t->dpo_index = dpo0->dpoi_index; - t->dpo_type = dpo0->dpoi_type; + t->dpo = *dpo0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, @@ -682,8 +680,7 @@ replicate_inline (vlib_main_t * vm, { replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->rep_index = repi0; - t->dpo_index = dpo0->dpoi_index; - t->dpo_type = dpo0->dpoi_type; + t->dpo = *dpo0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, @@ -705,10 +702,9 @@ format_replicate_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); replicate_trace_t *t = va_arg (*args, replicate_trace_t *); - s = format (s, "replicate: %d via %U:%d", + s = format (s, "replicate: %d via %U", t->rep_index, - format_dpo_type, t->dpo_type, - t->dpo_index); + format_dpo_id, &t->dpo); return s; } diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 479ce5f1442..5170080c4ca 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -292,6 +292,7 @@ mfib_entry_src_flush (mfib_entry_src_t *msrc) ({ mfib_itf_delete(mfib_itf_get(mfii)); })); + fib_path_list_unlock(msrc->mfes_pl); } static void @@ -474,37 +475,60 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, static void mfib_entry_stack (mfib_entry_t *mfib_entry) { - mfib_entry_collect_forwarding_ctx_t ctx = { - .next_hops = 
NULL, - .fct = mfib_entry_get_default_chain_type(mfib_entry), - }; dpo_proto_t dp; dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) { + mfib_entry_collect_forwarding_ctx_t ctx = { + .next_hops = NULL, + .fct = mfib_entry_get_default_chain_type(mfib_entry), + }; + fib_path_list_walk(mfib_entry->mfe_parent, mfib_entry_src_collect_forwarding, &ctx); - if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || - dpo_is_drop(&mfib_entry->mfe_rep)) + if (!(MFIB_ENTRY_FLAG_EXCLUSIVE & mfib_entry->mfe_flags)) { - dpo_id_t tmp_dpo = DPO_INVALID; + /* + * each path contirbutes a next-hop. form a replicate + * from those choices. + */ + if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || + dpo_is_drop(&mfib_entry->mfe_rep)) + { + dpo_id_t tmp_dpo = DPO_INVALID; - dpo_set(&tmp_dpo, - DPO_REPLICATE, dp, - replicate_create(0, dp)); + dpo_set(&tmp_dpo, + DPO_REPLICATE, dp, + replicate_create(0, dp)); + + dpo_stack(DPO_MFIB_ENTRY, dp, + &mfib_entry->mfe_rep, + &tmp_dpo); + + dpo_reset(&tmp_dpo); + } + replicate_multipath_update(&mfib_entry->mfe_rep, + ctx.next_hops); + } + else + { + /* + * for exclusive routes the source provided a replicate DPO + * we we stashed inthe special path list with one path + * so we can stack directly on that. 
+ */ + ASSERT(1 == vec_len(ctx.next_hops)); dpo_stack(DPO_MFIB_ENTRY, dp, &mfib_entry->mfe_rep, - &tmp_dpo); - - dpo_reset(&tmp_dpo); + &ctx.next_hops[0].path_dpo); + dpo_reset(&ctx.next_hops[0].path_dpo); + vec_free(ctx.next_hops); } - replicate_multipath_update(&mfib_entry->mfe_rep, - ctx.next_hops); } else { @@ -521,6 +545,8 @@ mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, fib_node_index_t old_pl_index; fib_route_path_t *rpaths; + ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); + /* * path-lists require a vector of paths */ @@ -555,6 +581,8 @@ mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, fib_node_index_t old_pl_index; fib_route_path_t *rpaths; + ASSERT(!(MFIB_ENTRY_FLAG_EXCLUSIVE & msrc->mfes_flags)); + /* * path-lists require a vector of paths */ @@ -650,7 +678,8 @@ mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc) int mfib_entry_update (fib_node_index_t mfib_entry_index, mfib_source_t source, - mfib_entry_flags_t entry_flags) + mfib_entry_flags_t entry_flags, + index_t repi) { mfib_entry_t *mfib_entry; mfib_entry_src_t *msrc; @@ -659,6 +688,35 @@ mfib_entry_update (fib_node_index_t mfib_entry_index, msrc = mfib_entry_src_find_or_create(mfib_entry, source); msrc->mfes_flags = entry_flags; + if (INDEX_INVALID != repi) + { + /* + * The source is providing its own replicate DPO. 
+ * Create a sepcial path-list to manage it, that way + * this entry and the source are equivalent to a normal + * entry + */ + fib_node_index_t old_pl_index; + fib_protocol_t fp; + dpo_id_t dpo = DPO_INVALID; + + fp = mfib_entry_get_proto(mfib_entry); + old_pl_index = msrc->mfes_pl; + + dpo_set(&dpo, DPO_REPLICATE, + fib_proto_to_dpo(fp), + repi); + + msrc->mfes_pl = + fib_path_list_create_special(fp, + FIB_PATH_LIST_FLAG_EXCLUSIVE, + &dpo); + + dpo_reset(&dpo); + fib_path_list_lock(msrc->mfes_pl); + fib_path_list_unlock(old_pl_index); + } + if (mfib_entry_src_ok_for_delete(msrc)) { /* diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index cc5d5326ef6..36fc73e1670 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -65,7 +65,7 @@ typedef struct mfib_entry_t_ { CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); /** - * The Replicate used for forwarding. + * The Replicate DPO used for forwarding. */ dpo_id_t mfe_rep; @@ -94,7 +94,8 @@ extern fib_node_index_t mfib_entry_create(u32 fib_index, extern int mfib_entry_update(fib_node_index_t fib_entry_index, mfib_source_t source, - mfib_entry_flags_t entry_flags); + mfib_entry_flags_t entry_flags, + index_t rep_dpo); extern void mfib_entry_path_update(fib_node_index_t fib_entry_index, mfib_source_t source, diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index e4c0936d6c9..b4e855ff071 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -195,7 +195,10 @@ mfib_table_entry_update (u32 fib_index, { mfib_entry_lock(mfib_entry_index); - if (mfib_entry_update(mfib_entry_index, source, entry_flags)) + if (mfib_entry_update(mfib_entry_index, + source, + entry_flags, + INDEX_INVALID)) { /* * this update means we can now remove the entry. 
@@ -283,6 +286,36 @@ mfib_table_entry_path_remove (u32 fib_index, } } +fib_node_index_t +mfib_table_entry_special_add (u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + mfib_entry_flags_t entry_flags, + index_t rep_dpo) +{ + fib_node_index_t mfib_entry_index; + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, prefix->fp_proto); + mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == mfib_entry_index) + { + mfib_entry_index = mfib_entry_create(fib_index, + source, + prefix, + MFIB_ENTRY_FLAG_NONE); + + mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); + } + + mfib_entry_update(mfib_entry_index, source, + (MFIB_ENTRY_FLAG_EXCLUSIVE | entry_flags), + rep_dpo); + + return (mfib_entry_index); +} + static void mfib_table_entry_delete_i (u32 fib_index, fib_node_index_t mfib_entry_index, diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 4faa69ee999..4c51b70fd72 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -18,6 +18,7 @@ #include <vnet/ip/ip.h> #include <vnet/adj/adj.h> +#include <vnet/dpo/replicate_dpo.h> #include <vnet/mfib/mfib_types.h> @@ -214,6 +215,37 @@ extern void mfib_table_entry_delete_index(fib_node_index_t entry_index, /** * @brief + * Add a 'special' entry to the mFIB that links to the DPO passed + * A special entry is an entry that the FIB is not expect to resolve + * via the usual mechanisms (i.e. recurisve or neighbour adj DB lookup). + * Instead the client/source provides the index of a replicate DPO to link to. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param rep_dpo + * The replicate DPO index to link to. + * + * @return + * the index of the fib_entry_t that is created (or existed already). 
+ */ +extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + mfib_entry_flags_t flags, + index_t rep_dpo); + +/** + * @brief * Flush all entries from a table for the source * * @param fib_index diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 8082a6bb606..36a303e844d 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -19,6 +19,7 @@ #include <vnet/mfib/mfib_entry.h> #include <vnet/mfib/mfib_signal.h> #include <vnet/mfib/ip6_mfib.h> +#include <vnet/fib/fib_path_list.h> #include <vnet/dpo/replicate_dpo.h> #include <vnet/adj/adj_mcast.h> @@ -337,7 +338,7 @@ mfib_test_i (fib_protocol_t PROTO, const mfib_prefix_t *pfx_star_g_slash_m) { fib_node_index_t mfei, mfei_dflt, mfei_no_f, mfei_s_g, mfei_g_1, mfei_g_2, mfei_g_3, mfei_g_m; - u32 fib_index, n_entries, n_itfs, n_reps; + u32 fib_index, n_entries, n_itfs, n_reps, n_pls; fib_node_index_t ai_1, ai_2, ai_3; test_main_t *tm; @@ -347,6 +348,7 @@ mfib_test_i (fib_protocol_t PROTO, n_entries = pool_elts(mfib_entry_pool); n_itfs = pool_elts(mfib_itf_pool); n_reps = pool_elts(replicate_pool); + n_pls = fib_path_list_pool_size(); tm = &test_main; ai_1 = adj_mcast_add_or_lock(PROTO, @@ -1024,6 +1026,54 @@ mfib_test_i (fib_protocol_t PROTO, format_mfib_prefix, pfx_star_g_slash_m); /* + * Add a prefix as a special/exclusive route + */ + dpo_id_t td = DPO_INVALID; + index_t repi = replicate_create(1, fib_proto_to_dpo(PROTO)); + + dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_2); + replicate_set_bucket(repi, 0, &td); + + mfei = mfib_table_entry_special_add(fib_index, + pfx_star_g_3, + MFIB_SOURCE_SRv6, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, + repi); + MFIB_TEST(mfib_test_entry(mfei, + (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF | + MFIB_ENTRY_FLAG_EXCLUSIVE), + 1, + DPO_ADJACENCY_MCAST, ai_2), + "%U exclusive replicate OK", + format_mfib_prefix, pfx_star_g_3); + + /* + * update a special/exclusive route + */ + index_t 
repi2 = replicate_create(1, fib_proto_to_dpo(PROTO)); + + dpo_set(&td, DPO_ADJACENCY_MCAST, fib_proto_to_dpo(PROTO), ai_1); + replicate_set_bucket(repi2, 0, &td); + + mfei = mfib_table_entry_special_add(fib_index, + pfx_star_g_3, + MFIB_SOURCE_SRv6, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, + repi2); + MFIB_TEST(mfib_test_entry(mfei, + (MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF | + MFIB_ENTRY_FLAG_EXCLUSIVE), + 1, + DPO_ADJACENCY_MCAST, ai_1), + "%U exclusive update replicate OK", + format_mfib_prefix, pfx_star_g_3); + + mfib_table_entry_delete(fib_index, + pfx_star_g_3, + MFIB_SOURCE_SRv6); + dpo_reset(&td); + + /* * Unlock the table - it's the last lock so should be gone thereafter */ mfib_table_unlock(fib_index, PROTO); @@ -1040,6 +1090,8 @@ mfib_test_i (fib_protocol_t PROTO, * test we've leaked no resources */ MFIB_TEST(0 == adj_mcast_db_size(), "%d MCAST adjs", adj_mcast_db_size()); + MFIB_TEST(n_pls == fib_path_list_pool_size(), "%d=%d path-lists", + n_pls, fib_path_list_pool_size()); MFIB_TEST(n_reps == pool_elts(replicate_pool), "%d=%d replicates", n_reps, pool_elts(replicate_pool)); MFIB_TEST(n_entries == pool_elts(mfib_entry_pool), @@ -1214,7 +1266,7 @@ mfib_test (vlib_main_t * vm, VLIB_CLI_COMMAND (test_fib_command, static) = { .path = "test mfib", - .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM", + .short_help = "mfib unit tests - DO NOT RUN ON A LIVE SYSTEM", .function = mfib_test, }; diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h index 37898a07b00..fe53aa6807a 100644 --- a/src/vnet/mfib/mfib_types.h +++ b/src/vnet/mfib/mfib_types.h @@ -68,6 +68,13 @@ typedef enum mfib_entry_attribute_t_ * Use with extreme caution */ MFIB_ENTRY_ACCEPT_ALL_ITF, + /** + * Exclusive - like its unicast counterpart. the source has provided + * the forwarding DPO directly. 
The entry therefore does not resolve + * paths via a path-list + */ + MFIB_ENTRY_EXCLUSIVE, + MFIB_ENTRY_INHERIT_ACCEPT, MFIB_ENTRY_ATTRIBUTE_LAST = MFIB_ENTRY_INHERIT_ACCEPT, } mfib_entry_attribute_t; @@ -83,6 +90,7 @@ typedef enum mfib_entry_attribute_t_ [MFIB_ENTRY_DROP] = "D", \ [MFIB_ENTRY_ACCEPT_ALL_ITF] = "AA", \ [MFIB_ENTRY_INHERIT_ACCEPT] = "IA", \ + [MFIB_ENTRY_EXCLUSIVE] = "E", \ } #define MFIB_ENTRY_NAMES_LONG { \ @@ -91,6 +99,7 @@ typedef enum mfib_entry_attribute_t_ [MFIB_ENTRY_DROP] = "Drop", \ [MFIB_ENTRY_ACCEPT_ALL_ITF] = "Accept-all-itf", \ [MFIB_ENTRY_INHERIT_ACCEPT] = "Inherit-Accept", \ + [MFIB_ENTRY_EXCLUSIVE] = "Exclusive", \ } typedef enum mfib_entry_flags_t_ @@ -99,8 +108,9 @@ typedef enum mfib_entry_flags_t_ MFIB_ENTRY_FLAG_SIGNAL = (1 << MFIB_ENTRY_SIGNAL), MFIB_ENTRY_FLAG_DROP = (1 << MFIB_ENTRY_DROP), MFIB_ENTRY_FLAG_CONNECTED = (1 << MFIB_ENTRY_CONNECTED), - MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT), MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF = (1 << MFIB_ENTRY_ACCEPT_ALL_ITF), + MFIB_ENTRY_FLAG_EXCLUSIVE = (1 << MFIB_ENTRY_EXCLUSIVE), + MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT), } mfib_entry_flags_t; typedef enum mfib_itf_attribute_t_ @@ -155,6 +165,7 @@ typedef enum mfib_source_t_ MFIB_SOURCE_CLI, MFIB_SOURCE_VXLAN, MFIB_SOURCE_DHCP, + MFIB_SOURCE_SRv6, MFIB_SOURCE_DEFAULT_ROUTE, } mfib_source_t; @@ -164,6 +175,7 @@ typedef enum mfib_source_t_ [MFIB_SOURCE_CLI] = "CLI", \ [MFIB_SOURCE_DHCP] = "DHCP", \ [MFIB_SOURCE_VXLAN] = "VXLAN", \ + [MFIB_SOURCE_SRv6] = "SRv6", \ [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ } diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c index f30c0da940e..012d45428ec 100644 --- a/src/vnet/sr/sr.c +++ b/src/vnet/sr/sr.c @@ -23,7 +23,9 @@ #include <vnet/vnet.h> #include <vnet/sr/sr.h> #include <vnet/fib/ip6_fib.h> +#include <vnet/mfib/mfib_table.h> #include <vnet/dpo/dpo.h> +#include <vnet/dpo/replicate_dpo.h> #include <openssl/hmac.h> @@ -36,6 +38,11 @@ static 
vlib_node_registration_t sr_local_node; static dpo_type_t sr_dpo_type; /** + * @brief Dynamically added SR FIB Node type + */ +static fib_node_type_t sr_fib_node_type; + +/** * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines * * @param sm ip6_sr_main_t * @@ -258,20 +265,10 @@ format_ip6_sr_header_with_length (u8 * s, va_list * args) /** * @brief Defined valid next nodes - * @note Cannot call replicate yet without DPDK */ -#if DPDK > 0 -#define foreach_sr_rewrite_next \ -_(ERROR, "error-drop") \ -_(IP6_LOOKUP, "ip6-lookup") \ -_(SR_LOCAL, "sr-local") \ -_(SR_REPLICATE,"sr-replicate") -#else #define foreach_sr_rewrite_next \ _(ERROR, "error-drop") \ -_(IP6_LOOKUP, "ip6-lookup") \ _(SR_LOCAL, "sr-local") -#endif /* DPDK */ /** * @brief Struct for defined valid next nodes @@ -384,8 +381,8 @@ sr_rewrite (vlib_main_t * vm, ip6_header_t *ip0, *ip1; ip6_sr_header_t *sr0, *sr1; ip6_sr_tunnel_t *t0, *t1; - u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; - u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP; + u32 next0; + u32 next1; u16 new_l0 = 0; u16 new_l1 = 0; @@ -433,16 +430,6 @@ sr_rewrite (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); ip1 = vlib_buffer_get_current (b1); -#if DPDK > 0 /* Cannot call replication node yet without DPDK */ - /* add a replication node */ - if (PREDICT_FALSE (t0->policy_index != ~0)) - { - vnet_buffer (b0)->ip.save_protocol = t0->policy_index; - next0 = SR_REWRITE_NEXT_SR_REPLICATE; - sr0 = (ip6_sr_header_t *) (t0->rewrite); - goto processnext; - } -#endif /* DPDK */ /* * SR-unaware service chaining case: pkt coming back from @@ -506,8 +493,11 @@ sr_rewrite (vlib_main_t * vm, sr_fix_hmac (sm, ip0, sr0); - next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : - next0; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + t0->first_hop_dpo.dpoi_index; + next0 = t0->first_hop_dpo.dpoi_next_node; + next0 = (sr_local_cb ? 
+ sr_local_cb (vm, node, b0, ip0, sr0) : next0); /* * Ignore "do not rewrite" shtik in this path @@ -519,17 +509,7 @@ sr_rewrite (vlib_main_t * vm, b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } -#if DPDK > 0 /* Cannot call replication node yet without DPDK */ - processnext: - /* add a replication node */ - if (PREDICT_FALSE (t1->policy_index != ~0)) - { - vnet_buffer (b1)->ip.save_protocol = t1->policy_index; - next1 = SR_REWRITE_NEXT_SR_REPLICATE; - sr1 = (ip6_sr_header_t *) (t1->rewrite); - goto trace00; - } -#endif /* DPDK */ + if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE)) { vlib_buffer_advance (b1, sizeof (ip1)); @@ -584,8 +564,11 @@ sr_rewrite (vlib_main_t * vm, sr_fix_hmac (sm, ip1, sr1); - next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) : - next1; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + t1->first_hop_dpo.dpoi_index; + next1 = t1->first_hop_dpo.dpoi_next_node; + next1 = (sr_local_cb ? + sr_local_cb (vm, node, b1, ip1, sr1) : next1); /* * Ignore "do not rewrite" shtik in this path @@ -597,9 +580,6 @@ sr_rewrite (vlib_main_t * vm, b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } -#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ - trace00: -#endif /* DPDK */ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -641,7 +621,7 @@ sr_rewrite (vlib_main_t * vm, ip6_header_t *ip0 = 0; ip6_sr_header_t *sr0 = 0; ip6_sr_tunnel_t *t0; - u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; + u32 next0; u16 new_l0 = 0; bi0 = from[0]; @@ -661,16 +641,6 @@ sr_rewrite (vlib_main_t * vm, t0 = pool_elt_at_index (sm->tunnels, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); -#if DPDK > 0 /* Cannot call replication node yet without DPDK */ - /* add a replication node */ - if (PREDICT_FALSE (t0->policy_index != ~0)) - { - vnet_buffer (b0)->ip.save_protocol = t0->policy_index; - next0 = SR_REWRITE_NEXT_SR_REPLICATE; - sr0 = (ip6_sr_header_t *) (t0->rewrite); - goto trace0; - } -#endif /* DPDK */ ASSERT 
(VLIB_BUFFER_PRE_DATA_SIZE >= ((word) vec_len (t0->rewrite)) + b0->current_data); @@ -740,8 +710,11 @@ sr_rewrite (vlib_main_t * vm, sr_fix_hmac (sm, ip0, sr0); - next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : - next0; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + t0->first_hop_dpo.dpoi_index; + next0 = t0->first_hop_dpo.dpoi_next_node; + next0 = (sr_local_cb ? + sr_local_cb (vm, node, b0, ip0, sr0) : next0); /* * Ignore "do not rewrite" shtik in this path @@ -753,9 +726,6 @@ sr_rewrite (vlib_main_t * vm, b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } -#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ - trace0: -#endif /* DPDK */ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -809,20 +779,69 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) /* *INDENT-ON* */ static int -ip6_delete_route_no_next_hop (ip6_address_t * dst_address_arg, - u32 dst_address_length, u32 rx_table_id) +ip6_routes_add_del (ip6_sr_tunnel_t * t, int is_del) { + ip6_sr_main_t *sm = &sr_main; + + /* + * the prefix for the tunnel's destination + */ + /* *INDENT-OFF* */ fib_prefix_t pfx = { - .fp_len = dst_address_length, .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = t->dst_mask_width, .fp_addr = { - .ip6 = *dst_address_arg, - } + .ip6 = t->key.dst, + } }; + /* *INDENT-ON* */ - fib_table_entry_delete (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, - rx_table_id), - &pfx, FIB_SOURCE_SR); + if (is_del) + { + fib_table_entry_delete (t->rx_fib_index, &pfx, FIB_SOURCE_SR); + } + else + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); + fib_table_entry_special_dpo_add (t->rx_fib_index, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + } + + /* + * Track the first hop address so we don't need to perform an extra + * lookup in the data-path + */ + /* *INDENT-OFF* */ + const fib_prefix_t first_hop_pfx = { + .fp_len = 128, + .fp_proto = 
FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = t->first_hop, + } + }; + /* *INDENT-ON* */ + + if (is_del) + { + fib_entry_child_remove (t->fib_entry_index, t->sibling_index); + fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); + } + else + { + t->fib_entry_index = + fib_table_entry_special_add (t->rx_fib_index, + &first_hop_pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); + t->sibling_index = + fib_entry_child_add (t->fib_entry_index, + sr_fib_node_type, t - sm->tunnels); + } return 0; } @@ -886,6 +905,18 @@ find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp) } /** + * @brief Stack a tunnel on the forwarding chain of the first-hop + */ +static void +sr_tunnel_stack (ip6_sr_tunnel_t * st) +{ + dpo_stack (sr_dpo_type, + DPO_PROTO_IP6, + &st->first_hop_dpo, + fib_entry_contribute_ip_forwarding (st->fib_entry_index)); +} + +/** * @brief Add or Delete a Segment Routing tunnel. * * @param a ip6_sr_add_del_tunnel_args_t * @@ -909,7 +940,6 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) u8 hmac_key_index = 0; ip6_sr_policy_t *pt; int i; - dpo_id_t dpo = DPO_INVALID; /* Make sure that the rx FIB exists */ p = hash_get (im->fib_index_by_table_id, a->rx_table_id); @@ -981,8 +1011,8 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) /* Delete existing tunnel */ t = pool_elt_at_index (sm->tunnels, p[0]); - ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width, - a->rx_table_id); + ip6_routes_add_del (t, 1); + vec_free (t->rewrite); /* Remove tunnel from any policy if associated */ if (t->policy_index != ~0) @@ -1014,6 +1044,7 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) hash_unset_mem (sm->tunnel_index_by_name, t->name); vec_free (t->name); } + dpo_reset (&t->first_hop_dpo); pool_put (sm->tunnels, t); hp = hash_get_pair (sm->tunnel_index_by_key, &key); key_copy = (void *) (hp->key); @@ -1026,6 +1057,7 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) pool_get (sm->tunnels, t); 
memset (t, 0, sizeof (*t)); t->policy_index = ~0; + fib_node_init (&t->node, sr_fib_node_type); clib_memcpy (&t->key, &key, sizeof (t->key)); t->dst_mask_width = a->dst_mask_width; @@ -1124,20 +1156,13 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain * at some point... */ - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = a->dst_mask_width, - .fp_addr = { - .ip6 = *a->dst_address, - } - }; - fib_table_entry_special_dpo_add (rx_fib_index, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); - dpo_reset (&dpo); + /* + * Add the routes for the tunnel destination and first-hop, then stack + * the tunnel on the appropriate forwarding DPOs. + */ + ip6_routes_add_del (t, 0); + sr_tunnel_stack (t); if (a->policy_name) { @@ -1197,7 +1222,7 @@ format_sr_dpo (u8 * s, va_list * args) return (format (s, "SR: tunnel:[%d]", index)); } -const static dpo_vft_t sr_vft = { +const static dpo_vft_t sr_dpo_vft = { .dv_lock = sr_dpo_lock, .dv_unlock = sr_dpo_unlock, .dv_format = format_sr_dpo, @@ -1212,6 +1237,65 @@ const static char *const *const sr_nodes[DPO_PROTO_NUM] = { [DPO_PROTO_IP6] = sr_ip6_nodes, }; +static ip6_sr_tunnel_t * +sr_tunnel_from_fib_node (fib_node_t * node) +{ +#if (CLIB_DEBUG > 0) + ASSERT (sr_fib_node_type == node->fn_type); +#endif + return ((ip6_sr_tunnel_t *) (((char *) node) - + STRUCT_OFFSET_OF (ip6_sr_tunnel_t, node))); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +sr_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + sr_tunnel_stack (sr_tunnel_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +sr_tunnel_fib_node_get (fib_node_index_t index) +{ + ip6_sr_tunnel_t *st; + ip6_sr_main_t *sm; + + sm = &sr_main; + st 
= pool_elt_at_index (sm->tunnels, index); + + return (&st->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +sr_tunnel_last_lock_gone (fib_node_t * node) +{ + /* + * The SR tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT (0); +} + +/* + * Virtual function table registered by SR tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t sr_fib_vft = { + .fnv_get = sr_tunnel_fib_node_get, + .fnv_last_lock = sr_tunnel_last_lock_gone, + .fnv_back_walk = sr_tunnel_back_walk, +}; + /** * @brief CLI parser for Add or Delete a Segment Routing tunnel. * @@ -1764,6 +1848,8 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) ip6_sr_tunnel_t *t; ip6_sr_main_t *sm = &sr_main; ip6_sr_policy_t *pt; + index_t rep; + u32 ii; if (a->is_del) { @@ -1803,23 +1889,49 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain * at some point... */ - dpo_id_t dpo = DPO_INVALID; - - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */ - fib_prefix_t pfx = { + /* + * Construct an mFIB entry for the multicast address, + * using the rx/tx fib from the first tunnel. + * There is no RPF information for this address (I need to discuss this with + * Pablo), so for now accept from anywhere... 
+ */ + /* *INDENT-OFF* */ + mfib_prefix_t pfx = { .fp_proto = FIB_PROTOCOL_IP6, .fp_len = 128, - .fp_addr = { - .ip6 = *a->multicast_address, - } + .fp_grp_addr = { + .ip6 = *a->multicast_address, + } }; - fib_table_entry_special_dpo_add (t->rx_fib_index, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); - dpo_reset (&dpo); + /* *INDENT-ON* */ + + if (a->is_del) + mfib_table_entry_delete (t->rx_fib_index, &pfx, MFIB_SOURCE_SRv6); + else + { + /* + * Construct a replicate DPO that will replicate received packets over + * each tunnel in the policy + */ + dpo_id_t dpo = DPO_INVALID; + + rep = replicate_create (vec_len (pt->tunnel_indices), DPO_PROTO_IP6); + + vec_foreach_index (ii, pt->tunnel_indices) + { + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, pt->tunnel_indices[ii]); + + replicate_set_bucket (rep, ii, &dpo); + } + + mfib_table_entry_special_add (t->rx_fib_index, + &pfx, + MFIB_SOURCE_SRv6, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, rep); + + dpo_reset (&dpo); + } u8 *mcast_copy = 0; mcast_copy = vec_new (ip6_address_t, 1); @@ -1829,13 +1941,12 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) { hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy); vec_free (mcast_copy); - return 0; } - /* else */ - - hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy, - pt - sm->policies); - + else + { + hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy, + pt - sm->policies); + } return 0; } @@ -1888,12 +1999,7 @@ sr_add_del_multicast_map_command_fn (vlib_main_t * vm, a->multicast_address = &multicast_address; a->policy_name = policy_name; -#if DPDK > 0 /*Cannot call replicate or configure multicast map yet without DPDK */ rv = ip6_sr_add_del_multicastmap (a); -#else - return clib_error_return (0, - "cannot use multicast replicate spray case without DPDK installed"); -#endif /* DPDK */ switch (rv) { @@ -2295,12 +2401,6 @@ sr_init (vlib_main_t * vm) ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) 
"ip6-rewrite"); ASSERT (ip6_rewrite_node); -#if DPDK > 0 /* Cannot run replicate without DPDK */ - /* Add a disposition to sr_replicate for the sr multicast replicate node */ - sm->ip6_lookup_sr_replicate_index = - vlib_node_add_next (vm, ip6_lookup_node->index, sr_replicate_node.index); -#endif /* DPDK */ - /* Add a disposition to ip6_rewrite for the sr dst address hack node */ sm->ip6_rewrite_sr_next_index = vlib_node_add_next (vm, ip6_rewrite_node->index, @@ -2311,7 +2411,8 @@ sr_init (vlib_main_t * vm) sm->md = (void *) EVP_get_digestbyname ("sha1"); sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); - sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes); + sr_dpo_type = dpo_register_new_type (&sr_dpo_vft, sr_nodes); + sr_fib_node_type = fib_node_register_new_type (&sr_fib_vft); return error; } @@ -3087,7 +3188,7 @@ set_ip6_sr_rewrite_fn (vlib_main_t * vm, hi = vnet_get_sup_hw_interface (vnm, sw_if_index); adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; - /* $$$$$ hack... steal the mcast group index */ + /* $$$$$ hack... steal the interface address index */ adj->if_address_index = vlib_node_add_next (vm, sr_fix_dst_addr_node.index, hi->output_node_index); diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h index 610b36996f3..3c50b7358a1 100644 --- a/src/vnet/sr/sr.h +++ b/src/vnet/sr/sr.h @@ -15,8 +15,6 @@ /** * @file * @brief Segment Routing header - * - * @note sr_replicate only works using DPDK today */ #ifndef included_vnet_sr_h #define included_vnet_sr_h @@ -71,6 +69,27 @@ typedef struct /** Indicates that this tunnel is part of a policy comprising of multiple tunnels. If == ~0 tunnel is not part of a policy */ u32 policy_index; + + /** + * The FIB node graph linkage + */ + fib_node_t node; + + /** + * The FIB entry index for the first hop. 
We track this so we + * don't need an extra lookup for it in the data plane + */ + fib_node_index_t fib_entry_index; + + /** + * This tunnel's sibling index in the children of the FIB entry + */ + u32 sibling_index; + + /** + * The DPO contributed by the first-hop FIB entry. + */ + dpo_id_t first_hop_dpo; } ip6_sr_tunnel_t; /** @@ -205,9 +224,6 @@ typedef struct /** ip6-rewrite next index for reinstalling the original dst address */ u32 ip6_rewrite_sr_next_index; - /** ip6-replicate next index for multicast tunnel */ - u32 ip6_lookup_sr_replicate_index; - /** application API callback */ void *sr_local_cb; @@ -238,10 +254,6 @@ format_function_t format_ip6_sr_header_with_length; vlib_node_registration_t ip6_sr_input_node; -#if DPDK > 0 -extern vlib_node_registration_t sr_replicate_node; -#endif /* DPDK */ - int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a); int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a); diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c index 6c6eb9b6449..bab0fc84aba 100644 --- a/src/vnet/sr/sr_api.c +++ b/src/vnet/sr/sr_api.c @@ -190,12 +190,7 @@ static void vl_api_sr_multicast_map_add_del_t_handler goto out; } -#if DPDK > 0 /* Cannot call replicate without DPDK */ rv = ip6_sr_add_del_multicastmap (a); -#else - clib_warning ("multicast replication without DPDK not implemented"); - rv = VNET_API_ERROR_UNIMPLEMENTED; -#endif /* DPDK */ out: diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c deleted file mode 100644 index fa5a68c31c2..00000000000 --- a/src/vnet/sr/sr_replicate.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * sr_replicate.c: ipv6 segment routing replicator for multicast - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @file - * @brief Functions for replicating packets across SR tunnels. - * - * Leverages rte_pktmbuf_clone() so there is no memcpy for - * invariant parts of the packet. - * - * @note Currently requires DPDK -*/ - -#if DPDK > 0 /* Cannot run replicate without DPDK */ -#include <vlib/vlib.h> -#include <vnet/vnet.h> -#include <vnet/pg/pg.h> -#include <vnet/sr/sr.h> -#include <vnet/devices/dpdk/dpdk.h> -#include <vnet/devices/dpdk/dpdk_priv.h> -#include <vnet/ip/ip.h> -#include <vnet/fib/ip6_fib.h> - -#include <vppinfra/hash.h> -#include <vppinfra/error.h> -#include <vppinfra/elog.h> - -/** - * @brief sr_replicate state. - * -*/ -typedef struct -{ - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} sr_replicate_main_t; - -sr_replicate_main_t sr_replicate_main; - -/** - * @brief Information to display in packet trace. - * -*/ -typedef struct -{ - ip6_address_t src, dst; - u16 length; - u32 next_index; - u32 tunnel_index; - u8 sr[256]; -} sr_replicate_trace_t; - -/** - * @brief packet trace format function. 
- * - * @param *s u8 used for string output - * @param *args va_list structured input to va_arg to output @ref sr_replicate_trace_t - * @return *s u8 - formatted trace output -*/ -static u8 * -format_sr_replicate_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *); - ip6_sr_main_t *sm = &sr_main; - ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); - ip6_fib_t *rx_fib, *tx_fib; - - rx_fib = ip6_fib_get (tun->rx_fib_index); - tx_fib = ip6_fib_get (tun->tx_fib_index); - - s = format - (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n" - " rx-fib-id %d tx-fib-id %d\n%U", - "ip6-lookup", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, - rx_fib->table_id, tx_fib->table_id, - format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); - return s; - -} - -#define foreach_sr_replicate_error \ -_(REPLICATED, "sr packets replicated") \ -_(NO_BUFFERS, "error allocating buffers for replicas") \ -_(NO_REPLICAS, "no replicas were needed") \ -_(NO_BUFFER_DROPS, "sr no buffer drops") - -/** - * @brief Struct for SR replicate errors - */ -typedef enum -{ -#define _(sym,str) SR_REPLICATE_ERROR_##sym, - foreach_sr_replicate_error -#undef _ - SR_REPLICATE_N_ERROR, -} sr_replicate_error_t; - -/** - * @brief Error strings for SR replicate - */ -static char *sr_replicate_error_strings[] = { -#define _(sym,string) string, - foreach_sr_replicate_error -#undef _ -}; - -/** - * @brief Defines next-nodes for packet processing. - * -*/ -typedef enum -{ - SR_REPLICATE_NEXT_IP6_LOOKUP, - SR_REPLICATE_N_NEXT, -} sr_replicate_next_t; - -/** - * @brief Single loop packet replicator. 
- * - * @node sr-replicate - * @param vm vlib_main_t - * @return frame->n_vectors uword -*/ -static uword -sr_replicate_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - dpdk_main_t *dm = &dpdk_main; - u32 n_left_from, *from, *to_next; - sr_replicate_next_t next_index; - int pkts_replicated = 0; - ip6_sr_main_t *sm = &sr_main; - int no_buffer_drops = 0; - vlib_buffer_free_list_t *fl; - unsigned socket_id = rte_socket_id (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, hdr_bi0; - vlib_buffer_t *b0, *orig_b0; - struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0; - struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0; - ip6_sr_policy_t *pol0 = 0; - ip6_sr_tunnel_t *t0 = 0; - ip6_sr_header_t *hdr_sr0 = 0; - ip6_header_t *ip0 = 0, *hdr_ip0 = 0; - int num_replicas = 0; - int i; - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE; - - bi0 = from[0]; - - b0 = vlib_get_buffer (vm, bi0); - orig_b0 = b0; - - pol0 = pool_elt_at_index (sm->policies, - vnet_buffer (b0)->ip.save_protocol); - - ip0 = vlib_buffer_get_current (b0); - /* Skip forward to the punch-in point */ - vlib_buffer_advance (b0, sizeof (*ip0)); - next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - u32 ext_hdr_len = 0; - ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - len_bytes += ext_hdr_len; - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - ip_next_hdr = 
IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; - /* Skip forward to the punch-in point */ - vlib_buffer_advance (b0, ext_hdr_len); - - } - - orig_mb0 = rte_mbuf_from_vlib_buffer (b0); - - i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0) - - (i16) orig_mb0->pkt_len; - - u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0); - u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0); - - orig_mb0->data_len = new_data_len0; - orig_mb0->pkt_len = new_pkt_len0; - orig_mb0->data_off += (u16) (b0->current_data); - - /* - Before entering loop determine if we can allocate: - - all the new HEADER RTE_MBUFs and assign them to a vector - - all the clones - - if successful, then iterate over vectors of resources - - */ - num_replicas = vec_len (pol0->tunnel_indices); - - if (PREDICT_FALSE (num_replicas == 0)) - { - b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS]; - goto do_trace0; - } - - vec_reset_length (hdr_vec); - vec_reset_length (rte_mbuf_vec); - - for (i = 0; i < num_replicas; i++) - { - uint8_t nb_seg; - struct rte_mbuf *clone0i; - vlib_buffer_t *clone0_c, *clone_b0; - - t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]); - - if (i < (num_replicas - 1)) - { - /* Not the last tunnel to process */ - clone0 = rte_pktmbuf_clone - (orig_mb0, dm->pktmbuf_pools[socket_id]); - if (clone0 == 0) - goto clone_fail; - nb_seg = 0; - clone0i = clone0; - clone0_c = NULL; - while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs)) - { - - clone_b0 = vlib_buffer_from_rte_mbuf (clone0i); - vlib_buffer_init_for_free_list (clone_b0, fl); - - ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == - 0); - ASSERT (clone_b0->current_data == 0); - - clone_b0->current_data = - (clone0i->buf_addr + clone0i->data_off) - - (void *) clone_b0->data; - - clone_b0->current_length = clone0i->data_len; - if (PREDICT_FALSE (clone0_c != NULL)) - { - clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT; - clone0_c->next_buffer 
= - vlib_get_buffer_index (vm, clone_b0); - } - clone0_c = clone_b0; - clone0i = clone0i->next; - nb_seg++; - } - } - else - /* First tunnel to process, use original MB */ - clone0 = orig_mb0; - - - if (PREDICT_FALSE (!clone0 || !hdr_mb0)) - { - clone_fail: - b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS]; - - vec_foreach_index (i, rte_mbuf_vec) - { - rte_pktmbuf_free (rte_mbuf_vec[i]); - } - vec_free (rte_mbuf_vec); - - vec_foreach_index (i, hdr_vec) - { - rte_pktmbuf_free (hdr_vec[i]); - } - vec_free (hdr_vec); - - goto do_trace0; - } - - vec_add1 (hdr_vec, hdr_mb0); - vec_add1 (rte_mbuf_vec, clone0); - - } - - for (i = 0; i < num_replicas; i++) - { - vlib_buffer_t *hdr_b0; - u16 new_l0 = 0; - - t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - /* Our replicas */ - hdr_mb0 = hdr_vec[i]; - clone0 = rte_mbuf_vec[i]; - - hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite); - hdr_mb0->pkt_len = hdr_mb0->data_len + - vlib_buffer_length_in_chain (vm, orig_b0); - - hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0); - - vlib_buffer_init_for_free_list (hdr_b0, fl); - - memcpy (hdr_b0->data, ip0, len_bytes); - memcpy (hdr_b0->data + len_bytes, t0->rewrite, - vec_len (t0->rewrite)); - - hdr_b0->current_data = 0; - hdr_b0->current_length = len_bytes + vec_len (t0->rewrite); - hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT; - hdr_b0->trace_index = orig_b0->trace_index; - vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0; - - hdr_b0->total_length_not_including_first_buffer = - hdr_mb0->pkt_len - hdr_b0->current_length; - vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - - hdr_ip0 = (ip6_header_t *) hdr_b0->data; - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0); - hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite)); - hdr_sr0->protocol = next_hdr; - 
hdr_ip0->protocol = ip_next_hdr; - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - - /* Rewrite the ip6 dst address */ - hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; - hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1]; - - sr_fix_hmac (sm, hdr_ip0, hdr_sr0); - - /* prepend new header to invariant piece */ - hdr_mb0->next = clone0; - hdr_b0->next_buffer = - vlib_get_buffer_index (vm, - vlib_buffer_from_rte_mbuf (clone0)); - - /* update header's fields */ - hdr_mb0->pkt_len = - (uint16_t) (hdr_mb0->data_len + clone0->pkt_len); - hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1); - - /* copy metadata from source packet */ - hdr_mb0->port = clone0->port; - hdr_mb0->vlan_tci = clone0->vlan_tci; - hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer; - hdr_mb0->tx_offload = clone0->tx_offload; - hdr_mb0->hash = clone0->hash; - - hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF); - - __rte_mbuf_sanity_check (hdr_mb0, 1); - - hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0); - - to_next[0] = hdr_bi0; - to_next += 1; - n_left_to_next -= 1; - - if (n_left_to_next == 0) - { - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - } - pkts_replicated++; - } - - from += 1; - n_left_from -= 1; - - do_trace0: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_replicate_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - tr->length = 0; - if (hdr_ip0) - { - memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - if (hdr_ip0->payload_length) - tr->length = clib_net_to_host_u16 - (hdr_ip0->payload_length); - } - tr->next_index = next_index; - if (hdr_sr0) - memcpy (tr->sr, hdr_sr0, sizeof (tr->sr)); - } - - } - - 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sr_replicate_node.index, - SR_REPLICATE_ERROR_REPLICATED, - pkts_replicated); - - vlib_node_increment_counter (vm, sr_replicate_node.index, - SR_REPLICATE_ERROR_NO_BUFFER_DROPS, - no_buffer_drops); - - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_replicate_node) = { - .function = sr_replicate_node_fn, - .name = "sr-replicate", - .vector_size = sizeof (u32), - .format_trace = format_sr_replicate_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(sr_replicate_error_strings), - .error_strings = sr_replicate_error_strings, - - .n_next_nodes = SR_REPLICATE_N_NEXT, - - .next_nodes = { - [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn) -/* *INDENT-ON* */ - -clib_error_t * -sr_replicate_init (vlib_main_t * vm) -{ - sr_replicate_main_t *msm = &sr_replicate_main; - - msm->vlib_main = vm; - msm->vnet_main = vnet_get_main (); - - return 0; -} - -VLIB_INIT_FUNCTION (sr_replicate_init); - -#endif /* DPDK */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ |