From 32e1c010b0c34fd0984f7fc45fae648a182025c5 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 22 Nov 2016 17:07:28 +0000 Subject: IP Multicast FIB (mfib) - IPv[46] mfib tables with support for (*,G/m), (*,G) and (S,G) exact and longest prefix match - Replication represented via a new replicate DPO. - RPF configuration and data-plane checking - data-plane signals sent to listening control planes. The functions of multicast forwarding entries differ from their unicast counterparts, so we introduce new mfib_table_t and mfib_entry_t objects. However, we re-use the fib_path_list to resolve and build the entry's output list. The fib_path_list provides the service to construct a replicate DPO for multicast. 'make tests' is extended with two new suites; TEST=mfib, this is an invocation of the CLI command 'test mfib' which deals with many path add/remove, flag set/unset scenarios, TEST=ip-mcast, data-plane forwarding tests. Updated applications to use the new MFIB functions; - IPv6 NS/RA. - DHCPv6 unit tests for these are updated accordingly. 
Change-Id: I49ec37b01f1b170335a5697541c8fd30e6d3a961 Signed-off-by: Neale Ranns --- src/vnet/dpo/dpo.c | 2 + src/vnet/dpo/dpo.h | 8 +- src/vnet/dpo/load_balance.c | 13 +- src/vnet/dpo/load_balance.h | 8 + src/vnet/dpo/replicate_dpo.c | 759 +++++++++++++++++++++++++++++++++++++++++++ src/vnet/dpo/replicate_dpo.h | 143 ++++++++ 6 files changed, 931 insertions(+), 2 deletions(-) create mode 100644 src/vnet/dpo/replicate_dpo.c create mode 100644 src/vnet/dpo/replicate_dpo.h (limited to 'src/vnet/dpo') diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index 688d2892412..cc2fa0eb91c 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -36,6 +36,7 @@ #include #include #include +#include /** * Array of char* names for the DPO types and protos @@ -449,6 +450,7 @@ dpo_module_init (vlib_main_t * vm) classify_dpo_module_init(); lookup_dpo_module_init(); ip_null_dpo_module_init(); + replicate_module_init(); return (NULL); } diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 1efcbc8834b..aff4e1b82cc 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -100,15 +100,18 @@ typedef enum dpo_type_t_ { * @brief load-balancing over a choice of [un]equal cost paths */ DPO_LOAD_BALANCE, + DPO_REPLICATE, DPO_ADJACENCY, DPO_ADJACENCY_INCOMPLETE, DPO_ADJACENCY_MIDCHAIN, DPO_ADJACENCY_GLEAN, + DPO_ADJACENCY_MCAST, DPO_RECEIVE, DPO_LOOKUP, DPO_LISP_CP, DPO_CLASSIFY, DPO_MPLS_LABEL, + DPO_MFIB_ENTRY, DPO_LAST, } __attribute__((packed)) dpo_type_t; @@ -123,12 +126,15 @@ typedef enum dpo_type_t_ { [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \ [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ + [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast", \ [DPO_RECEIVE] = "dpo-receive", \ [DPO_LOOKUP] = "dpo-lookup", \ [DPO_LOAD_BALANCE] = "dpo-load-balance", \ + [DPO_REPLICATE] = "dpo-replicate", \ [DPO_LISP_CP] = "dpo-lisp-cp", \ [DPO_CLASSIFY] = "dpo-classify", \ - [DPO_MPLS_LABEL] = "dpo-mpls-label" \ + [DPO_MPLS_LABEL] = 
"dpo-mpls-label", \ + [DPO_MFIB_ENTRY] = "dpo-mfib_entry" \ } /** diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index e70a7a306e1..f11b4e4de84 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -238,6 +238,17 @@ load_balance_is_drop (const dpo_id_t *dpo) return (0); } +void +load_balance_set_fib_entry_flags (index_t lbi, + fib_entry_flag_t flags) +{ + load_balance_t *lb; + + lb = load_balance_get(lbi); + lb->lb_fib_entry_flags = flags; +} + + void load_balance_set_urpf (index_t lbi, index_t urpf) @@ -683,7 +694,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, buckets, n_buckets); - for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++) + for (ii = n_buckets; ii < old_n_buckets; ii++) { dpo_reset(&buckets[ii]); } diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index 1799653628d..b901c5beb84 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -36,6 +36,7 @@ #include #include #include +#include /** * Load-balance main @@ -98,6 +99,11 @@ typedef struct load_balance_t_ { */ dpo_proto_t lb_proto; + /** + * Flags from the load-balance's associated fib_entry_t + */ + fib_entry_flag_t lb_fib_entry_flags; + /** * The number of locks, which is approximately the number of users, * of this load-balance. @@ -167,6 +173,8 @@ extern void load_balance_set_bucket(index_t lbi, const dpo_id_t *next); extern void load_balance_set_urpf(index_t lbi, index_t urpf); +extern void load_balance_set_fib_entry_flags(index_t lbi, + fib_entry_flag_t flags); extern index_t load_balance_get_urpf(index_t lbi); extern u8* format_load_balance(u8 * s, va_list * args); diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c new file mode 100644 index 00000000000..a2d5fdb68bd --- /dev/null +++ b/src/vnet/dpo/replicate_dpo.c @@ -0,0 +1,759 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#undef REP_DEBUG + +#ifdef REP_DEBUG +#define REP_DBG(_rep, _fmt, _args...) \ +{ \ + u8* _tmp =NULL; \ + clib_warning("rep:[%s]:" _fmt, \ + replicate_format(replicate_get_index((_rep)), \ + 0, _tmp), \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define REP_DBG(_p, _fmt, _args...) +#endif + + +/** + * Pool of all DPOs. It's not static so the DP can have fast access + */ +replicate_t *replicate_pool; + +/** + * The one instance of replicate main + */ +replicate_main_t replicate_main; + +static inline index_t +replicate_get_index (const replicate_t *rep) +{ + return (rep - replicate_pool); +} + +static inline dpo_id_t* +replicate_get_buckets (replicate_t *rep) +{ + if (REP_HAS_INLINE_BUCKETS(rep)) + { + return (rep->rep_buckets_inline); + } + else + { + return (rep->rep_buckets); + } +} + +static replicate_t * +replicate_alloc_i (void) +{ + replicate_t *rep; + + pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES); + memset(rep, 0, sizeof(*rep)); + + vlib_validate_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + vlib_zero_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + + return (rep); +} + +static u8* +replicate_format (index_t repi, + replicate_format_flags_t flags, + u32 indent, + u8 *s) +{ + vlib_counter_t to; + replicate_t *rep; + dpo_id_t *buckets; + u32 i; + + rep = replicate_get(repi); 
+ vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to); + buckets = replicate_get_buckets(rep); + + s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE); + s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets); + s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + s = format(s, "\n%U", format_white_space, indent+2); + s = format(s, "[%d]", i); + s = format(s, " %U", format_dpo_id, &buckets[i], indent+6); + } + return (s); +} + +u8* +format_replicate (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t); + + return (replicate_format(repi, flags, 0, s)); +} +static u8* +format_replicate_dpo (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + u32 indent = va_arg(*args, u32); + + return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s)); +} + + +static replicate_t * +replicate_create_i (u32 num_buckets, + dpo_proto_t rep_proto) +{ + replicate_t *rep; + + rep = replicate_alloc_i(); + rep->rep_n_buckets = num_buckets; + rep->rep_proto = rep_proto; + + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + } + + REP_DBG(rep, "create"); + + return (rep); +} + +index_t +replicate_create (u32 n_buckets, + dpo_proto_t rep_proto) +{ + return (replicate_get_index(replicate_create_i(n_buckets, rep_proto))); +} + +static inline void +replicate_set_bucket_i (replicate_t *rep, + u32 bucket, + dpo_id_t *buckets, + const dpo_id_t *next) +{ + dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next); +} + +void +replicate_set_bucket (index_t repi, + u32 bucket, + const dpo_id_t *next) +{ + replicate_t *rep; + dpo_id_t *buckets; + + rep = replicate_get(repi); + buckets = replicate_get_buckets(rep); + + ASSERT(bucket < rep->rep_n_buckets); + + replicate_set_bucket_i(rep, bucket, buckets, next); +} + +int 
+replicate_is_drop (const dpo_id_t *dpo) +{ + replicate_t *rep; + + if (DPO_REPLICATE != dpo->dpoi_type) + return (0); + + rep = replicate_get(dpo->dpoi_index); + + if (1 == rep->rep_n_buckets) + { + return (dpo_is_drop(replicate_get_bucket_i(rep, 0))); + } + return (0); +} + +const dpo_id_t * +replicate_get_bucket (index_t repi, + u32 bucket) +{ + replicate_t *rep; + + rep = replicate_get(repi); + + return (replicate_get_bucket_i(rep, bucket)); +} + + +static load_balance_path_t * +replicate_multipath_next_hop_fixup (load_balance_path_t *nhs, + dpo_proto_t drop_proto) +{ + if (0 == vec_len(nhs)) + { + load_balance_path_t *nh; + + /* + * we need something for the replicate. so use the drop + */ + vec_add2(nhs, nh, 1); + + nh->path_weight = 1; + dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto)); + } + + return (nhs); +} + +/* + * Fill in adjacencies in block based on corresponding + * next hop adjacencies. + */ +static void +replicate_fill_buckets (replicate_t *rep, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t * nh; + u16 ii, bucket; + + bucket = 0; + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo); + } + } +} + +static inline void +replicate_set_n_buckets (replicate_t *rep, + u32 n_buckets) +{ + rep->rep_n_buckets = n_buckets; +} + +void +replicate_multipath_update (const dpo_id_t *dpo, + load_balance_path_t * next_hops) +{ + load_balance_path_t * nh, * nhs; + dpo_id_t *tmp_dpo; + u32 ii, n_buckets; + replicate_t *rep; + + ASSERT(DPO_REPLICATE == dpo->dpoi_type); + rep = replicate_get(dpo->dpoi_index); + nhs = replicate_multipath_next_hop_fixup(next_hops, + rep->rep_proto); + n_buckets = vec_len(nhs); + + if (0 == rep->rep_n_buckets) + { + /* + * first time initialisation. 
no packets inflight, so we can write + * at leisure. + */ + replicate_set_n_buckets(rep, n_buckets); + + if (!REP_HAS_INLINE_BUCKETS(rep)) + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else + { + /* + * This is a modification of an existing replicate. + * We need to ensure that packets in flight see a consistent state, that + * is the number of reported buckets the REP has + * is not more than it actually has. So if the + * number of buckets is increasing, we must update the bucket array first, + * then the reported number. vice-versa if the number of buckets goes down. + */ + if (n_buckets == rep->rep_n_buckets) + { + /* + * no change in the number of buckets. we can simply fill what + * is new over what is old. + */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else if (n_buckets > rep->rep_n_buckets) + { + /* + * we have more buckets. the old replicate map (if there is one) + * will remain valid, i.e. mapping to indices within range, so we + * update it last. + */ + if (n_buckets > REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * the new increased number of buckets is crossing the threshold + * from the inline storage to out-line. Alloc the outline buckets + * first, then fixup the number. then reset the inlines. + */ + ASSERT(NULL == rep->rep_buckets); + vec_validate_aligned(rep->rep_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + rep->rep_buckets, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + CLIB_MEMORY_BARRIER(); + + for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++) + { + dpo_reset(&rep->rep_buckets_inline[ii]); + } + } + else + { + if (n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * we are not crossing the threshold and it's still inline buckets. 
+ * we can write the new on the old.. + */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + } + else + { + /* + * we are not crossing the threshold. We need a new bucket array to + * hold the increased number of choices. + */ + dpo_id_t *new_buckets, *old_buckets, *tmp_dpo; + + new_buckets = NULL; + old_buckets = replicate_get_buckets(rep); + + vec_validate_aligned(new_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, new_buckets, n_buckets); + CLIB_MEMORY_BARRIER(); + rep->rep_buckets = new_buckets; + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + vec_foreach(tmp_dpo, old_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(old_buckets); + } + } + } + else + { + /* + * bucket size shrinkage. + */ + if (n_buckets <= REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS) + { + /* + * the new decreased number of buckets is crossing the threshold + * from out-line storage to inline: + * 1 - Fill the inline buckets, + * 2 - fixup the number (and this point the inline buckets are + * used). + * 3 - free the outline buckets + */ + replicate_fill_buckets(rep, nhs, + rep->rep_buckets_inline, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + vec_foreach(tmp_dpo, rep->rep_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(rep->rep_buckets); + } + else + { + /* + * not crossing the threshold. + * 1 - update the number to the smaller size + * 2 - write the new buckets + * 3 - reset those no longer used. 
+ */ + dpo_id_t *buckets; + u32 old_n_buckets; + + old_n_buckets = rep->rep_n_buckets; + buckets = replicate_get_buckets(rep); + + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + replicate_fill_buckets(rep, nhs, + buckets, + n_buckets); + + for (ii = n_buckets; ii < old_n_buckets; ii++) + { + dpo_reset(&buckets[ii]); + } + } + } + } + + vec_foreach (nh, nhs) + { + dpo_reset(&nh->path_dpo); + } + vec_free(nhs); +} + +static void +replicate_lock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks++; +} + +static void +replicate_destroy (replicate_t *rep) +{ + dpo_id_t *buckets; + int i; + + buckets = replicate_get_buckets(rep); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + dpo_reset(&buckets[i]); + } + + REP_DBG(rep, "destroy"); + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_free(rep->rep_buckets); + } + + pool_put(replicate_pool, rep); +} + +static void +replicate_unlock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks--; + + if (0 == rep->rep_locks) + { + replicate_destroy(rep); + } +} + +static void +replicate_mem_show (void) +{ + fib_show_memory_usage("replicate", + pool_elts(replicate_pool), + pool_len(replicate_pool), + sizeof(replicate_t)); +} + +const static dpo_vft_t rep_vft = { + .dv_lock = replicate_lock, + .dv_unlock = replicate_unlock, + .dv_format = format_replicate_dpo, + .dv_mem_show = replicate_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a replicate + * object. + * + * this means that these graph nodes are ones from which a replicate is the + * parent object in the DPO-graph. 
+ */ +const static char* const replicate_ip4_nodes[] = +{ + "ip4-replicate", + NULL, +}; +const static char* const replicate_ip6_nodes[] = +{ + "ip6-replicate", + NULL, +}; +const static char* const replicate_mpls_nodes[] = +{ + "mpls-replicate", + NULL, +}; + +const static char* const * const replicate_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = replicate_ip4_nodes, + [DPO_PROTO_IP6] = replicate_ip6_nodes, + [DPO_PROTO_MPLS] = replicate_mpls_nodes, +}; + +void +replicate_module_init (void) +{ + dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes); +} + +static clib_error_t * +replicate_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t repi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &repi)) + ; + else + break; + } + + if (INDEX_INVALID != repi) + { + vlib_cli_output (vm, "%U", format_replicate, repi, + REPLICATE_FORMAT_DETAIL); + } + else + { + replicate_t *rep; + + pool_foreach(rep, replicate_pool, + ({ + vlib_cli_output (vm, "%U", format_replicate, + replicate_get_index(rep), + REPLICATE_FORMAT_NONE); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (replicate_show_command, static) = { + .path = "show replicate", + .short_help = "show replicate []", + .function = replicate_show, +}; + +typedef struct replicate_trace_t_ +{ + index_t rep_index; + index_t dpo_index; + dpo_type_t dpo_type; +} replicate_trace_t; + +static uword +replicate_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; + u32 n_left_from, * from, * to_next, next_index; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && 
n_left_to_next > 0) + { + u32 next0, ci0, bi0, bucket, repi0; + const replicate_t *rep0; + vlib_buffer_t * b0, *c0; + const dpo_id_t *dpo0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + rep0 = replicate_get(repi0); + + vlib_increment_combined_counter( + cm, cpu_index, repi0, 1, + vlib_buffer_length_in_chain(vm, b0)); + + /* ship the original to the first bucket */ + dpo0 = replicate_get_bucket_i(rep0, 0); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + + /* ship copies to the rest of the buckets */ + for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++) + { + /* Make a copy */ + c0 = vlib_buffer_copy(vm, b0); + ci0 = vlib_get_buffer_index(vm, c0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + dpo0 = replicate_get_bucket_i(rep0, bucket); + next0 = dpo0->dpoi_next_node; + vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static u8 * +format_replicate_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = 
va_arg (*args, vlib_node_t *); + replicate_trace_t *t = va_arg (*args, replicate_trace_t *); + + s = format (s, "replicate: %d via %U:%d", + t->rep_index, + format_dpo_type, t->dpo_type, + t->dpo_index); + return s; +} + +static uword +ip4_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip4_replicate_node) = { + .function = ip4_replicate, + .name = "ip4-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +static uword +ip6_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip6_replicate_node) = { + .function = ip6_replicate, + .name = "ip6-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h new file mode 100644 index 00000000000..a564739c9f2 --- /dev/null +++ b/src/vnet/dpo/replicate_dpo.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @brief + * + */ + +#ifndef __REPLICATE_DPO_H__ +#define __REPLICATE_DPO_H__ + +#include +#include +#include +#include +#include + +/** + * replicate main + */ +typedef struct replicate_main_t_ +{ + vlib_combined_counter_main_t repm_counters; +} replicate_main_t; + +extern replicate_main_t replicate_main; + +/** + * The number of buckets that a load-balance object can have and still + * fit in one cache-line + */ +#define REP_NUM_INLINE_BUCKETS 4 + +/** + * The FIB DPO provieds; + * - load-balancing over the next DPOs in the chain/graph + * - per-route counters + */ +typedef struct replicate_t_ { + /** + * number of buckets in the load-balance. always a power of 2. + */ + u16 rep_n_buckets; + + /** + * The protocol of packets that traverse this REP. + * need in combination with the flow hash config to determine how to hash. + * u8. + */ + dpo_proto_t rep_proto; + + /** + * The number of locks, which is approximately the number of users, + * of this load-balance. + * Load-balance objects of via-entries are heavily shared by recursives, + * so the lock count is a u32. + */ + u32 rep_locks; + + /** + * Vector of buckets containing the next DPOs, sized as repo_num + */ + dpo_id_t *rep_buckets; + + /** + * The rest of the cache line is used for buckets. In the common case + * where there there are less than 4 buckets, then the buckets are + * on the same cachlie and we save ourselves a pointer dereferance in + * the data-path. 
+ */ + dpo_id_t rep_buckets_inline[REP_NUM_INLINE_BUCKETS]; +} replicate_t; + +STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES, + "A replicate object size exceeds one cachline"); + +/** + * Flags controlling load-balance formatting/display + */ +typedef enum replicate_format_flags_t_ { + REPLICATE_FORMAT_NONE, + REPLICATE_FORMAT_DETAIL = (1 << 0), +} replicate_format_flags_t; + +extern index_t replicate_create(u32 num_buckets, + dpo_proto_t rep_proto); +extern void replicate_multipath_update( + const dpo_id_t *dpo, + load_balance_path_t *next_hops); + +extern void replicate_set_bucket(index_t repi, + u32 bucket, + const dpo_id_t *next); + +extern u8* format_replicate(u8 * s, va_list * args); + +extern const dpo_id_t *replicate_get_bucket(index_t repi, + u32 bucket); +extern int replicate_is_drop(const dpo_id_t *dpo); + +/** + * The encapsulation breakages are for fast DP access + */ +extern replicate_t *replicate_pool; +static inline replicate_t* +replicate_get (index_t repi) +{ + return (pool_elt_at_index(replicate_pool, repi)); +} + +#define REP_HAS_INLINE_BUCKETS(_rep) \ + ((_rep)->rep_n_buckets <= REP_NUM_INLINE_BUCKETS) + +static inline const dpo_id_t * +replicate_get_bucket_i (const replicate_t *rep, + u32 bucket) +{ + ASSERT(bucket < rep->rep_n_buckets); + + if (PREDICT_TRUE(REP_HAS_INLINE_BUCKETS(rep))) + { + return (&rep->rep_buckets_inline[bucket]); + } + else + { + return (&rep->rep_buckets[bucket]); + } +} + +extern void replicate_module_init(void); + +#endif -- cgit 1.2.3-korg