diff options
Diffstat (limited to 'src/vnet/dpo/replicate_dpo.c')
-rw-r--r-- | src/vnet/dpo/replicate_dpo.c | 759 |
1 files changed, 759 insertions, 0 deletions
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c new file mode 100644 index 00000000000..a2d5fdb68bd --- /dev/null +++ b/src/vnet/dpo/replicate_dpo.c @@ -0,0 +1,759 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/lookup.h> +#include <vnet/dpo/replicate_dpo.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/adj/adj.h> + +#undef REP_DEBUG + +#ifdef REP_DEBUG +#define REP_DBG(_rep, _fmt, _args...) \ +{ \ + u8* _tmp =NULL; \ + clib_warning("rep:[%s]:" _fmt, \ + replicate_format(replicate_get_index((_rep)), \ + 0, _tmp), \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define REP_DBG(_p, _fmt, _args...) +#endif + + +/** + * Pool of all DPOs. It's not static so the DP can have fast access + */ +replicate_t *replicate_pool; + +/** + * The one instance of replicate main + */ +replicate_main_t replicate_main; + +static inline index_t +replicate_get_index (const replicate_t *rep) +{ + return (rep - replicate_pool); +} + +static inline dpo_id_t* +replicate_get_buckets (replicate_t *rep) +{ + if (REP_HAS_INLINE_BUCKETS(rep)) + { + return (rep->rep_buckets_inline); + } + else + { + return (rep->rep_buckets); + } +} + +static replicate_t * +replicate_alloc_i (void) +{ + replicate_t *rep; + + pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES); + memset(rep, 0, sizeof(*rep)); + + vlib_validate_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + vlib_zero_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + + return (rep); +} + +static u8* +replicate_format (index_t repi, + replicate_format_flags_t flags, + u32 indent, + u8 *s) +{ + vlib_counter_t to; + replicate_t *rep; + dpo_id_t *buckets; + u32 i; + + rep = replicate_get(repi); + vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to); + buckets = replicate_get_buckets(rep); + + s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE); + s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets); + s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + s = format(s, "\n%U", format_white_space, indent+2); + s = format(s, "[%d]", i); + s = format(s, " %U", format_dpo_id, &buckets[i], indent+6); + } + return (s); +} + +u8* +format_replicate (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t); + + return (replicate_format(repi, flags, 0, s)); +} +static u8* +format_replicate_dpo (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + u32 indent = va_arg(*args, u32); + + return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s)); +} + + +static replicate_t * +replicate_create_i (u32 num_buckets, + dpo_proto_t rep_proto) +{ + replicate_t *rep; + + rep = replicate_alloc_i(); + rep->rep_n_buckets = num_buckets; + rep->rep_proto = rep_proto; + + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + } + + REP_DBG(rep, "create"); + + return (rep); +} + +index_t +replicate_create (u32 n_buckets, + dpo_proto_t rep_proto) +{ + return (replicate_get_index(replicate_create_i(n_buckets, rep_proto))); +} + +static inline void +replicate_set_bucket_i (replicate_t *rep, + u32 bucket, + dpo_id_t *buckets, + const dpo_id_t *next) +{ + dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next); +} + +void +replicate_set_bucket (index_t repi, + u32 bucket, + const dpo_id_t *next) +{ + replicate_t *rep; + dpo_id_t *buckets; + + rep = replicate_get(repi); + buckets = replicate_get_buckets(rep); + + ASSERT(bucket < rep->rep_n_buckets); + + replicate_set_bucket_i(rep, bucket, buckets, next); +} + +int +replicate_is_drop (const dpo_id_t *dpo) +{ + replicate_t *rep; + + if (DPO_REPLICATE != dpo->dpoi_type) + return (0); + + rep = replicate_get(dpo->dpoi_index); + + if (1 == rep->rep_n_buckets) + { + return (dpo_is_drop(replicate_get_bucket_i(rep, 0))); + } + return (0); +} + +const dpo_id_t * +replicate_get_bucket (index_t repi, + u32 bucket) +{ + replicate_t *rep; + + rep = replicate_get(repi); + + return (replicate_get_bucket_i(rep, bucket)); +} + + +static load_balance_path_t * +replicate_multipath_next_hop_fixup (load_balance_path_t *nhs, + dpo_proto_t drop_proto) +{ + if (0 == vec_len(nhs)) + { + load_balance_path_t *nh; + + /* + * we need something for the replicate. so use the drop + */ + vec_add2(nhs, nh, 1); + + nh->path_weight = 1; + dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto)); + } + + return (nhs); +} + +/* + * Fill in adjacencies in block based on corresponding + * next hop adjacencies. + */ +static void +replicate_fill_buckets (replicate_t *rep, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t * nh; + u16 ii, bucket; + + bucket = 0; + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo); + } + } +} + +static inline void +replicate_set_n_buckets (replicate_t *rep, + u32 n_buckets) +{ + rep->rep_n_buckets = n_buckets; +} + +void +replicate_multipath_update (const dpo_id_t *dpo, + load_balance_path_t * next_hops) +{ + load_balance_path_t * nh, * nhs; + dpo_id_t *tmp_dpo; + u32 ii, n_buckets; + replicate_t *rep; + + ASSERT(DPO_REPLICATE == dpo->dpoi_type); + rep = replicate_get(dpo->dpoi_index); + nhs = replicate_multipath_next_hop_fixup(next_hops, + rep->rep_proto); + n_buckets = vec_len(nhs); + + if (0 == rep->rep_n_buckets) + { + /* + * first time initialisation. no packets inflight, so we can write + * at leisure. + */ + replicate_set_n_buckets(rep, n_buckets); + + if (!REP_HAS_INLINE_BUCKETS(rep)) + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else + { + /* + * This is a modification of an existing replicate. + * We need to ensure that packets in flight see a consistent state, that + * is the number of reported buckets the REP has + * is not more than it actually has. So if the + * number of buckets is increasing, we must update the bucket array first, + * then the reported number. vice-versa if the number of buckets goes down. + */ + if (n_buckets == rep->rep_n_buckets) + { + /* + * no change in the number of buckets. we can simply fill what + * is new over what is old. + */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else if (n_buckets > rep->rep_n_buckets) + { + /* + * we have more buckets. the old replicate map (if there is one) + * will remain valid, i.e. mapping to indices within range, so we + * update it last. + */ + if (n_buckets > REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * the new increased number of buckets is crossing the threshold + * from the inline storage to out-line. Alloc the outline buckets + * first, then fixup the number. then reset the inlines. + */ + ASSERT(NULL == rep->rep_buckets); + vec_validate_aligned(rep->rep_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + rep->rep_buckets, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + CLIB_MEMORY_BARRIER(); + + for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++) + { + dpo_reset(&rep->rep_buckets_inline[ii]); + } + } + else + { + if (n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * we are not crossing the threshold and it's still inline buckets. + * we can write the new on the old.. + */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + } + else + { + /* + * we are not crossing the threshold. We need a new bucket array to + * hold the increased number of choices. + */ + dpo_id_t *new_buckets, *old_buckets, *tmp_dpo; + + new_buckets = NULL; + old_buckets = replicate_get_buckets(rep); + + vec_validate_aligned(new_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, new_buckets, n_buckets); + CLIB_MEMORY_BARRIER(); + rep->rep_buckets = new_buckets; + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + vec_foreach(tmp_dpo, old_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(old_buckets); + } + } + } + else + { + /* + * bucket size shrinkage. + */ + if (n_buckets <= REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS) + { + /* + * the new decreased number of buckets is crossing the threshold + * from out-line storage to inline: + * 1 - Fill the inline buckets, + * 2 - fixup the number (and this point the inline buckets are + * used). + * 3 - free the outline buckets + */ + replicate_fill_buckets(rep, nhs, + rep->rep_buckets_inline, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + vec_foreach(tmp_dpo, rep->rep_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(rep->rep_buckets); + } + else + { + /* + * not crossing the threshold. + * 1 - update the number to the smaller size + * 2 - write the new buckets + * 3 - reset those no longer used. + */ + dpo_id_t *buckets; + u32 old_n_buckets; + + old_n_buckets = rep->rep_n_buckets; + buckets = replicate_get_buckets(rep); + + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + replicate_fill_buckets(rep, nhs, + buckets, + n_buckets); + + for (ii = n_buckets; ii < old_n_buckets; ii++) + { + dpo_reset(&buckets[ii]); + } + } + } + } + + vec_foreach (nh, nhs) + { + dpo_reset(&nh->path_dpo); + } + vec_free(nhs); +} + +static void +replicate_lock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks++; +} + +static void +replicate_destroy (replicate_t *rep) +{ + dpo_id_t *buckets; + int i; + + buckets = replicate_get_buckets(rep); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + dpo_reset(&buckets[i]); + } + + REP_DBG(rep, "destroy"); + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_free(rep->rep_buckets); + } + + pool_put(replicate_pool, rep); +} + +static void +replicate_unlock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks--; + + if (0 == rep->rep_locks) + { + replicate_destroy(rep); + } +} + +static void +replicate_mem_show (void) +{ + fib_show_memory_usage("replicate", + pool_elts(replicate_pool), + pool_len(replicate_pool), + sizeof(replicate_t)); +} + +const static dpo_vft_t rep_vft = { + .dv_lock = replicate_lock, + .dv_unlock = replicate_unlock, + .dv_format = format_replicate_dpo, + .dv_mem_show = replicate_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a replicate + * object. + * + * this means that these graph nodes are ones from which a replicate is the + * parent object in the DPO-graph. + */ +const static char* const replicate_ip4_nodes[] = +{ + "ip4-replicate", + NULL, +}; +const static char* const replicate_ip6_nodes[] = +{ + "ip6-replicate", + NULL, +}; +const static char* const replicate_mpls_nodes[] = +{ + "mpls-replicate", + NULL, +}; + +const static char* const * const replicate_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = replicate_ip4_nodes, + [DPO_PROTO_IP6] = replicate_ip6_nodes, + [DPO_PROTO_MPLS] = replicate_mpls_nodes, +}; + +void +replicate_module_init (void) +{ + dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes); +} + +static clib_error_t * +replicate_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t repi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &repi)) + ; + else + break; + } + + if (INDEX_INVALID != repi) + { + vlib_cli_output (vm, "%U", format_replicate, repi, + REPLICATE_FORMAT_DETAIL); + } + else + { + replicate_t *rep; + + pool_foreach(rep, replicate_pool, + ({ + vlib_cli_output (vm, "%U", format_replicate, + replicate_get_index(rep), + REPLICATE_FORMAT_NONE); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (replicate_show_command, static) = { + .path = "show replicate", + .short_help = "show replicate [<index>]", + .function = replicate_show, +}; + +typedef struct replicate_trace_t_ +{ + index_t rep_index; + index_t dpo_index; + dpo_type_t dpo_type; +} replicate_trace_t; + +static uword +replicate_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; + u32 n_left_from, * from, * to_next, next_index; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 next0, ci0, bi0, bucket, repi0; + const replicate_t *rep0; + vlib_buffer_t * b0, *c0; + const dpo_id_t *dpo0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + rep0 = replicate_get(repi0); + + vlib_increment_combined_counter( + cm, cpu_index, repi0, 1, + vlib_buffer_length_in_chain(vm, b0)); + + /* ship the original to the first bucket */ + dpo0 = replicate_get_bucket_i(rep0, 0); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + + /* ship copies to the rest of the buckets */ + for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++) + { + /* Make a copy */ + c0 = vlib_buffer_copy(vm, b0); + ci0 = vlib_get_buffer_index(vm, c0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + dpo0 = replicate_get_bucket_i(rep0, bucket); + next0 = dpo0->dpoi_next_node; + vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static u8 * +format_replicate_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + replicate_trace_t *t = va_arg (*args, replicate_trace_t *); + + s = format (s, "replicate: %d via %U:%d", + t->rep_index, + format_dpo_type, t->dpo_type, + t->dpo_index); + return s; +} + +static uword +ip4_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip4_replicate_node) = { + .function = ip4_replicate, + .name = "ip4-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +static uword +ip6_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip6_replicate_node) = { + .function = ip6_replicate, + .name = "ip6-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; |