diff options
Diffstat (limited to 'vnet/vnet/mcast/mcast.c')
-rw-r--r-- | vnet/vnet/mcast/mcast.c | 563 |
1 files changed, 563 insertions, 0 deletions
diff --git a/vnet/vnet/mcast/mcast.c b/vnet/vnet/mcast/mcast.c new file mode 100644 index 00000000..e9177c71 --- /dev/null +++ b/vnet/vnet/mcast/mcast.c @@ -0,0 +1,563 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/mcast/mcast.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/ip4.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 group_index; +} mcast_prep_trace_t; + +/* packet trace format function */ +static u8 * format_mcast_prep_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mcast_prep_trace_t * t = va_arg (*args, mcast_prep_trace_t *); + + s = format (s, "MCAST_PREP: group %d, next index %d, tx_sw_if_index %d", + t->group_index, t->next_index, t->sw_if_index); + return s; +} + +mcast_main_t mcast_main; +vlib_node_registration_t mcast_prep_node; +vlib_node_registration_t mcast_recycle_node; + +#define foreach_mcast_prep_error \ +_(MCASTS, "Multicast Packets") + +typedef enum { +#define _(sym,str) MCAST_PREP_ERROR_##sym, + foreach_mcast_prep_error +#undef _ + MCAST_PREP_N_ERROR, +} mcast_prep_error_t; + +static char * mcast_prep_error_strings[] = { +#define _(sym,string) string, + foreach_mcast_prep_error +#undef _ +}; + +typedef enum { + MCAST_PREP_NEXT_DROP, + MCAST_PREP_N_NEXT, +} mcast_prep_next_t; + +static uword +mcast_prep_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + mcast_prep_next_t next_index; + mcast_main_t * mcm = &mcast_main; + vlib_node_t *n = vlib_get_node (vm, mcast_prep_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = 0; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = 0; + + /* $$$$ your message in this space. Process 2 x pkts */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0, adj_index0; + mcast_group_t * g0; + ip_adjacency_t * adj0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = ip_get_adjacency (lm, adj_index0); + vnet_buffer(b0)->mcast.mcast_group_index = adj0->mcast_group_index; + g0 = pool_elt_at_index (mcm->groups, adj0->mcast_group_index); + + /* + * Handle the degenerate single-copy case + * If we don't change the freelist, the packet will never + * make it to the recycle node... + */ + if (PREDICT_TRUE(vec_len (g0->members) > 1)) + { + /* Save the original free list index */ + vnet_buffer(b0)->mcast.original_free_list_index = + b0->free_list_index; + + /* Swap in the multicast recycle list */ + b0->free_list_index = mcm->mcast_recycle_list_index; + + /* + * Make sure that intermediate "frees" don't screw up + */ + b0->clone_count = vec_len (g0->members); + + /* Set up for the recycle node */ + vnet_buffer(b0)->mcast.mcast_current_index = 1; + } + + /* Transmit the pkt on the first interface */ + next0 = g0->members[0].prep_and_recycle_node_next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + g0->members[0].tx_sw_if_index; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + mcast_prep_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + MCAST_PREP_ERROR_MCASTS] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mcast_prep_node) = { + .function = mcast_prep_node_fn, + .name = "mcast_prep", + .vector_size = sizeof (u32), + .format_trace = format_mcast_prep_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(mcast_prep_error_strings), + .error_strings = mcast_prep_error_strings, + + .n_next_nodes = MCAST_PREP_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [MCAST_PREP_NEXT_DROP] = "error-drop", + }, +}; + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 current_member; + u32 group_index; +} mcast_recycle_trace_t; + +static u8 * format_mcast_recycle_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mcast_recycle_trace_t * t = va_arg (*args, mcast_recycle_trace_t *); + + s = format (s, +"MCAST_R: group %d, current member %d next (node) index %d, tx_sw_if_index %d", + t->group_index, t->current_member, t->next_index, t->sw_if_index); + return s; +} + +#define foreach_mcast_recycle_error \ +_(RECYCLES, "Multicast Recycles") + +typedef enum { +#define _(sym,str) MCAST_RECYCLE_ERROR_##sym, + foreach_mcast_recycle_error +#undef _ + MCAST_RECYCLE_N_ERROR, +} mcast_recycle_error_t; + +static char * mcast_recycle_error_strings[] = { +#define _(sym,string) string, + foreach_mcast_recycle_error +#undef _ +}; + +typedef enum { + MCAST_RECYCLE_NEXT_DROP, + MCAST_RECYCLE_N_NEXT, +} mcast_recycle_next_t; + +static uword +mcast_recycle_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + mcast_recycle_next_t next_index; + mcast_main_t * mcm = &mcast_main; + vlib_node_t *n = vlib_get_node (vm, mcast_recycle_node.index); + u32 node_counter_base_index = n->error_heap_index; + vlib_error_main_t * em = &vm->error_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + next0 = 0; + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + next1 = 0; + + /* $$$$ your message in this space. Process 2 x pkts */ + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 current_member0; + mcast_group_t * g0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + g0 = pool_elt_at_index (mcm->groups, + vnet_buffer(b0)->mcast.mcast_group_index); + + /* No more replicas? */ + if (b0->clone_count == 1) + { + /* Restore the original free list index */ + b0->free_list_index = + vnet_buffer(b0)->mcast.original_free_list_index; + } + current_member0 = vnet_buffer(b0)->mcast.mcast_current_index; + + next0 = + g0->members[current_member0].prep_and_recycle_node_next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = + g0->members[current_member0].tx_sw_if_index; + + vnet_buffer(b0)->mcast.mcast_current_index = + current_member0 + 1; + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { + mcast_recycle_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; + t->current_member = current_member0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + em->counters[node_counter_base_index + MCAST_RECYCLE_ERROR_RECYCLES] += + frame->n_vectors; + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mcast_recycle_node) = { + .function = mcast_recycle_node_fn, + .name = "mcast-recycle", + .vector_size = sizeof (u32), + .format_trace = format_mcast_recycle_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(mcast_recycle_error_strings), + .error_strings = mcast_recycle_error_strings, + + .n_next_nodes = MCAST_RECYCLE_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [MCAST_RECYCLE_NEXT_DROP] = "error-drop", + }, +}; + +/* + * fish pkts back from the recycle queue/freelist + * un-flatten the context chains + */ +static void mcast_recycle_callback (vlib_main_t *vm, + vlib_buffer_free_list_t * fl) +{ + vlib_frame_t * f = 0; + u32 n_left_from; + u32 n_left_to_next = 0; + u32 n_this_frame = 0; + u32 * from; + u32 * to_next; + u32 bi0, pi0; + vlib_buffer_t *b0; + vlib_buffer_t *bnext0; + int i; + + /* aligned, unaligned buffers */ + for (i = 0; i < 2; i++) + { + if (i == 0) + { + from = fl->aligned_buffers; + n_left_from = vec_len (from); + } + else + { + from = fl->unaligned_buffers; + n_left_from = vec_len (from); + } + + while (n_left_from > 0) + { + if (PREDICT_FALSE(n_left_to_next == 0)) + { + if (f) + { + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); + } + + f = vlib_get_frame_to_node (vm, mcast_recycle_node.index); + to_next = vlib_frame_vector_args (f); + n_left_to_next = VLIB_FRAME_SIZE; + n_this_frame = 0; + } + + bi0 = from[0]; + if (PREDICT_TRUE(n_left_from > 1)) + { + pi0 = from[1]; + vlib_prefetch_buffer_with_index(vm,pi0,LOAD); + } + + bnext0 = b0 = vlib_get_buffer (vm, bi0); + + while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + from += 1; + n_left_from -= 1; + bnext0 = vlib_get_buffer (vm, bnext0->next_buffer); + } + to_next[0] = bi0; + + if (CLIB_DEBUG > 0) + vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); + + from++; + to_next++; + n_this_frame++; + n_left_to_next--; + n_left_from--; + } + } + + vec_reset_length (fl->aligned_buffers); + vec_reset_length (fl->unaligned_buffers); + + if (f) + { + ASSERT(n_this_frame); + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); + } +} + +clib_error_t *mcast_init (vlib_main_t *vm) +{ + mcast_main_t * mcm = &mcast_main; + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * fl; + + mcm->vlib_main = vm; + mcm->vnet_main = vnet_get_main(); + mcm->mcast_recycle_list_index = + vlib_buffer_create_free_list (vm, 1024 /* fictional */, "mcast-recycle"); + + fl = pool_elt_at_index (bm->buffer_free_list_pool, + mcm->mcast_recycle_list_index); + + fl->buffers_added_to_freelist_function = mcast_recycle_callback; + + return 0; +} + +VLIB_INIT_FUNCTION (mcast_init); + + |