diff options
Diffstat (limited to 'src/plugins/sflow/node.c')
-rw-r--r-- | src/plugins/sflow/node.c | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/src/plugins/sflow/node.c b/src/plugins/sflow/node.c new file mode 100644 index 00000000000..51826438834 --- /dev/null +++ b/src/plugins/sflow/node.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vlibmemory/api.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u8 new_src_mac[6]; + u8 new_dst_mac[6]; +} sflow_trace_t; + +#ifndef CLIB_MARCH_VARIANT +static u8 * +my_format_mac_address (u8 *s, va_list *args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], + a[4], a[5]); +} + +/* packet trace format function */ +static u8 * +format_sflow_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sflow_trace_t *t = va_arg (*args, sflow_trace_t *); + + s = format (s, "SFLOW: sw_if_index %d, next index %d\n", t->sw_if_index, + t->next_index); + s = format (s, " src %U -> dst %U", my_format_mac_address, t->new_src_mac, + my_format_mac_address, t->new_dst_mac); + return s; +} + +vlib_node_registration_t sflow_node; + +#endif /* CLIB_MARCH_VARIANT */ + +#ifndef CLIB_MARCH_VARIANT +static char *sflow_error_strings[] = { +#define _(sym, string) string, + foreach_sflow_error +#undef _ +}; +#endif /* CLIB_MARCH_VARIANT */ + +typedef enum +{ + SFLOW_NEXT_ETHERNET_INPUT, + SFLOW_N_NEXT, +} sflow_next_t; + +VLIB_NODE_FN (sflow_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_left_from, *from, *to_next; + sflow_next_t next_index; + + sflow_main_t *smp = &sflow_main; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + uword thread_index = os_get_thread_index (); + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + + /* note that sfwk->skip==1 means "take the next packet", + so we never see sfwk->skip==0. */ + + u32 pkts = n_left_from; + if (PREDICT_TRUE (sfwk->skip > pkts)) + { + /* skip the whole frame-vector */ + sfwk->skip -= pkts; + sfwk->pool += pkts; + } + else + { + while (pkts >= sfwk->skip) + { + /* reach in to get the one we want. */ + vlib_buffer_t *bN = vlib_get_buffer (vm, from[sfwk->skip - 1]); + + /* Sample this packet header. */ + u32 hdr = bN->current_length; + if (hdr > smp->headerB) + hdr = smp->headerB; + + ethernet_header_t *en = vlib_buffer_get_current (bN); + u32 if_index = vnet_buffer (bN)->sw_if_index[VLIB_RX]; + vnet_hw_interface_t *hw = + vnet_get_sup_hw_interface (smp->vnet_main, if_index); + if (hw) + if_index = hw->hw_if_index; + else + { + // TODO: can we get interfaces that have no hw interface? + // If so, should we ignore the sample? + } + + sflow_sample_t sample = { + .samplingN = sfwk->smpN, + .input_if_index = if_index, + .sampled_packet_size = + bN->current_length + bN->total_length_not_including_first_buffer, + .header_bytes = hdr + }; + + // TODO: what bit in the buffer can we set right here to indicate + // that this packet was sampled (and perhaps another bit to say if it + // was dropped or sucessfully enqueued)? That way we can check it + // below if the packet is traced, and indicate that in the trace + // output. + + // TODO: we end up copying the header twice here. Consider allowing + // the enqueue to be just a little more complex. Like this: + // if(!sflow_fifo_enqueue(&sfwk->fifo, &sample, en, hdr). + // With headerB==128 that would be memcpy(,,24) plus memcpy(,,128) + // instead of the memcpy(,,128) plus memcpy(,,24+256) that we do + // here. (We also know that it could be done as a multiple of 8 + // (aligned) bytes because the sflow_sample_t fields are (6xu32) and + // the headerB setting is quantized to the nearest 32 bytes, so there + // may be ways to make it even easier for the compiler.) + sfwk->smpl++; + memcpy (sample.header, en, hdr); + if (PREDICT_FALSE (!sflow_fifo_enqueue (&sfwk->fifo, &sample))) + sfwk->drop++; + + pkts -= sfwk->skip; + sfwk->pool += sfwk->skip; + sfwk->skip = sflow_next_random_skip (sfwk); + } + /* We took a sample (or several) from this frame-vector, but now we are + skipping the rest. */ + sfwk->skip -= pkts; + sfwk->pool += pkts; + } + + /* the rest of this is boilerplate code just to make sure + * that packets are passed on the same way as they would + * have been if this node were not enabled. + * TODO: If there is ever a way to do this in one step + * (i.e. pass on the whole frame-vector unchanged) then it + * might help performance. + */ + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 next0 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next1 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next2 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next3 = SFLOW_NEXT_ETHERNET_INPUT; + ethernet_header_t *en0, *en1, *en2, *en3; + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0-b3 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + /* do this to always pass on to the next node on feature arc */ + vnet_feature_next (&next0, b0); + vnet_feature_next (&next1, b1); + vnet_feature_next (&next2, b2); + vnet_feature_next (&next3, b3); + + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + ASSERT (b2->current_data == 0); + ASSERT (b3->current_data == 0); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current (b1); + en2 = vlib_buffer_get_current (b2); + en3 = vlib_buffer_get_current (b3); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + clib_memcpy (t->new_src_mac, en0->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en0->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + t->next_index = next1; + clib_memcpy (t->new_src_mac, en1->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en1->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b2, sizeof (*t)); + t->sw_if_index = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + t->next_index = next2; + clib_memcpy (t->new_src_mac, en2->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en2->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b3, sizeof (*t)); + t->sw_if_index = vnet_buffer (b3)->sw_if_index[VLIB_RX]; + t->next_index = next3; + clib_memcpy (t->new_src_mac, en3->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en3->dst_address, + sizeof (t->new_dst_mac)); + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = SFLOW_NEXT_ETHERNET_INPUT; + ethernet_header_t *en0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* do this to always pass on to the next node on feature arc */ + vnet_feature_next (&next0, b0); + + /* + * Direct from the driver, we should be at offset 0 + * aka at &b0->data[0] + */ + ASSERT (b0->current_data == 0); + + en0 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + clib_memcpy (t->new_src_mac, en0->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en0->dst_address, + sizeof (t->new_dst_mac)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +#ifndef CLIB_MARCH_VARIANT +VLIB_REGISTER_NODE (sflow_node) = +{ + .name = "sflow", + .vector_size = sizeof (u32), + .format_trace = format_sflow_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(sflow_error_strings), + .error_strings = sflow_error_strings, + .n_next_nodes = SFLOW_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [SFLOW_NEXT_ETHERNET_INPUT] = "ethernet-input", + }, +}; +#endif /* CLIB_MARCH_VARIANT */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |