| author | Dave Barach <dbarach@cisco.com> | 2019-07-26 11:58:16 -0400 |
|---|---|---|
| committer | Florin Coras <florin.coras@gmail.com> | 2019-07-29 22:35:19 +0000 |
| commit | d56550c2b669558aa38d93f44a9a3b31e0b9370f (patch) | |
| tree | fc5c06b5b2586dbda4086bbec425d01e29984f04 /src/examples/sample-plugin/sample | |
| parent | 2b5fed8696ce2a9b67e63cf5b5dbf49505172c9a (diff) | |
misc: add vnet/pipeline.h example
To the sample plugin. We should probably suggest that folks use the
pipeline.h coding model more often. It's really easy, and these days
the performance results are similar to quad-single loop coding.
Type: refactor
Change-Id: Ie2caa087972737e6d9c31c4ac79355f3d8ced282
Signed-off-by: Dave Barach <dave@barachs.net>
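
For readers who haven't used it, the `vnet/pipeline.h` contract is visible in the diff below: the node defines `NSTAGES`, `STAGE_INLINE`, one function per stage (`stage0` here is aliased to the stock `generic_stage0` prefetch stage), and a `last_stage` that does the per-packet work and returns the buffer's next-index; including the header then generates a `dispatch_pipeline()` that covers the whole frame. A minimal sketch of that shape, assuming a two-stage pipeline is acceptable to the header and with the per-packet body left as a placeholder (illustrative only, not part of this commit):

```c
/* Skeleton of a pipeline.h consumer, following the shape used in the
 * diff below; the body of last_stage is a placeholder. */
#define NSTAGES 2		/* stage0 (prefetch) + last_stage */
#define STAGE_INLINE inline __attribute__((__always_inline__))

#define stage0 generic_stage0	/* stock buffer-prefetch stage */

static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
  /* per-packet work on b goes here */
  return 0;			/* next-index for this buffer */
}

#include <vnet/pipeline.h>	/* generates dispatch_pipeline (vm, node, frame) */
```

Raising `NSTAGES` inserts extra stages between the prefetch and the work, which widens the prefetch stride; that is exactly what the nil `stage1`/`stage2` functions in the diff below are for.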
Diffstat (limited to 'src/examples/sample-plugin/sample')
-rw-r--r-- | src/examples/sample-plugin/sample/node.c | 90
1 file changed, 89 insertions(+), 1 deletion(-)
```diff
diff --git a/src/examples/sample-plugin/sample/node.c b/src/examples/sample-plugin/sample/node.c
index 1f0a2e9775b..2c71b48b42e 100644
--- a/src/examples/sample-plugin/sample/node.c
+++ b/src/examples/sample-plugin/sample/node.c
@@ -76,8 +76,8 @@ typedef enum
  *
  * Node costs 30 clocks/pkt at a vector size of 51
  */
 
-#define VERSION_1 1
+#define VERSION_1 1
 #ifdef VERSION_1
 #define foreach_mac_address_offset \
 _(0) \
@@ -599,6 +599,94 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
 }
 #endif
 
+/*
+ * This version computes all of the buffer pointers in
+ * one motion, uses a fully pipelined loop model, and
+ * traces the entire frame in one motion.
+ *
+ * It's performance-competative with other coding paradigms,
+ * and it's the simplest way to write performant vpp code
+ */
+
+
+#ifdef VERSION_4
+
+#define u8x16_shuffle __builtin_shuffle
+
+static u8x16 swapmac =
+  { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
+
+/* Final stage in the pipeline, do the mac swap */
+static inline u32
+last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+  u8x16 src_dst0;
+  src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
+  src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+  ((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
+  vnet_buffer (b)->sw_if_index[VLIB_TX] =
+    vnet_buffer (b)->sw_if_index[VLIB_RX];
+  /* set next-index[] to 0 for this buffer */
+  return 0;
+}
+
+/*
+ * Add a couple of nil stages to increase the prefetch stride.
+ * For any specific platform, the optimal prefetch stride may differ.
+ */
+static inline void
+stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+}
+
+static inline void
+stage2 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+}
+
+#define NSTAGES 4
+#define STAGE_INLINE inline __attribute__((__always_inline__))
+
+#define stage0 generic_stage0
+
+#include <vnet/pipeline.h>
+
+VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  dispatch_pipeline (vm, node, frame);
+
+  vlib_node_increment_counter (vm, sample_node.index,
+                               SAMPLE_ERROR_SWAPPED, frame->n_vectors);
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      int i;
+      b = bufs;
+
+      for (i = 0; i < frame->n_vectors; i++)
+        {
+          if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+            {
+              ethernet_header_t *en;
+              sample_trace_t *t =
+                vlib_add_trace (vm, node, b[0], sizeof (*t));
+              t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+              t->next_index = SAMPLE_NEXT_INTERFACE_OUTPUT;
+              en = vlib_buffer_get_current (b[0]);
+              clib_memcpy_fast (t->new_src_mac, en->src_address,
+                                sizeof (t->new_src_mac));
+              clib_memcpy_fast (t->new_dst_mac, en->dst_address,
+                                sizeof (t->new_dst_mac));
+              b++;
+            }
+          else
+            break;
+        }
+    }
+  return frame->n_vectors;
+}
+#endif
+
 /* *INDENT-OFF* */
 VLIB_REGISTER_NODE (sample_node) = {
```
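
One caveat before copying VERSION_4 verbatim: as committed, the trace block uses `b` and `bufs` without declaring them anywhere in `VLIB_NODE_FN (sample_node)`. The comment above the block says this version "computes all of the buffer pointers in one motion", which in VPP is conventionally done with `vlib_get_buffers()`; a sketch of the setup the loop appears to assume (not part of the commit):

```c
/* Sketch: declarations the VERSION_4 trace loop appears to rely on.
 * vlib_get_buffers () turns the frame's buffer indices into buffer
 * pointers in a single pass. */
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;

vlib_get_buffers (vm, vlib_frame_vector_args (frame), bufs,
                  frame->n_vectors);
b = bufs;
```

Two details of the committed code are worth calling out. The trace loop breaks at the first buffer lacking `VLIB_BUFFER_IS_TRACED`, a common VPP trace-scan shortcut, rather than walking the entire frame. And the `swapmac` shuffle works because a 16-byte vector load at the start of the Ethernet header covers both MAC addresses: lanes 6-11 (the source address) move to lanes 0-5 (the destination slot), lanes 0-5 move to 6-11, and lanes 12-15 pass through unchanged, so a single shuffle swaps both addresses.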