summaryrefslogtreecommitdiffstats
path: root/src/examples/sample-plugin/sample/node.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/examples/sample-plugin/sample/node.c')
-rw-r--r--src/examples/sample-plugin/sample/node.c90
1 files changed, 89 insertions, 1 deletions
diff --git a/src/examples/sample-plugin/sample/node.c b/src/examples/sample-plugin/sample/node.c
index 1f0a2e9775b..2c71b48b42e 100644
--- a/src/examples/sample-plugin/sample/node.c
+++ b/src/examples/sample-plugin/sample/node.c
@@ -76,8 +76,8 @@ typedef enum
*
* Node costs 30 clocks/pkt at a vector size of 51
*/
-#define VERSION_1 1
+#define VERSION_1 1
#ifdef VERSION_1
#define foreach_mac_address_offset \
_(0) \
@@ -599,6 +599,94 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
}
#endif
+/*
+ * This version computes all of the buffer pointers in
+ * one motion, uses a fully pipelined loop model, and
+ * traces the entire frame in one motion.
+ *
+ * It's performance-competative with other coding paradigms,
+ * and it's the simplest way to write performant vpp code
+ */
+
+
+#ifdef VERSION_4
+
+#define u8x16_shuffle __builtin_shuffle
+
+static u8x16 swapmac =
+ { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
+
+/* Final stage in the pipeline, do the mac swap */
+static inline u32
+last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+ u8x16 src_dst0;
+ src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
+ src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ ((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (b)->sw_if_index[VLIB_RX];
+ /* set next-index[] to 0 for this buffer */
+ return 0;
+}
+
+/*
+ * Add a couple of nil stages to increase the prefetch stride.
+ * For any specific platform, the optimal prefetch stride may differ.
+ */
+static inline void
+stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+}
+
+static inline void
+stage2 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
+{
+}
+
+#define NSTAGES 4
+#define STAGE_INLINE inline __attribute__((__always_inline__))
+
+#define stage0 generic_stage0
+
+#include <vnet/pipeline.h>
+
+VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ dispatch_pipeline (vm, node, frame);
+
+ vlib_node_increment_counter (vm, sample_node.index,
+ SAMPLE_ERROR_SWAPPED, frame->n_vectors);
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ int i;
+ b = bufs;
+
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ethernet_header_t *en;
+ sample_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ t->next_index = SAMPLE_NEXT_INTERFACE_OUTPUT;
+ en = vlib_buffer_get_current (b[0]);
+ clib_memcpy_fast (t->new_src_mac, en->src_address,
+ sizeof (t->new_src_mac));
+ clib_memcpy_fast (t->new_dst_mac, en->dst_address,
+ sizeof (t->new_dst_mac));
+ b++;
+ }
+ else
+ break;
+ }
+ }
+ return frame->n_vectors;
+}
+#endif
+
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sample_node) =
{