summaryrefslogtreecommitdiffstats
path: root/src/vnet/pipeline.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/pipeline.h')
-rw-r--r--src/vnet/pipeline.h456
1 files changed, 456 insertions, 0 deletions
diff --git a/src/vnet/pipeline.h b/src/vnet/pipeline.h
new file mode 100644
index 00000000000..a4aa5cf5277
--- /dev/null
+++ b/src/vnet/pipeline.h
@@ -0,0 +1,456 @@
+/*
+ * vnet/pipeline.h: software pipeline
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Usage example.
+ *
+ * #define NSTAGES 3 or whatever
+ *
+ * <Define pipeline stages>
+ *
+ * #include <vnet/pipeline.h>
+ *
+ * static uword my_node_fn (vlib_main_t * vm,
+ * vlib_node_runtime_t * node,
+ * vlib_frame_t * frame)
+ * {
+ * return dispatch_pipeline (vm, node, frame);
+ * }
+ *
+ */
+
+#ifndef NSTAGES
+#error files which #include <vnet/pipeline.h> must define NSTAGES
+#endif
+
+#ifndef STAGE_INLINE
+#define STAGE_INLINE inline
+#endif
+
+/*
+ * A prefetch stride of 2 is quasi-equivalent to doubling the number
+ * of stages with every other pipeline stage empty.
+ */
+
+/*
+ * This is a typical first pipeline stage, which prefetches
+ * buffer metadata and the first line of pkt data.
+ * To use it:
+ * #define stage0 generic_stage0
+ */
+static STAGE_INLINE void
+generic_stage0 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, u32 buffer_index)
+{
+ /* generic default stage 0 here */
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+ vlib_prefetch_buffer_header (b, STORE);
+ CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+#if NSTAGES == 2
+
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ to_next[0] = from[pi - 1];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 1]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 1], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ {
+ to_next[0] = from[pi - 1];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 1]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 1], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 3
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ to_next[0] = from[pi - 2];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 2]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 2], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ {
+ to_next[0] = from[pi - 2];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 2]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 2], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 4
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ to_next[0] = from[pi - 3];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 3]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 3], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ {
+ to_next[0] = from[pi - 3];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 3]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 3], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+
+#if NSTAGES == 5
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ if (pi - 3 >= 0)
+ stage3 (vm, node, from[pi - 3]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ stage3 (vm, node, from[pi - 3]);
+ to_next[0] = from[pi - 4];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 4]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 4], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ stage3 (vm, node, from[pi - 3]);
+ if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+ {
+ to_next[0] = from[pi - 4];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 4]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 4], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 6
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+ int pi, pi_limit;
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ pi_limit = clib_min (n_left_from, n_left_to_next);
+
+ for (pi = 0; pi < NSTAGES - 1; pi++)
+ {
+ if (pi == pi_limit)
+ break;
+ stage0 (vm, node, from[pi]);
+ if (pi - 1 >= 0)
+ stage1 (vm, node, from[pi - 1]);
+ if (pi - 2 >= 0)
+ stage2 (vm, node, from[pi - 2]);
+ if (pi - 3 >= 0)
+ stage3 (vm, node, from[pi - 3]);
+ if (pi - 4 >= 0)
+ stage4 (vm, node, from[pi - 4]);
+ }
+
+ for (; pi < pi_limit; pi++)
+ {
+ stage0 (vm, node, from[pi]);
+ stage1 (vm, node, from[pi - 1]);
+ stage2 (vm, node, from[pi - 2]);
+ stage3 (vm, node, from[pi - 3]);
+ stage4 (vm, node, from[pi - 4]);
+ to_next[0] = from[pi - 5];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 5]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 5], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+
+
+ for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+ {
+ if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+ stage1 (vm, node, from[pi - 1]);
+ if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+ stage2 (vm, node, from[pi - 2]);
+ if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+ stage3 (vm, node, from[pi - 3]);
+ if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+ stage4 (vm, node, from[pi - 4]);
+ if (((pi - 5) >= 0) && ((pi - 5) < pi_limit))
+ {
+ to_next[0] = from[pi - 5];
+ to_next++;
+ n_left_to_next--;
+ next0 = last_stage (vm, node, from[pi - 5]);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ from[pi - 5], next0);
+ n_left_from--;
+ if ((int) n_left_to_next < 0 && n_left_from > 0)
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ from += pi_limit;
+ }
+ return frame->n_vectors;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */