Diffstat (limited to 'src/vnet/pipeline.h')
-rw-r--r--	src/vnet/pipeline.h	456
1 file changed, 456 insertions(+), 0 deletions(-)
diff --git a/src/vnet/pipeline.h b/src/vnet/pipeline.h
new file mode 100644
index 00000000000..a4aa5cf5277
--- /dev/null
+++ b/src/vnet/pipeline.h
@@ -0,0 +1,456 @@
+/*
+ * vnet/pipeline.h: software pipeline
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Usage example.
+ *
+ * #define NSTAGES 3 or whatever
+ *
+ * <Define pipeline stages>
+ *
+ * #include <vnet/pipeline.h>
+ *
+ * static uword my_node_fn (vlib_main_t * vm,
+ *                          vlib_node_runtime_t * node,
+ *                          vlib_frame_t * frame)
+ * {
+ *   return dispatch_pipeline (vm, node, frame);
+ * }
+ *
+ */
+
+#ifndef NSTAGES
+#error files which #include <vnet/pipeline.h> must define NSTAGES
+#endif
+
+#ifndef STAGE_INLINE
+#define STAGE_INLINE inline
+#endif
+
+/*
+ * A prefetch stride of 2 is quasi-equivalent to doubling the number
+ * of stages with every other pipeline stage empty.
+ */
+
+/*
+ * This is a typical first pipeline stage, which prefetches
+ * buffer metadata and the first line of pkt data.
+ * To use it:
+ *   #define stage0 generic_stage0
+ */
+static STAGE_INLINE void
+generic_stage0 (vlib_main_t * vm,
+                vlib_node_runtime_t * node, u32 buffer_index)
+{
+  /* generic default stage 0 here */
+  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+  vlib_prefetch_buffer_header (b, STORE);
+  CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE);
+}
+
+#if NSTAGES == 2
+
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+  int pi, pi_limit;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      pi_limit = clib_min (n_left_from, n_left_to_next);
+
+      for (pi = 0; pi < NSTAGES - 1; pi++)
+        {
+          if (pi == pi_limit)
+            break;
+          stage0 (vm, node, from[pi]);
+        }
+
+      for (; pi < pi_limit; pi++)
+        {
+          stage0 (vm, node, from[pi]);
+          to_next[0] = from[pi - 1];
+          to_next++;
+          n_left_to_next--;
+          next0 = last_stage (vm, node, from[pi - 1]);
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           from[pi - 1], next0);
+          n_left_from--;
+          if ((int) n_left_to_next < 0 && n_left_from > 0)
+            vlib_get_next_frame (vm, node, next_index, to_next,
+                                 n_left_to_next);
+        }
+
+      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+        {
+          if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+            {
+              to_next[0] = from[pi - 1];
+              to_next++;
+              n_left_to_next--;
+              next0 = last_stage (vm, node, from[pi - 1]);
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               from[pi - 1], next0);
+              n_left_from--;
+              if ((int) n_left_to_next < 0 && n_left_from > 0)
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      from += pi_limit;
+    }
+  return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 3
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+  int pi, pi_limit;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      pi_limit = clib_min (n_left_from, n_left_to_next);
+
+      for (pi = 0; pi < NSTAGES - 1; pi++)
+        {
+          if (pi == pi_limit)
+            break;
+          stage0 (vm, node, from[pi]);
+          if (pi - 1 >= 0)
+            stage1 (vm, node, from[pi - 1]);
+        }
+
+      for (; pi < pi_limit; pi++)
+        {
+          stage0 (vm, node, from[pi]);
+          stage1 (vm, node, from[pi - 1]);
+          to_next[0] = from[pi - 2];
+          to_next++;
+          n_left_to_next--;
+          next0 = last_stage (vm, node, from[pi - 2]);
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           from[pi - 2], next0);
+          n_left_from--;
+          if ((int) n_left_to_next < 0 && n_left_from > 0)
+            vlib_get_next_frame (vm, node, next_index, to_next,
+                                 n_left_to_next);
+        }
+
+
+      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+        {
+          if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+            stage1 (vm, node, from[pi - 1]);
+          if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+            {
+              to_next[0] = from[pi - 2];
+              to_next++;
+              n_left_to_next--;
+              next0 = last_stage (vm, node, from[pi - 2]);
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               from[pi - 2], next0);
+              n_left_from--;
+              if ((int) n_left_to_next < 0 && n_left_from > 0)
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      from += pi_limit;
+    }
+  return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 4
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+  int pi, pi_limit;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      pi_limit = clib_min (n_left_from, n_left_to_next);
+
+      for (pi = 0; pi < NSTAGES - 1; pi++)
+        {
+          if (pi == pi_limit)
+            break;
+          stage0 (vm, node, from[pi]);
+          if (pi - 1 >= 0)
+            stage1 (vm, node, from[pi - 1]);
+          if (pi - 2 >= 0)
+            stage2 (vm, node, from[pi - 2]);
+        }
+
+      for (; pi < pi_limit; pi++)
+        {
+          stage0 (vm, node, from[pi]);
+          stage1 (vm, node, from[pi - 1]);
+          stage2 (vm, node, from[pi - 2]);
+          to_next[0] = from[pi - 3];
+          to_next++;
+          n_left_to_next--;
+          next0 = last_stage (vm, node, from[pi - 3]);
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           from[pi - 3], next0);
+          n_left_from--;
+          if ((int) n_left_to_next < 0 && n_left_from > 0)
+            vlib_get_next_frame (vm, node, next_index, to_next,
+                                 n_left_to_next);
+        }
+
+
+      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+        {
+          if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+            stage1 (vm, node, from[pi - 1]);
+          if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+            stage2 (vm, node, from[pi - 2]);
+          if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+            {
+              to_next[0] = from[pi - 3];
+              to_next++;
+              n_left_to_next--;
+              next0 = last_stage (vm, node, from[pi - 3]);
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               from[pi - 3], next0);
+              n_left_from--;
+              if ((int) n_left_to_next < 0 && n_left_from > 0)
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      from += pi_limit;
+    }
+  return frame->n_vectors;
+}
+#endif
+
+
+#if NSTAGES == 5
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+  int pi, pi_limit;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      pi_limit = clib_min (n_left_from, n_left_to_next);
+
+      for (pi = 0; pi < NSTAGES - 1; pi++)
+        {
+          if (pi == pi_limit)
+            break;
+          stage0 (vm, node, from[pi]);
+          if (pi - 1 >= 0)
+            stage1 (vm, node, from[pi - 1]);
+          if (pi - 2 >= 0)
+            stage2 (vm, node, from[pi - 2]);
+          if (pi - 3 >= 0)
+            stage3 (vm, node, from[pi - 3]);
+        }
+
+      for (; pi < pi_limit; pi++)
+        {
+          stage0 (vm, node, from[pi]);
+          stage1 (vm, node, from[pi - 1]);
+          stage2 (vm, node, from[pi - 2]);
+          stage3 (vm, node, from[pi - 3]);
+          to_next[0] = from[pi - 4];
+          to_next++;
+          n_left_to_next--;
+          next0 = last_stage (vm, node, from[pi - 4]);
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           from[pi - 4], next0);
+          n_left_from--;
+          if ((int) n_left_to_next < 0 && n_left_from > 0)
+            vlib_get_next_frame (vm, node, next_index, to_next,
+                                 n_left_to_next);
+        }
+
+
+      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+        {
+          if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+            stage1 (vm, node, from[pi - 1]);
+          if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+            stage2 (vm, node, from[pi - 2]);
+          if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+            stage3 (vm, node, from[pi - 3]);
+          if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+            {
+              to_next[0] = from[pi - 4];
+              to_next++;
+              n_left_to_next--;
+              next0 = last_stage (vm, node, from[pi - 4]);
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               from[pi - 4], next0);
+              n_left_from--;
+              if ((int) n_left_to_next < 0 && n_left_from > 0)
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      from += pi_limit;
+    }
+  return frame->n_vectors;
+}
+#endif
+
+#if NSTAGES == 6
+static STAGE_INLINE uword
+dispatch_pipeline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index, next0;
+  int pi, pi_limit;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      pi_limit = clib_min (n_left_from, n_left_to_next);
+
+      for (pi = 0; pi < NSTAGES - 1; pi++)
+        {
+          if (pi == pi_limit)
+            break;
+          stage0 (vm, node, from[pi]);
+          if (pi - 1 >= 0)
+            stage1 (vm, node, from[pi - 1]);
+          if (pi - 2 >= 0)
+            stage2 (vm, node, from[pi - 2]);
+          if (pi - 3 >= 0)
+            stage3 (vm, node, from[pi - 3]);
+          if (pi - 4 >= 0)
+            stage4 (vm, node, from[pi - 4]);
+        }
+
+      for (; pi < pi_limit; pi++)
+        {
+          stage0 (vm, node, from[pi]);
+          stage1 (vm, node, from[pi - 1]);
+          stage2 (vm, node, from[pi - 2]);
+          stage3 (vm, node, from[pi - 3]);
+          stage4 (vm, node, from[pi - 4]);
+          to_next[0] = from[pi - 5];
+          to_next++;
+          n_left_to_next--;
+          next0 = last_stage (vm, node, from[pi - 5]);
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           from[pi - 5], next0);
+          n_left_from--;
+          if ((int) n_left_to_next < 0 && n_left_from > 0)
+            vlib_get_next_frame (vm, node, next_index, to_next,
+                                 n_left_to_next);
+        }
+
+
+      for (; pi < (pi_limit + (NSTAGES - 1)); pi++)
+        {
+          if (((pi - 1) >= 0) && ((pi - 1) < pi_limit))
+            stage1 (vm, node, from[pi - 1]);
+          if (((pi - 2) >= 0) && ((pi - 2) < pi_limit))
+            stage2 (vm, node, from[pi - 2]);
+          if (((pi - 3) >= 0) && ((pi - 3) < pi_limit))
+            stage3 (vm, node, from[pi - 3]);
+          if (((pi - 4) >= 0) && ((pi - 4) < pi_limit))
+            stage4 (vm, node, from[pi - 4]);
+          if (((pi - 5) >= 0) && ((pi - 5) < pi_limit))
+            {
+              to_next[0] = from[pi - 5];
+              to_next++;
+              n_left_to_next--;
+              next0 = last_stage (vm, node, from[pi - 5]);
+              vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                               to_next, n_left_to_next,
+                                               from[pi - 5], next0);
+              n_left_from--;
+              if ((int) n_left_to_next < 0 && n_left_from > 0)
+                vlib_get_next_frame (vm, node, next_index, to_next,
+                                     n_left_to_next);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      from += pi_limit;
+    }
+  return frame->n_vectors;
+}
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
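
Each dispatch_pipeline variant above has the same three-phase shape: a priming loop that starts the early stages on the first NSTAGES - 1 packets, a steady-state loop in which every stage works on a different packet each iteration, and a drain loop that finishes the packets still in flight once the input is exhausted. Because each stage runs one packet behind the previous one, the prefetches issued by stage0 have roughly a full stage's worth of work to complete before stage1 (and eventually last_stage) touches the data.

For reference, below is a minimal sketch of a consumer of this header, following the usage comment at the top of the file. The node function and stage bodies (anything named example_*) are hypothetical and node registration is omitted; the actual contract is only that NSTAGES, stage0 .. stage(NSTAGES-2), and last_stage are defined before the #include, with last_stage returning the packet's next-node index.

/* Hypothetical 3-stage consumer of <vnet/pipeline.h>.
 * Names prefixed "example_" are illustrative only. */
#include <vlib/vlib.h>
#include <vnet/vnet.h>

#define NSTAGES 3

/* Stage 0: use the generic prefetch stage supplied by pipeline.h. */
#define stage0 generic_stage0

/* Stage 1: runs one packet behind stage0, so the buffer header and
 * first cache line prefetched there should be warm by now. */
static inline void
stage1 (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
{
  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
  /* e.g. parse headers, do lookups... */
  (void) b;
}

/* Last stage: must return the next-node index for this packet. */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, u32 buffer_index)
{
  return 0;                     /* e.g. EXAMPLE_NEXT_DROP */
}

/* Instantiates the NSTAGES == 3 dispatch_pipeline, which calls the
 * stage functions defined above. */
#include <vnet/pipeline.h>

static uword
example_node_fn (vlib_main_t * vm,
                 vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return dispatch_pipeline (vm, node, frame);
}

Note that the header only provides dispatch_pipeline for NSTAGES of 2 through 6; a consumer defining any other value gets no dispatch_pipeline at all and will fail to compile.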