Diffstat (limited to 'src/vnet/handoff.c')
-rw-r--r-- | src/vnet/handoff.c | 594
1 file changed, 594 insertions, 0 deletions
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c new file mode 100644 index 00000000..81cb9f55 --- /dev/null +++ b/src/vnet/handoff.c @@ -0,0 +1,594 @@ + +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vppinfra/xxhash.h> +#include <vlib/threads.h> +#include <vnet/handoff.h> +#include <vnet/feature/feature.h> + +typedef struct +{ + uword *workers_bitmap; + u32 *workers; +} per_inteface_handoff_data_t; + +typedef struct +{ + u32 cached_next_index; + u32 num_workers; + u32 first_worker_index; + + per_inteface_handoff_data_t *if_data; + + /* Worker handoff index */ + u32 frame_queue_index; + + /* convenience variables */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + u64 (*hash_fn) (ethernet_header_t *); +} handoff_main_t; + +handoff_main_t handoff_main; +vlib_node_registration_t handoff_dispatch_node; + +typedef struct +{ + u32 sw_if_index; + u32 next_worker_index; + u32 buffer_index; +} worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * +format_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + worker_handoff_trace_t *t = va_arg (*args, worker_handoff_trace_t *); + + s = + format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x", + t->sw_if_index, t->next_worker_index, t->buffer_index); + return s; +} + +vlib_node_registration_t handoff_node; + +static uword +worker_handoff_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + handoff_main_t *hm = &handoff_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + hm->first_worker_index + hm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 hash; + u64 hash_key; + per_inteface_handoff_data_t *ihd0; + u32 index0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + ASSERT (hm->if_data); + ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0); + + next_worker_index = hm->first_worker_index; + + /* + * Force unknown traffic onto worker 0, + * and into ethernet-input. $$$$ add more hashes. 
+ */
+
+      /* Compute ingress LB hash */
+      hash_key = hm->hash_fn ((ethernet_header_t *) b0->data);
+      hash = (u32) clib_xxhash (hash_key);
+
+      /* If the input node did not specify a next index, the packet
+         should go to ethernet-input */
+      if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_HANDOFF_NEXT_VALID) == 0))
+	vnet_buffer (b0)->handoff.next_index =
+	  HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
+      else if (vnet_buffer (b0)->handoff.next_index ==
+	       HANDOFF_DISPATCH_NEXT_IP4_INPUT
+	       || vnet_buffer (b0)->handoff.next_index ==
+	       HANDOFF_DISPATCH_NEXT_IP6_INPUT
+	       || vnet_buffer (b0)->handoff.next_index ==
+	       HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
+	vlib_buffer_advance (b0, (sizeof (ethernet_header_t)));
+
+      if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
+	index0 = hash & (vec_len (ihd0->workers) - 1);
+      else
+	index0 = hash % vec_len (ihd0->workers);
+
+      next_worker_index += ihd0->workers[index0];
+
+      if (next_worker_index != current_worker_index)
+	{
+	  if (hf)
+	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+	  hf = vlib_get_worker_handoff_queue_elt (hm->frame_queue_index,
+						  next_worker_index,
+						  handoff_queue_elt_by_worker_index);
+
+	  n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+	  to_next_worker = &hf->buffer_index[hf->n_vectors];
+	  current_worker_index = next_worker_index;
+	}
+
+      /* enqueue to correct worker thread */
+      to_next_worker[0] = bi0;
+      to_next_worker++;
+      n_left_to_next_worker--;
+
+      if (n_left_to_next_worker == 0)
+	{
+	  hf->n_vectors = VLIB_FRAME_SIZE;
+	  vlib_put_frame_queue_elt (hf);
+	  current_worker_index = ~0;
+	  handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+	  hf = 0;
+	}
+
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+			 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+	{
+	  worker_handoff_trace_t *t =
+	    vlib_add_trace (vm, node, b0, sizeof (*t));
+	  t->sw_if_index = sw_if_index0;
+	  t->next_worker_index = next_worker_index - hm->first_worker_index;
+	  t->buffer_index = bi0;
+	}
+
+    }
+
+  if (hf)
+    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+  /* Ship frames to the worker nodes */
+  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+    {
+      if (handoff_queue_elt_by_worker_index[i])
+	{
+	  hf = handoff_queue_elt_by_worker_index[i];
+	  /*
+	   * It works better to let the handoff node
+	   * rate-adapt, always ship the handoff queue element.
+ */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (worker_handoff_node) = { + .function = worker_handoff_node_fn, + .name = "worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn) +/* *INDENT-ON* */ + +int +interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index, + uword * bitmap, int enable_disable) +{ + handoff_main_t *hm = &handoff_main; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + per_inteface_handoff_data_t *d; + int i, rv = 0; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (clib_bitmap_last_set (bitmap) >= hm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + if (hm->frame_queue_index == ~0) + hm->frame_queue_index = + vlib_frame_queue_main_init (handoff_dispatch_node.index, 0); + + vec_validate (hm->if_data, sw_if_index); + d = vec_elt_at_index (hm->if_data, sw_if_index); + + vec_free (d->workers); + vec_free (d->workers_bitmap); + + if (enable_disable) + { + d->workers_bitmap = bitmap; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(d->workers, i); + })); + /* *INDENT-ON* */ + } + + vnet_feature_enable_disable ("device-input", "worker-handoff", + sw_if_index, enable_disable, 0, 0); + return rv; +} + +static clib_error_t * +set_interface_handoff_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + handoff_main_t *hm = &handoff_main; + u32 sw_if_index = ~0; + int enable_disable = 1; + uword *bitmap = 0; + u32 sym = ~0; + + int rv = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "workers %U", unformat_bitmap_list, &bitmap)) + ; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else if (unformat (input, "symmetrical")) + sym = 1; + else if (unformat (input, "asymmetrical")) + sym = 0; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + if (bitmap == 0) + return clib_error_return (0, "Please specify list of workers..."); + + rv = + interface_handoff_enable_disable (vm, sw_if_index, bitmap, + enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "Invalid interface"); + break; + + case VNET_API_ERROR_INVALID_WORKER: + return clib_error_return (0, "Invalid worker(s)"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + default: + return clib_error_return (0, "unknown return value %d", rv); + } + + if (sym == 1) + hm->hash_fn = eth_get_sym_key; + else if (sym == 0) + hm->hash_fn = eth_get_key; + + return 0; +} + +/* *INDENT-OFF* */ 
+VLIB_CLI_COMMAND (set_interface_handoff_command, static) = { + .path = "set interface handoff", + .short_help = + "set interface handoff <interface-name> workers <workers-list> [symmetrical|asymmetrical]", + .function = set_interface_handoff_command_fn, +}; +/* *INDENT-ON* */ + +typedef struct +{ + u32 buffer_index; + u32 next_index; + u32 sw_if_index; +} handoff_dispatch_trace_t; + +/* packet trace format function */ +static u8 * +format_handoff_dispatch_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + handoff_dispatch_trace_t *t = va_arg (*args, handoff_dispatch_trace_t *); + + s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x", + t->sw_if_index, t->next_index, t->buffer_index); + return s; +} + +#define foreach_handoff_dispatch_error \ +_(EXAMPLE, "example packets") + +typedef enum +{ +#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym, + foreach_handoff_dispatch_error +#undef _ + HANDOFF_DISPATCH_N_ERROR, +} handoff_dispatch_error_t; + +static char *handoff_dispatch_error_strings[] = { +#define _(sym,string) string, + foreach_handoff_dispatch_error +#undef _ +}; + +static uword +handoff_dispatch_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + handoff_dispatch_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = vnet_buffer (b0)->handoff.next_index; + next1 = vnet_buffer (b1)->handoff.next_index; + + if (PREDICT_FALSE (vm->trace_main.trace_active_hint)) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ + 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ + 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->buffer_index = bi1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + next0 = vnet_buffer (b0)->handoff.next_index; + + if (PREDICT_FALSE (vm->trace_main.trace_active_hint)) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ + 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (handoff_dispatch_node) = { + .function = handoff_dispatch_node_fn, + .name = "handoff-dispatch", + .vector_size = sizeof (u32), + .format_trace = format_handoff_dispatch_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_IS_HANDOFF, + + .n_errors = ARRAY_LEN(handoff_dispatch_error_strings), + .error_strings = handoff_dispatch_error_strings, + + .n_next_nodes = HANDOFF_DISPATCH_N_NEXT, + + .next_nodes = { + [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop", + [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input", + [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input", + [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn) +/* *INDENT-ON* */ + +clib_error_t * +handoff_init (vlib_main_t * vm) +{ + handoff_main_t *hm = &handoff_main; + vlib_thread_main_t *tm 
= vlib_get_thread_main ();
+  clib_error_t *error;
+  uword *p;
+
+  if ((error = vlib_call_init_function (vm, threads_init)))
+    return error;
+
+  vlib_thread_registration_t *tr;
+  /* Only the standard vnet worker threads are supported */
+  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+  if (p)
+    {
+      tr = (vlib_thread_registration_t *) p[0];
+      if (tr)
+	{
+	  hm->num_workers = tr->count;
+	  hm->first_worker_index = tr->first_index;
+	}
+    }
+
+  hm->hash_fn = eth_get_key;
+
+  hm->vlib_main = vm;
+  hm->vnet_main = &vnet_main;
+
+  hm->frame_queue_index = ~0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (handoff_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
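A note on the worker-selection step in worker_handoff_node_fn: the node hashes the Ethernet header with hm->hash_fn, folds the 64-bit key through clib_xxhash, and maps the resulting 32-bit hash onto the per-interface worker vector, masking when the vector length is a power of two and taking a modulo otherwise. The standalone sketch below (plain C, hypothetical helper names, not part of the patch) reproduces just that index calculation:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for clib's is_pow2(); the patch tests
     * vec_len (ihd0->workers). */
    static int
    is_pow2_u32 (uint32_t x)
    {
      return x && ((x & (x - 1)) == 0);
    }

    /* Map a 32-bit flow hash onto one of n_workers slots, the way the
     * node does: cheap mask for power-of-two counts, modulo otherwise. */
    static uint32_t
    pick_worker_slot (uint32_t hash, uint32_t n_workers)
    {
      if (is_pow2_u32 (n_workers))
        return hash & (n_workers - 1);
      return hash % n_workers;
    }

    int
    main (void)
    {
      uint32_t hashes[] = { 0xdeadbeef, 0x12345678, 0xcafef00d };
      for (unsigned i = 0; i < 3; i++)
        printf ("hash 0x%08x -> slot %u of 4, slot %u of 3\n",
                hashes[i], pick_worker_slot (hashes[i], 4),
                pick_worker_slot (hashes[i], 3));
      return 0;
    }

Because the hash is derived from the Ethernet header alone, every packet of a flow lands on the same worker; the selected slot indexes ihd0->workers[], and the value found there is added to hm->first_worker_index to obtain the target thread.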
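For reference, the CLI registered by set_interface_handoff_command_fn is driven as in the following example (the interface name is illustrative):

    set interface handoff GigabitEthernet2/0/0 workers 1-2,4 symmetrical
    set interface handoff GigabitEthernet2/0/0 workers 1-2,4 disable

The workers argument is a bitmap list parsed by unformat_bitmap_list, so ranges and commas are accepted; each set bit selects a worker relative to first_worker_index, and clib_bitmap_last_set() is checked against num_workers so out-of-range bits are rejected with VNET_API_ERROR_INVALID_WORKER. Note that symmetrical/asymmetrical switches hm->hash_fn between eth_get_sym_key and eth_get_key globally, not per interface.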
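The contract between an upstream input node and handoff-dispatch works as follows: if VNET_BUFFER_F_HANDOFF_NEXT_VALID is not set, the packet defaults to ethernet-input on the worker; if the stored next index names one of the IP4/IP6/MPLS inputs, worker-handoff advances the buffer past the Ethernet header before enqueueing. A minimal sketch of the producer side, assuming only the fields and enum values visible in this patch (illustrative, not code from the patch):

    #include <vnet/vnet.h>
    #include <vnet/handoff.h>

    /* Illustrative fragment: mark a buffer so that handoff-dispatch
     * sends it straight to ip4-input-no-checksum on the target worker.
     * worker-handoff will advance past the ethernet header before
     * enqueueing, because the next index is IP4_INPUT. */
    static inline void
    mark_for_ip4_dispatch (vlib_buffer_t * b)
    {
      vnet_buffer (b)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP4_INPUT;
      b->flags |= VNET_BUFFER_F_HANDOFF_NEXT_VALID;
    }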