-rw-r--r--  vnet/Makefile.am               |   2
-rw-r--r--  vnet/vnet/api_errno.h          |   3
-rw-r--r--  vnet/vnet/buffer.h             |   7
-rw-r--r--  vnet/vnet/devices/dpdk/dpdk.h  |   4
-rw-r--r--  vnet/vnet/devices/dpdk/node.c  | 475
-rw-r--r--  vnet/vnet/handoff.c            | 529
-rw-r--r--  vnet/vnet/handoff.h            | 228
-rw-r--r--  vppinfra/vppinfra/bitmap.h     |  20
8 files changed, 829 insertions, 439 deletions
diff --git a/vnet/Makefile.am b/vnet/Makefile.am index 9feffc50eae..41e066f2616 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -26,6 +26,7 @@ TESTS = ######################################## libvnet_la_SOURCES += \ vnet/config.c \ + vnet/handoff.c \ vnet/interface.c \ vnet/interface_cli.c \ vnet/interface_format.c \ @@ -39,6 +40,7 @@ nobase_include_HEADERS += \ vnet/buffer.h \ vnet/config.h \ vnet/global_funcs.h \ + vnet/handoff.h \ vnet/interface.h \ vnet/interface_funcs.h \ vnet/l3_types.h \ diff --git a/vnet/vnet/api_errno.h b/vnet/vnet/api_errno.h index e22b590bc7c..af6f3971a13 100644 --- a/vnet/vnet/api_errno.h +++ b/vnet/vnet/api_errno.h @@ -78,7 +78,8 @@ _(SR_POLICY_NAME_NOT_PRESENT, -84, "Segement routing policy name required") \ _(NOT_RUNNING_AS_ROOT, -85, "Not running as root") \ _(ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \ _(UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \ -_(FAILED_TO_ATTACH_TO_JAVA_THREAD, -88, "Failed to attach to Java thread") +_(FAILED_TO_ATTACH_TO_JAVA_THREAD, -88, "Failed to attach to Java thread") \ +_(INVALID_WORKER, -89, "Invalid worker thread") typedef enum { #define _(a,b,c) VNET_API_ERROR_##a = (b), diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h index 050642276b5..ea25ad0d286 100644 --- a/vnet/vnet/buffer.h +++ b/vnet/vnet/buffer.h @@ -64,6 +64,9 @@ #define LOG2_BUFFER_OUTPUT_FEAT_DONE LOG2_VLIB_BUFFER_FLAG_USER(5) #define BUFFER_OUTPUT_FEAT_DONE (1 << LOG2_BUFFER_OUTPUT_FEAT_DONE) +#define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6) +#define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID) + #define foreach_buffer_opaque_union_subtype \ _(ethernet) \ _(ip) \ @@ -73,7 +76,7 @@ _(l2) \ _(l2t) \ _(gre) \ _(l2_classify) \ -_(io_handoff) \ +_(handoff) \ _(policer) \ _(output_features) \ _(map) \ @@ -185,7 +188,7 @@ typedef struct { /* IO - worker thread handoff */ struct { u32 next_index; - } io_handoff; + } handoff; /* vnet policer */ struct { diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h index 9032b7a9152..1e7d36846cd 100644 --- a/vnet/vnet/devices/dpdk/dpdk.h +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -495,10 +495,6 @@ void dpdk_set_flowcontrol_callback (vlib_main_t *vm, u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance); -vlib_frame_queue_elt_t * vlib_get_handoff_queue_elt (u32 vlib_worker_index); - -u32 dpdk_get_handoff_node_index (void); - void set_efd_bitmap (u8 *bitmap, u32 value, u32 op); struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b); diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index 9a0f9c8fb49..27670f37d98 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -22,6 +22,7 @@ #include <vnet/devices/dpdk/dpdk.h> #include <vnet/classify/vnet_classify.h> #include <vnet/mpls-gre/packet.h> +#include <vnet/handoff.h> #include "dpdk_priv.h" @@ -48,240 +49,45 @@ */ #define VMWARE_LENGTH_BUG_WORKAROUND 0 -typedef struct { - u32 cached_next_index; - - /* convenience variables */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} handoff_dispatch_main_t; - -typedef struct { - u32 buffer_index; - u32 next_index; - u32 sw_if_index; -} handoff_dispatch_trace_t; - -/* packet trace format function */ -static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - handoff_dispatch_trace_t * t = va_arg (*args, 
handoff_dispatch_trace_t *); - - s = format (s, "HANDOFF_DISPATCH: sw_if_index %d next_index %d buffer 0x%x", - t->sw_if_index, - t->next_index, - t->buffer_index); - return s; -} - -handoff_dispatch_main_t handoff_dispatch_main; - -vlib_node_registration_t handoff_dispatch_node; - -#define foreach_handoff_dispatch_error \ -_(EXAMPLE, "example packets") - -typedef enum { -#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym, - foreach_handoff_dispatch_error -#undef _ - HANDOFF_DISPATCH_N_ERROR, -} handoff_dispatch_error_t; - -static char * handoff_dispatch_error_strings[] = { -#define _(sym,string) string, - foreach_handoff_dispatch_error +static char * dpdk_error_strings[] = { +#define _(n,s) s, + foreach_dpdk_error #undef _ }; -static inline -void vlib_put_handoff_queue_elt (vlib_frame_queue_elt_t * hf) -{ - CLIB_MEMORY_BARRIER(); - hf->valid = 1; -} - -static uword -handoff_dispatch_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline int +dpdk_mbuf_is_ip4(struct rte_mbuf *mb) { - u32 n_left_from, * from, * to_next; - dpdk_rx_next_t next_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - next0 = vnet_buffer(b0)->io_handoff.next_index; - next1 = vnet_buffer(b1)->io_handoff.next_index; - - if (PREDICT_FALSE(vm->trace_main.trace_active_hint)) - { - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); - handoff_dispatch_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - t->buffer_index = bi0; - } - if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) - { - vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0); - handoff_dispatch_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; - t->sw_if_index = sw_if_index1; - t->next_index = next1; - t->buffer_index = bi1; - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - u32 next0; - u32 sw_if_index0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - next0 = vnet_buffer(b0)->io_handoff.next_index; - - if (PREDICT_FALSE(vm->trace_main.trace_active_hint)) - { - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - 
vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); - handoff_dispatch_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - t->sw_if_index = sw_if_index0; - t->next_index = next0; - t->buffer_index = bi0; - } - } - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - return frame->n_vectors; +#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) + return RTE_ETH_IS_IPV4_HDR(mb->packet_type) != 0; +#else + return (mb_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT)) != 0; +#endif } -VLIB_REGISTER_NODE (handoff_dispatch_node) = { - .function = handoff_dispatch_node_fn, - .name = "handoff-dispatch", - .vector_size = sizeof (u32), - .format_trace = format_handoff_dispatch_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .flags = VLIB_NODE_FLAG_IS_HANDOFF, - - .n_errors = ARRAY_LEN(handoff_dispatch_error_strings), - .error_strings = handoff_dispatch_error_strings, - - .n_next_nodes = DPDK_RX_N_NEXT, - - .next_nodes = { - [DPDK_RX_NEXT_DROP] = "error-drop", - [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", - [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input", - [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input", - [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn) - -clib_error_t *handoff_dispatch_init (vlib_main_t *vm) +always_inline int +dpdk_mbuf_is_ip6(struct rte_mbuf *mb) { - handoff_dispatch_main_t * mp = &handoff_dispatch_main; - - mp->vlib_main = vm; - mp->vnet_main = &vnet_main; - - return 0; +#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) + return RTE_ETH_IS_IPV6_HDR(mb->packet_type) != 0; +#else + return (mb_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) != 0; +#endif } -VLIB_INIT_FUNCTION (handoff_dispatch_init); - -u32 dpdk_get_handoff_node_index (void) +always_inline int +vlib_buffer_is_mpls(vlib_buffer_t * b) { - return handoff_dispatch_node.index; + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)); } -static char * dpdk_error_strings[] = { -#define _(n,s) s, - foreach_dpdk_error -#undef _ -}; - always_inline void dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb, vlib_buffer_t *b0, u8 * next0, u8 * error0) { - u8 is0_ip4, is0_ip6, is0_mpls, n0; + u8 n0; uint16_t mb_flags = mb->ol_flags; if (PREDICT_FALSE(mb_flags & ( @@ -306,37 +112,30 @@ dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb, { *error0 = DPDK_ERROR_NONE; if (PREDICT_FALSE(xd->per_interface_next_index != ~0)) - n0 = xd->per_interface_next_index; + { + n0 = xd->per_interface_next_index; + b0->flags |= BUFFER_HANDOFF_NEXT_VALID; + if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb))) + vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP4_INPUT; + else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb))) + vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP6_INPUT; + else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0))) + vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_MPLS_INPUT; + else + vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT; + } else if (PREDICT_FALSE(xd->vlan_subifs || (mb_flags & PKT_RX_VLAN_PKT))) n0 = DPDK_RX_NEXT_ETHERNET_INPUT; else { - n0 = DPDK_RX_NEXT_ETHERNET_INPUT; -#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) - is0_ip4 = 
RTE_ETH_IS_IPV4_HDR(mb->packet_type) != 0; -#else - is0_ip4 = (mb_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT)) != 0; -#endif - - if (PREDICT_TRUE(is0_ip4)) + if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb))) n0 = DPDK_RX_NEXT_IP4_INPUT; + else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb))) + n0 = DPDK_RX_NEXT_IP6_INPUT; + else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0))) + n0 = DPDK_RX_NEXT_MPLS_INPUT; else - { -#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0) - is0_ip6 = RTE_ETH_IS_IPV6_HDR(mb->packet_type) != 0; -#else - is0_ip6 = - (mb_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) != 0; -#endif - if (PREDICT_TRUE(is0_ip6)) - n0 = DPDK_RX_NEXT_IP6_INPUT; - else - { - ethernet_header_t *h0 = (ethernet_header_t *) b0->data; - is0_mpls = (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)); - n0 = is0_mpls ? DPDK_RX_NEXT_MPLS_INPUT : n0; - } - } + n0 = DPDK_RX_NEXT_ETHERNET_INPUT; } } *next0 = n0; @@ -908,194 +707,6 @@ void dpdk_set_next_node (dpdk_rx_next_t next, char *name) } } -inline vlib_frame_queue_elt_t * -vlib_get_handoff_queue_elt (u32 vlib_worker_index) -{ - vlib_frame_queue_t *fq; - vlib_frame_queue_elt_t *elt; - u64 new_tail; - - fq = vlib_frame_queues[vlib_worker_index]; - ASSERT (fq); - - new_tail = __sync_add_and_fetch (&fq->tail, 1); - - /* Wait until a ring slot is available */ - while (new_tail >= fq->head_hint + fq->nelts) - vlib_worker_thread_barrier_check (); - - elt = fq->elts + (new_tail & (fq->nelts-1)); - - /* this would be very bad... */ - while (elt->valid) - ; - - elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME; - elt->last_n_vectors = elt->n_vectors = 0; - - return elt; -} - -static inline vlib_frame_queue_elt_t * -dpdk_get_handoff_queue_elt ( - u32 vlib_worker_index, - vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index) -{ - vlib_frame_queue_elt_t *elt; - - if (handoff_queue_elt_by_worker_index [vlib_worker_index]) - return handoff_queue_elt_by_worker_index [vlib_worker_index]; - - elt = vlib_get_handoff_queue_elt (vlib_worker_index); - - handoff_queue_elt_by_worker_index [vlib_worker_index] = elt; - - return elt; -} - -static inline vlib_frame_queue_t * -is_vlib_handoff_queue_congested ( - u32 vlib_worker_index, - u32 queue_hi_thresh, - vlib_frame_queue_t ** handoff_queue_by_worker_index) -{ - vlib_frame_queue_t *fq; - - fq = handoff_queue_by_worker_index [vlib_worker_index]; - if (fq != (vlib_frame_queue_t *)(~0)) - return fq; - - fq = vlib_frame_queues[vlib_worker_index]; - ASSERT (fq); - - if (PREDICT_FALSE(fq->tail >= (fq->head_hint + queue_hi_thresh))) { - /* a valid entry in the array will indicate the queue has reached - * the specified threshold and is congested - */ - handoff_queue_by_worker_index [vlib_worker_index] = fq; - fq->enqueue_full_events++; - return fq; - } - - return NULL; -} - -static inline u64 ipv4_get_key (ip4_header_t *ip) -{ - u64 hash_key; - - hash_key = *((u64*)(&ip->address_pair)) ^ ip->protocol; - - return hash_key; -} - -static inline u64 ipv6_get_key (ip6_header_t *ip) -{ - u64 hash_key; - - hash_key = ip->src_address.as_u64[0] ^ - rotate_left(ip->src_address.as_u64[1],13) ^ - rotate_left(ip->dst_address.as_u64[0],26) ^ - rotate_left(ip->dst_address.as_u64[1],39) ^ - ip->protocol; - - return hash_key; -} - - -#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U -#define MPLS_LABEL_MASK 0xFFFFF000U - -static inline u64 mpls_get_key (mpls_unicast_header_t *m) -{ - u64 hash_key; - u8 ip_ver; - - - /* find the bottom of the MPLS label stack. 
*/ - if (PREDICT_TRUE(m->label_exp_s_ttl & - clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) { - goto bottom_lbl_found; - } - m++; - - if (PREDICT_TRUE(m->label_exp_s_ttl & - clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) { - goto bottom_lbl_found; - } - m++; - - if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) { - goto bottom_lbl_found; - } - m++; - - if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) { - goto bottom_lbl_found; - } - m++; - - if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) { - goto bottom_lbl_found; - } - - /* the bottom label was not found - use the last label */ - hash_key = m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK); - - return hash_key; - - -bottom_lbl_found: - m++; - ip_ver = (*((u8 *)m) >> 4); - - /* find out if it is IPV4 or IPV6 header */ - if (PREDICT_TRUE(ip_ver == 4)) { - hash_key = ipv4_get_key((ip4_header_t *)m); - } else if (PREDICT_TRUE(ip_ver == 6)) { - hash_key = ipv6_get_key((ip6_header_t *)m); - } else { - /* use the bottom label */ - hash_key = (m-1)->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK); - } - - return hash_key; - -} - -static inline u64 eth_get_key (ethernet_header_t *h0) -{ - u64 hash_key; - - - if (PREDICT_TRUE(h0->type) == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) { - hash_key = ipv4_get_key((ip4_header_t *)(h0+1)); - } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) { - hash_key = ipv6_get_key((ip6_header_t *)(h0+1)); - } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) { - hash_key = mpls_get_key((mpls_unicast_header_t *)(h0+1)); - } else if ((h0->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) || - (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_DOT1AD))) { - ethernet_vlan_header_t * outer = (ethernet_vlan_header_t *)(h0 + 1); - - outer = (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ? - outer+1 : outer; - if (PREDICT_TRUE(outer->type) == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) { - hash_key = ipv4_get_key((ip4_header_t *)(outer+1)); - } else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)) { - hash_key = ipv6_get_key((ip6_header_t *)(outer+1)); - } else if (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) { - hash_key = mpls_get_key((mpls_unicast_header_t *)(outer+1)); - } else { - hash_key = outer->type; - } - } else { - hash_key = 0; - } - - return hash_key; -} - /* * This function is used when dedicated IO threads feed the worker threads. * @@ -1395,7 +1006,7 @@ void dpdk_io_thread (vlib_worker_thread_t * w, vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; - vnet_buffer(b0)->io_handoff.next_index = next0; + vnet_buffer(b0)->handoff.next_index = next0; n_rx_bytes += mb->pkt_len; /* Process subsequent segments of multi-segment packets */ @@ -1796,7 +1407,7 @@ dpdk_io_input (vlib_main_t * vm, vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0; - vnet_buffer(b0)->io_handoff.next_index = next0; + vnet_buffer(b0)->handoff.next_index = next0; n_rx_bytes += mb->pkt_len; /* Process subsequent segments of multi-segment packets */ diff --git a/vnet/vnet/handoff.c b/vnet/vnet/handoff.c new file mode 100644 index 00000000000..6cb8d02b431 --- /dev/null +++ b/vnet/vnet/handoff.c @@ -0,0 +1,529 @@ + +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vppinfra/xxhash.h> +#include <vlib/threads.h> +#include <vnet/handoff.h> + +typedef struct { + uword * workers_bitmap; + u32 * workers; +} per_inteface_handoff_data_t; + +typedef struct { + u32 cached_next_index; + u32 num_workers; + u32 first_worker_index; + + per_inteface_handoff_data_t * if_data; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} handoff_main_t; + +handoff_main_t handoff_main; + +typedef struct { + u32 sw_if_index; + u32 next_worker_index; + u32 buffer_index; +} worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + worker_handoff_trace_t * t = va_arg (*args, worker_handoff_trace_t *); + + s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x", + t->sw_if_index, t->next_worker_index, t->buffer_index); + return s; +} + +vlib_node_registration_t handoff_node; + +static uword +worker_handoff_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + handoff_main_t * hm = &handoff_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + u32 n_left_from, * from; + static __thread vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0; + vlib_frame_queue_elt_t * hf = 0; + int i; + u32 n_left_to_next_worker = 0, * to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + + if (PREDICT_FALSE(handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + hm->first_worker_index + hm->num_workers - 1, + (vlib_frame_queue_t *)(~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 sw_if_index0; + u32 hash; + u64 hash_key; + per_inteface_handoff_data_t * ihd0; + u32 index0; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + ASSERT (hm->if_data); + ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0); + + next_worker_index = hm->first_worker_index; + + /* + * Force unknown traffic onto worker 0, + * and into ethernet-input. $$$$ add more hashes. 
+ */ + + /* Compute ingress LB hash */ + hash_key = eth_get_key ((ethernet_header_t *) b0->data); + hash = (u32) clib_xxhash (hash_key); + + /* if input node did not specify next index, then packet + should go to eternet-input */ + if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0)) + vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT; + else if (vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP4_INPUT || + vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP6_INPUT || + vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_MPLS_INPUT) + vlib_buffer_advance (b0, (sizeof(ethernet_header_t))); + + if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers)))) + index0 = hash & (vec_len (ihd0->workers) - 1); + else + index0 = hash % vec_len (ihd0->workers); + + next_worker_index += ihd0->workers[index0]; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = dpdk_get_handoff_queue_elt(next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_handoff_queue_elt(hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_worker_index = next_worker_index - hm->first_worker_index; + t->buffer_index = bi0; + } + + } + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_handoff_queue_elt(hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (worker_handoff_node) = { + .function = worker_handoff_node_fn, + .name = "worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn) + +int interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index, + uword * bitmap, int enable_disable) +{ + handoff_main_t * hm = &handoff_main; + vnet_sw_interface_t * sw; + vnet_main_t * vnm = vnet_get_main(); + per_inteface_handoff_data_t * d; + int i, rv; + u32 node_index = enable_disable ? 
worker_handoff_node.index : ~0; + + if (pool_is_free_index (vnm->interface_main.sw_interfaces, + sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (clib_bitmap_last_set(bitmap) >= hm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + vec_validate (hm->if_data, sw_if_index); + d = vec_elt_at_index(hm->if_data, sw_if_index); + + vec_free (d->workers); + vec_free (d->workers_bitmap); + + if (enable_disable) + { + d->workers_bitmap = bitmap; + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(d->workers, i); + })); + } + + rv = vnet_hw_interface_rx_redirect_to_node (vnm, sw_if_index, node_index); + return rv; +} + +static clib_error_t * +set_interface_handoff_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 sw_if_index = ~0; + int enable_disable = 1; + uword * bitmap = 0; + + int rv = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "workers %U", unformat_bitmap_list, + &bitmap)) + ; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + vnet_get_main(), &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + if (bitmap == 0) + return clib_error_return (0, "Please specify list of workers..."); + + rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, enable_disable); + + switch(rv) { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return (0, "Invalid interface"); + break; + + case VNET_API_ERROR_INVALID_WORKER: + return clib_error_return (0, "Invalid worker(s)"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, "Device driver doesn't support redirection"); + break; + + default: + return clib_error_return (0, "unknown return value %d", rv); + } + return 0; +} + +VLIB_CLI_COMMAND (set_interface_handoff_command, static) = { + .path = "set interface handoff", + .short_help = + "set interface handoff <interface-name> workers <workers-list>", + .function = set_interface_handoff_command_fn, +}; + +typedef struct { + u32 buffer_index; + u32 next_index; + u32 sw_if_index; +} handoff_dispatch_trace_t; + +/* packet trace format function */ +static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *); + + s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x", + t->sw_if_index, + t->next_index, + t->buffer_index); + return s; +} + + +vlib_node_registration_t handoff_dispatch_node; + +#define foreach_handoff_dispatch_error \ +_(EXAMPLE, "example packets") + +typedef enum { +#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym, + foreach_handoff_dispatch_error +#undef _ + HANDOFF_DISPATCH_N_ERROR, +} handoff_dispatch_error_t; + +static char * handoff_dispatch_error_strings[] = { +#define _(sym,string) string, + foreach_handoff_dispatch_error +#undef _ +}; + +static uword +handoff_dispatch_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + handoff_dispatch_next_t next_index; + + from = vlib_frame_vector_args (frame); + 
n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + next0 = vnet_buffer(b0)->handoff.next_index; + next1 = vnet_buffer(b1)->handoff.next_index; + + if (PREDICT_FALSE(vm->trace_main.trace_active_hint)) + { + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->buffer_index = bi1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + next0 = vnet_buffer(b0)->handoff.next_index; + + if (PREDICT_FALSE(vm->trace_main.trace_active_hint)) + { + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); + handoff_dispatch_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->buffer_index = bi0; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (handoff_dispatch_node) = { + .function = handoff_dispatch_node_fn, + .name = "handoff-dispatch", + .vector_size = sizeof (u32), + .format_trace = format_handoff_dispatch_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_IS_HANDOFF, + + .n_errors = ARRAY_LEN(handoff_dispatch_error_strings), + .error_strings = handoff_dispatch_error_strings, + + .n_next_nodes = HANDOFF_DISPATCH_N_NEXT, + + .next_nodes = { + [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop", + [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input", + [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = 
"ip4-input-no-checksum", + [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input", + [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn) + +clib_error_t *handoff_init (vlib_main_t *vm) +{ + handoff_main_t * hm = &handoff_main; + vlib_thread_main_t * tm = vlib_get_thread_main(); + uword * p; + + vlib_thread_registration_t * tr; + /* Only the standard vnet worker threads are supported */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + hm->num_workers = tr->count; + hm->first_worker_index = tr->first_index; + } + + hm->vlib_main = vm; + hm->vnet_main = &vnet_main; + + return 0; +} + +VLIB_INIT_FUNCTION (handoff_init); diff --git a/vnet/vnet/handoff.h b/vnet/vnet/handoff.h new file mode 100644 index 00000000000..e0938ebfb2e --- /dev/null +++ b/vnet/vnet/handoff.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vnet_handoff_h +#define included_vnet_handoff_h + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/mpls-gre/packet.h> + +typedef enum { + HANDOFF_DISPATCH_NEXT_IP4_INPUT, + HANDOFF_DISPATCH_NEXT_IP6_INPUT, + HANDOFF_DISPATCH_NEXT_MPLS_INPUT, + HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT, + HANDOFF_DISPATCH_NEXT_DROP, + HANDOFF_DISPATCH_N_NEXT, +} handoff_dispatch_next_t; + +static inline +void vlib_put_handoff_queue_elt (vlib_frame_queue_elt_t * hf) +{ + CLIB_MEMORY_BARRIER(); + hf->valid = 1; +} + +static inline vlib_frame_queue_elt_t * +vlib_get_handoff_queue_elt (u32 vlib_worker_index) +{ + vlib_frame_queue_t *fq; + vlib_frame_queue_elt_t *elt; + u64 new_tail; + + fq = vlib_frame_queues[vlib_worker_index]; + ASSERT (fq); + + new_tail = __sync_add_and_fetch (&fq->tail, 1); + + /* Wait until a ring slot is available */ + while (new_tail >= fq->head_hint + fq->nelts) + vlib_worker_thread_barrier_check (); + + elt = fq->elts + (new_tail & (fq->nelts-1)); + + /* this would be very bad... 
+  while (elt->valid)
+    ;
+
+  elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
+  elt->last_n_vectors = elt->n_vectors = 0;
+
+  return elt;
+}
+
+static inline vlib_frame_queue_t *
+is_vlib_handoff_queue_congested (
+  u32 vlib_worker_index,
+  u32 queue_hi_thresh,
+  vlib_frame_queue_t ** handoff_queue_by_worker_index)
+{
+  vlib_frame_queue_t *fq;
+
+  fq = handoff_queue_by_worker_index [vlib_worker_index];
+  if (fq != (vlib_frame_queue_t *)(~0))
+    return fq;
+
+  fq = vlib_frame_queues[vlib_worker_index];
+  ASSERT (fq);
+
+  if (PREDICT_FALSE(fq->tail >= (fq->head_hint + queue_hi_thresh))) {
+    /* a valid entry in the array will indicate the queue has reached
+     * the specified threshold and is congested
+     */
+    handoff_queue_by_worker_index [vlib_worker_index] = fq;
+    fq->enqueue_full_events++;
+    return fq;
+  }
+
+  return NULL;
+}
+
+static inline vlib_frame_queue_elt_t *
+dpdk_get_handoff_queue_elt (u32 vlib_worker_index,
+                            vlib_frame_queue_elt_t **
+                            handoff_queue_elt_by_worker_index)
+{
+  vlib_frame_queue_elt_t *elt;
+
+  if (handoff_queue_elt_by_worker_index [vlib_worker_index])
+    return handoff_queue_elt_by_worker_index [vlib_worker_index];
+
+  elt = vlib_get_handoff_queue_elt (vlib_worker_index);
+
+  handoff_queue_elt_by_worker_index [vlib_worker_index] = elt;
+
+  return elt;
+}
+
+static inline u64 ipv4_get_key (ip4_header_t *ip)
+{
+  u64 hash_key;
+
+  hash_key = *((u64*)(&ip->address_pair)) ^ ip->protocol;
+
+  return hash_key;
+}
+
+static inline u64 ipv6_get_key (ip6_header_t *ip)
+{
+  u64 hash_key;
+
+  hash_key = ip->src_address.as_u64[0] ^
+             rotate_left(ip->src_address.as_u64[1],13) ^
+             rotate_left(ip->dst_address.as_u64[0],26) ^
+             rotate_left(ip->dst_address.as_u64[1],39) ^
+             ip->protocol;
+
+  return hash_key;
+}
+
+#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U
+#define MPLS_LABEL_MASK               0xFFFFF000U
+
+static inline u64 mpls_get_key (mpls_unicast_header_t *m)
+{
+  u64 hash_key;
+  u8 ip_ver;
+
+  /* find the bottom of the MPLS label stack. */
+  if (PREDICT_TRUE(m->label_exp_s_ttl &
+      clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+    goto bottom_lbl_found;
+  }
+  m++;
+
+  if (PREDICT_TRUE(m->label_exp_s_ttl &
+      clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+    goto bottom_lbl_found;
+  }
+  m++;
+
+  if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+    goto bottom_lbl_found;
+  }
+  m++;
+
+  if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+    goto bottom_lbl_found;
+  }
+  m++;
+
+  if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+    goto bottom_lbl_found;
+  }
+
+  /* the bottom label was not found - use the last label */
+  hash_key = m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+
+  return hash_key;
+
+bottom_lbl_found:
+  m++;
+  ip_ver = (*((u8 *)m) >> 4);
+
+  /* find out if it is IPV4 or IPV6 header */
+  if (PREDICT_TRUE(ip_ver == 4)) {
+    hash_key = ipv4_get_key((ip4_header_t *)m);
+  } else if (PREDICT_TRUE(ip_ver == 6)) {
+    hash_key = ipv6_get_key((ip6_header_t *)m);
+  } else {
+    /* use the bottom label */
+    hash_key = (m-1)->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+  }
+
+  return hash_key;
+}
+
+static inline u64
+eth_get_key (ethernet_header_t *h0)
+{
+  u64 hash_key;
+
+  if (PREDICT_TRUE(h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+    hash_key = ipv4_get_key((ip4_header_t *)(h0+1));
+  } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) {
+    hash_key = ipv6_get_key((ip6_header_t *)(h0+1));
+  } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+    hash_key = mpls_get_key((mpls_unicast_header_t *)(h0+1));
+  } else if ((h0->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ||
+             (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_DOT1AD))) {
+    ethernet_vlan_header_t * outer = (ethernet_vlan_header_t *)(h0 + 1);
+
+    outer = (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ?
+            outer+1 : outer;
+    if (PREDICT_TRUE(outer->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+      hash_key = ipv4_get_key((ip4_header_t *)(outer+1));
+    } else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)) {
+      hash_key = ipv6_get_key((ip6_header_t *)(outer+1));
+    } else if (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+      hash_key = mpls_get_key((mpls_unicast_header_t *)(outer+1));
+    } else {
+      hash_key = outer->type;
+    }
+  } else {
+    hash_key = 0;
+  }
+
+  return hash_key;
+}
+
+#endif /* included_vnet_handoff_h */
diff --git a/vppinfra/vppinfra/bitmap.h b/vppinfra/vppinfra/bitmap.h
index e69851b60be..986c322e86c 100644
--- a/vppinfra/vppinfra/bitmap.h
+++ b/vppinfra/vppinfra/bitmap.h
@@ -338,6 +338,26 @@ always_inline uword clib_bitmap_first_set (uword * ai)
   return ~0;
 }
 
+/* Return highest numbered set bit in bitmap.
+
+   Return infinity (~0) if bitmap is zero. */
+always_inline uword clib_bitmap_last_set (uword * ai)
+{
+  uword i;
+
+  /* uword is unsigned: count down from vec_len (ai) so the loop
+     condition cannot underflow past zero. */
+  for (i = vec_len (ai); i > 0; i--)
+    {
+      uword x = ai[i - 1];
+      if (x != 0)
+        {
+          uword first_bit;
+          count_leading_zeros (first_bit, x);
+          return i * BITS (ai[0]) - first_bit - 1;
+        }
+    }
+  return ~0;
+}
+
 /* Return lowest numbered clear bit in bitmap. */
 always_inline uword clib_bitmap_first_clear (uword * ai)
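
As a sanity check on the clib_bitmap_last_set() addition above, here is a minimal standalone C sketch of the same top-down scan. Plain uint64_t arrays and __builtin_clzll stand in for vppinfra vectors and count_leading_zeros, and the word size is fixed at 64 bits for the demo:

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_WORD 64

/* Scan words from the top, as the loop above does; ~0 means "no bit set". */
static uint64_t
last_set (uint64_t *words, uint64_t n_words)
{
  uint64_t i;
  for (i = n_words; i > 0; i--)
    if (words[i - 1] != 0)
      return i * BITS_PER_WORD - __builtin_clzll (words[i - 1]) - 1;
  return ~0ULL;
}

int
main (void)
{
  uint64_t bm[2] = { 1ULL << 5, 1ULL << 2 };   /* bits 5 and 66 set */
  printf ("%llu\n", (unsigned long long) last_set (bm, 2));   /* prints 66 */
  bm[1] = 0;
  printf ("%llu\n", (unsigned long long) last_set (bm, 2));   /* prints 5 */
  return 0;
}

interface_handoff_enable_disable() relies on exactly this value to reject a workers bitmap that references a worker index >= num_workers.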
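
Back in handoff.h, vlib_get_handoff_queue_elt() claims a ring slot by atomically bumping fq->tail and masking with nelts-1, which assumes the frame queue size is a power of two. A toy model of that index arithmetic, with invented values (the real code spins in vlib_worker_thread_barrier_check() while the ring is full):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t nelts = 8;        /* queue size; must be a power of two */
  uint64_t head_hint = 6;    /* consumer progress published by the worker */
  uint64_t tail = 12;        /* producer count so far */

  uint64_t new_tail = ++tail;   /* models __sync_add_and_fetch (&fq->tail, 1) */

  /* Full when the producer is a whole ring ahead of the consumer. */
  if (new_tail >= head_hint + nelts)
    printf ("ring full: wait for the consumer\n");
  else
    printf ("use slot %llu\n", (unsigned long long) (new_tail & (nelts - 1)));
  return 0;
}

This prints "use slot 5": the monotonically increasing tail wraps around the 8-entry ring with a mask instead of a modulo.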
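
The per-packet choice in worker-handoff reduces to hashing the flow key and indexing the per-interface workers vector, masking when the vector length is a power of two and taking a modulo otherwise. A self-contained sketch under assumed values (a toy 64-to-32-bit mixer replaces clib_xxhash; three enabled workers; first worker thread index 1):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for clib_xxhash(): any decent 64-bit mixer works here. */
static uint32_t
toy_hash (uint64_t key)
{
  key ^= key >> 33;
  key *= 0xff51afd7ed558ccdULL;
  key ^= key >> 33;
  return (uint32_t) key;
}

int
main (void)
{
  /* Offsets relative to first_worker_index, as built from the workers bitmap. */
  uint32_t workers[] = { 0, 1, 3 };
  uint32_t n = 3;
  uint32_t first_worker_index = 1;   /* assumed thread layout */
  uint64_t key;

  for (key = 1; key <= 4; key++)
    {
      uint32_t hash = toy_hash (key);   /* eth_get_key() result goes here */
      uint32_t index = ((n & (n - 1)) == 0) ? (hash & (n - 1)) : (hash % n);
      printf ("flow %llu -> worker thread %u\n",
              (unsigned long long) key, first_worker_index + workers[index]);
    }
  return 0;
}

Because every packet of a flow produces the same key, the whole flow lands on one worker, so per-flow ordering survives the handoff.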
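
Finally, a hypothetical debug-CLI session for the new command (interface name invented for illustration; the workers argument is a bitmap list parsed by unformat_bitmap_list, and every listed index must stay below the number of configured workers or the call fails with VNET_API_ERROR_INVALID_WORKER):

  vpp# set interface handoff GigabitEthernet2/0/0 workers 0-1,3
  vpp# set interface handoff GigabitEthernet2/0/0 workers 0-1,3 disable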