author    Damjan Marion <damarion@cisco.com>    2016-06-08 01:37:11 +0200
committer Dave Barach <openvpp@barachs.net>     2016-06-13 20:39:58 +0000
commit    0247b4600235bc18e558a7c4267b59be897d714d (patch)
tree      3a52c20a43b4c6eee1aaf829b493619106d865c4
parent    c424879b4c01f1a9d9dc3a210454a570178d31a2 (diff)
Add worker-handoff node

worker-handoff is a universal node that takes packets from an input node
and hands them over to worker threads. Currently it supports flow hashing
based on ipv4, ipv6 and mpls headers.

New cli:

  set interface handoff <interface-name> workers <list>

e.g.

  set interface handoff TenGigabitEthernet2/0/0 workers 3-6,9-10

Change-Id: Iaf0df83e69bb0e84969865e0e1cdb000b0864cf5
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--  vnet/Makefile.am               |   2
-rw-r--r--  vnet/vnet/api_errno.h          |   3
-rw-r--r--  vnet/vnet/buffer.h             |   7
-rw-r--r--  vnet/vnet/devices/dpdk/dpdk.h  |   4
-rw-r--r--  vnet/vnet/devices/dpdk/node.c  | 475
-rw-r--r--  vnet/vnet/handoff.c            | 529
-rw-r--r--  vnet/vnet/handoff.h            | 228
-rw-r--r--  vppinfra/vppinfra/bitmap.h     |  20
8 files changed, 829 insertions, 439 deletions
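Before the per-file hunks, the core of the change in one place: worker selection reduces to hashing a flow key taken from the packet and mapping the hash onto the per-interface worker list. A minimal sketch of that logic, using the names introduced in the handoff.c hunk below (b0, ihd0 and hm are as they appear there):

  u64 key = eth_get_key ((ethernet_header_t *) b0->data); /* ipv4/ipv6/mpls aware */
  u32 hash = (u32) clib_xxhash (key);
  u32 index0;

  if (is_pow2 (vec_len (ihd0->workers)))   /* power-of-two list: mask is cheaper */
    index0 = hash & (vec_len (ihd0->workers) - 1);
  else
    index0 = hash % vec_len (ihd0->workers);

  u32 next_worker_index = hm->first_worker_index + ihd0->workers[index0];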
diff --git a/vnet/Makefile.am b/vnet/Makefile.am
index 9feffc50eae..41e066f2616 100644
--- a/vnet/Makefile.am
+++ b/vnet/Makefile.am
@@ -26,6 +26,7 @@ TESTS =
########################################
libvnet_la_SOURCES += \
vnet/config.c \
+ vnet/handoff.c \
vnet/interface.c \
vnet/interface_cli.c \
vnet/interface_format.c \
@@ -39,6 +40,7 @@ nobase_include_HEADERS += \
vnet/buffer.h \
vnet/config.h \
vnet/global_funcs.h \
+ vnet/handoff.h \
vnet/interface.h \
vnet/interface_funcs.h \
vnet/l3_types.h \
diff --git a/vnet/vnet/api_errno.h b/vnet/vnet/api_errno.h
index e22b590bc7c..af6f3971a13 100644
--- a/vnet/vnet/api_errno.h
+++ b/vnet/vnet/api_errno.h
@@ -78,7 +78,8 @@ _(SR_POLICY_NAME_NOT_PRESENT, -84, "Segement routing policy name required") \
_(NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
_(ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
_(UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
-_(FAILED_TO_ATTACH_TO_JAVA_THREAD, -88, "Failed to attach to Java thread")
+_(FAILED_TO_ATTACH_TO_JAVA_THREAD, -88, "Failed to attach to Java thread") \
+_(INVALID_WORKER, -89, "Invalid worker thread")
typedef enum {
#define _(a,b,c) VNET_API_ERROR_##a = (b),
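The new INVALID_WORKER entry follows the X-macro convention used throughout vnet: each _(tag, code, string) row expands into an enum member here and into an error string elsewhere. A self-contained illustration of the expansion, with a hypothetical demo list standing in for the real foreach:

  #define foreach_demo_error \
  _(INVALID_WORKER, -89, "Invalid worker thread")

  typedef enum {
  #define _(a,b,c) DEMO_API_ERROR_##a = (b),
    foreach_demo_error
  #undef _
  } demo_api_error_t; /* expands to: DEMO_API_ERROR_INVALID_WORKER = (-89), */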
diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h
index 050642276b5..ea25ad0d286 100644
--- a/vnet/vnet/buffer.h
+++ b/vnet/vnet/buffer.h
@@ -64,6 +64,9 @@
#define LOG2_BUFFER_OUTPUT_FEAT_DONE LOG2_VLIB_BUFFER_FLAG_USER(5)
#define BUFFER_OUTPUT_FEAT_DONE (1 << LOG2_BUFFER_OUTPUT_FEAT_DONE)
+#define LOG2_BUFFER_HANDOFF_NEXT_VALID LOG2_VLIB_BUFFER_FLAG_USER(6)
+#define BUFFER_HANDOFF_NEXT_VALID (1 << LOG2_BUFFER_HANDOFF_NEXT_VALID)
+
#define foreach_buffer_opaque_union_subtype \
_(ethernet) \
_(ip) \
@@ -73,7 +76,7 @@ _(l2) \
_(l2t) \
_(gre) \
_(l2_classify) \
-_(io_handoff) \
+_(handoff) \
_(policer) \
_(output_features) \
_(map) \
@@ -185,7 +188,7 @@ typedef struct {
/* IO - worker thread handoff */
struct {
u32 next_index;
- } io_handoff;
+ } handoff;
/* vnet policer */
struct {
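The hunk above renames the io_handoff opaque to handoff and adds a buffer flag telling the dispatch node whether the recorded next index is trustworthy. The intended producer/consumer pairing, sketched from the node.c and handoff.c hunks below:

  /* producer (dpdk rx path): record the post-handoff next node, mark it valid */
  b0->flags |= BUFFER_HANDOFF_NEXT_VALID;
  vnet_buffer (b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP4_INPUT;

  /* consumer (worker-handoff): fall back to ethernet-input when unset */
  if ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0)
    vnet_buffer (b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;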
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
index 9032b7a9152..1e7d36846cd 100644
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ b/vnet/vnet/devices/dpdk/dpdk.h
@@ -495,10 +495,6 @@ void dpdk_set_flowcontrol_callback (vlib_main_t *vm,
u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
-vlib_frame_queue_elt_t * vlib_get_handoff_queue_elt (u32 vlib_worker_index);
-
-u32 dpdk_get_handoff_node_index (void);
-
void set_efd_bitmap (u8 *bitmap, u32 value, u32 op);
struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b);
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
index 9a0f9c8fb49..27670f37d98 100644
--- a/vnet/vnet/devices/dpdk/node.c
+++ b/vnet/vnet/devices/dpdk/node.c
@@ -22,6 +22,7 @@
#include <vnet/devices/dpdk/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls-gre/packet.h>
+#include <vnet/handoff.h>
#include "dpdk_priv.h"
@@ -48,240 +49,45 @@
*/
#define VMWARE_LENGTH_BUG_WORKAROUND 0
-typedef struct {
- u32 cached_next_index;
-
- /* convenience variables */
- vlib_main_t * vlib_main;
- vnet_main_t * vnet_main;
-} handoff_dispatch_main_t;
-
-typedef struct {
- u32 buffer_index;
- u32 next_index;
- u32 sw_if_index;
-} handoff_dispatch_trace_t;
-
-/* packet trace format function */
-static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *);
-
- s = format (s, "HANDOFF_DISPATCH: sw_if_index %d next_index %d buffer 0x%x",
- t->sw_if_index,
- t->next_index,
- t->buffer_index);
- return s;
-}
-
-handoff_dispatch_main_t handoff_dispatch_main;
-
-vlib_node_registration_t handoff_dispatch_node;
-
-#define foreach_handoff_dispatch_error \
-_(EXAMPLE, "example packets")
-
-typedef enum {
-#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
- foreach_handoff_dispatch_error
-#undef _
- HANDOFF_DISPATCH_N_ERROR,
-} handoff_dispatch_error_t;
-
-static char * handoff_dispatch_error_strings[] = {
-#define _(sym,string) string,
- foreach_handoff_dispatch_error
+static char * dpdk_error_strings[] = {
+#define _(n,s) s,
+ foreach_dpdk_error
#undef _
};
-static inline
-void vlib_put_handoff_queue_elt (vlib_frame_queue_elt_t * hf)
-{
- CLIB_MEMORY_BARRIER();
- hf->valid = 1;
-}
-
-static uword
-handoff_dispatch_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+always_inline int
+dpdk_mbuf_is_ip4(struct rte_mbuf *mb)
{
- u32 n_left_from, * from, * to_next;
- dpdk_rx_next_t next_index;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index,
- to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 bi0, bi1;
- vlib_buffer_t * b0, * b1;
- u32 next0, next1;
- u32 sw_if_index0, sw_if_index1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
- }
-
- /* speculatively enqueue b0 and b1 to the current next frame */
- to_next[0] = bi0 = from[0];
- to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- next0 = vnet_buffer(b0)->io_handoff.next_index;
- next1 = vnet_buffer(b1)->io_handoff.next_index;
-
- if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
- {
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
- handoff_dispatch_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- t->buffer_index = bi0;
- }
- if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
- handoff_dispatch_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
- t->sw_if_index = sw_if_index1;
- t->next_index = next1;
- t->buffer_index = bi1;
- }
- }
-
- /* verify speculative enqueues, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t * b0;
- u32 next0;
- u32 sw_if_index0;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- next0 = vnet_buffer(b0)->io_handoff.next_index;
-
- if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
- {
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
- handoff_dispatch_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- t->buffer_index = bi0;
- }
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
+#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
+ return RTE_ETH_IS_IPV4_HDR(mb->packet_type) != 0;
+#else
+ return (mb->ol_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT)) != 0;
+#endif
}
-VLIB_REGISTER_NODE (handoff_dispatch_node) = {
- .function = handoff_dispatch_node_fn,
- .name = "handoff-dispatch",
- .vector_size = sizeof (u32),
- .format_trace = format_handoff_dispatch_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .flags = VLIB_NODE_FLAG_IS_HANDOFF,
-
- .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
- .error_strings = handoff_dispatch_error_strings,
-
- .n_next_nodes = DPDK_RX_N_NEXT,
-
- .next_nodes = {
- [DPDK_RX_NEXT_DROP] = "error-drop",
- [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input",
- [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input",
- [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input",
- [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input",
- },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
-
-clib_error_t *handoff_dispatch_init (vlib_main_t *vm)
+always_inline int
+dpdk_mbuf_is_ip6(struct rte_mbuf *mb)
{
- handoff_dispatch_main_t * mp = &handoff_dispatch_main;
-
- mp->vlib_main = vm;
- mp->vnet_main = &vnet_main;
-
- return 0;
+#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
+ return RTE_ETH_IS_IPV6_HDR(mb->packet_type) != 0;
+#else
+ return (mb->ol_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) != 0;
+#endif
}
-VLIB_INIT_FUNCTION (handoff_dispatch_init);
-
-u32 dpdk_get_handoff_node_index (void)
+always_inline int
+vlib_buffer_is_mpls(vlib_buffer_t * b)
{
- return handoff_dispatch_node.index;
+ ethernet_header_t *h = (ethernet_header_t *) b->data;
+ return (h->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST));
}
-static char * dpdk_error_strings[] = {
-#define _(n,s) s,
- foreach_dpdk_error
-#undef _
-};
-
always_inline void
dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb,
vlib_buffer_t *b0,
u8 * next0, u8 * error0)
{
- u8 is0_ip4, is0_ip6, is0_mpls, n0;
+ u8 n0;
uint16_t mb_flags = mb->ol_flags;
if (PREDICT_FALSE(mb_flags & (
@@ -306,37 +112,30 @@ dpdk_rx_next_and_error_from_mb_flags_x1 (dpdk_device_t *xd, struct rte_mbuf *mb,
{
*error0 = DPDK_ERROR_NONE;
if (PREDICT_FALSE(xd->per_interface_next_index != ~0))
- n0 = xd->per_interface_next_index;
+ {
+ n0 = xd->per_interface_next_index;
+ b0->flags |= BUFFER_HANDOFF_NEXT_VALID;
+ if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb)))
+ vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP4_INPUT;
+ else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb)))
+ vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0)))
+ vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_MPLS_INPUT;
+ else
+ vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
+ }
else if (PREDICT_FALSE(xd->vlan_subifs || (mb_flags & PKT_RX_VLAN_PKT)))
n0 = DPDK_RX_NEXT_ETHERNET_INPUT;
else
{
- n0 = DPDK_RX_NEXT_ETHERNET_INPUT;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
- is0_ip4 = RTE_ETH_IS_IPV4_HDR(mb->packet_type) != 0;
-#else
- is0_ip4 = (mb_flags & (PKT_RX_IPV4_HDR | PKT_RX_IPV4_HDR_EXT)) != 0;
-#endif
-
- if (PREDICT_TRUE(is0_ip4))
+ if (PREDICT_TRUE (dpdk_mbuf_is_ip4(mb)))
n0 = DPDK_RX_NEXT_IP4_INPUT;
+ else if (PREDICT_TRUE(dpdk_mbuf_is_ip6(mb)))
+ n0 = DPDK_RX_NEXT_IP6_INPUT;
+ else if (PREDICT_TRUE(vlib_buffer_is_mpls(b0)))
+ n0 = DPDK_RX_NEXT_MPLS_INPUT;
else
- {
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 1, 0, 0)
- is0_ip6 = RTE_ETH_IS_IPV6_HDR(mb->packet_type) != 0;
-#else
- is0_ip6 =
- (mb_flags & (PKT_RX_IPV6_HDR | PKT_RX_IPV6_HDR_EXT)) != 0;
-#endif
- if (PREDICT_TRUE(is0_ip6))
- n0 = DPDK_RX_NEXT_IP6_INPUT;
- else
- {
- ethernet_header_t *h0 = (ethernet_header_t *) b0->data;
- is0_mpls = (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST));
- n0 = is0_mpls ? DPDK_RX_NEXT_MPLS_INPUT : n0;
- }
- }
+ n0 = DPDK_RX_NEXT_ETHERNET_INPUT;
}
}
*next0 = n0;
@@ -908,194 +707,6 @@ void dpdk_set_next_node (dpdk_rx_next_t next, char *name)
}
}
-inline vlib_frame_queue_elt_t *
-vlib_get_handoff_queue_elt (u32 vlib_worker_index)
-{
- vlib_frame_queue_t *fq;
- vlib_frame_queue_elt_t *elt;
- u64 new_tail;
-
- fq = vlib_frame_queues[vlib_worker_index];
- ASSERT (fq);
-
- new_tail = __sync_add_and_fetch (&fq->tail, 1);
-
- /* Wait until a ring slot is available */
- while (new_tail >= fq->head_hint + fq->nelts)
- vlib_worker_thread_barrier_check ();
-
- elt = fq->elts + (new_tail & (fq->nelts-1));
-
- /* this would be very bad... */
- while (elt->valid)
- ;
-
- elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
- elt->last_n_vectors = elt->n_vectors = 0;
-
- return elt;
-}
-
-static inline vlib_frame_queue_elt_t *
-dpdk_get_handoff_queue_elt (
- u32 vlib_worker_index,
- vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index)
-{
- vlib_frame_queue_elt_t *elt;
-
- if (handoff_queue_elt_by_worker_index [vlib_worker_index])
- return handoff_queue_elt_by_worker_index [vlib_worker_index];
-
- elt = vlib_get_handoff_queue_elt (vlib_worker_index);
-
- handoff_queue_elt_by_worker_index [vlib_worker_index] = elt;
-
- return elt;
-}
-
-static inline vlib_frame_queue_t *
-is_vlib_handoff_queue_congested (
- u32 vlib_worker_index,
- u32 queue_hi_thresh,
- vlib_frame_queue_t ** handoff_queue_by_worker_index)
-{
- vlib_frame_queue_t *fq;
-
- fq = handoff_queue_by_worker_index [vlib_worker_index];
- if (fq != (vlib_frame_queue_t *)(~0))
- return fq;
-
- fq = vlib_frame_queues[vlib_worker_index];
- ASSERT (fq);
-
- if (PREDICT_FALSE(fq->tail >= (fq->head_hint + queue_hi_thresh))) {
- /* a valid entry in the array will indicate the queue has reached
- * the specified threshold and is congested
- */
- handoff_queue_by_worker_index [vlib_worker_index] = fq;
- fq->enqueue_full_events++;
- return fq;
- }
-
- return NULL;
-}
-
-static inline u64 ipv4_get_key (ip4_header_t *ip)
-{
- u64 hash_key;
-
- hash_key = *((u64*)(&ip->address_pair)) ^ ip->protocol;
-
- return hash_key;
-}
-
-static inline u64 ipv6_get_key (ip6_header_t *ip)
-{
- u64 hash_key;
-
- hash_key = ip->src_address.as_u64[0] ^
- rotate_left(ip->src_address.as_u64[1],13) ^
- rotate_left(ip->dst_address.as_u64[0],26) ^
- rotate_left(ip->dst_address.as_u64[1],39) ^
- ip->protocol;
-
- return hash_key;
-}
-
-
-#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U
-#define MPLS_LABEL_MASK 0xFFFFF000U
-
-static inline u64 mpls_get_key (mpls_unicast_header_t *m)
-{
- u64 hash_key;
- u8 ip_ver;
-
-
- /* find the bottom of the MPLS label stack. */
- if (PREDICT_TRUE(m->label_exp_s_ttl &
- clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
- goto bottom_lbl_found;
- }
- m++;
-
- if (PREDICT_TRUE(m->label_exp_s_ttl &
- clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
- goto bottom_lbl_found;
- }
- m++;
-
- if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
- goto bottom_lbl_found;
- }
- m++;
-
- if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
- goto bottom_lbl_found;
- }
- m++;
-
- if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
- goto bottom_lbl_found;
- }
-
- /* the bottom label was not found - use the last label */
- hash_key = m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
-
- return hash_key;
-
-
-bottom_lbl_found:
- m++;
- ip_ver = (*((u8 *)m) >> 4);
-
- /* find out if it is IPV4 or IPV6 header */
- if (PREDICT_TRUE(ip_ver == 4)) {
- hash_key = ipv4_get_key((ip4_header_t *)m);
- } else if (PREDICT_TRUE(ip_ver == 6)) {
- hash_key = ipv6_get_key((ip6_header_t *)m);
- } else {
- /* use the bottom label */
- hash_key = (m-1)->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
- }
-
- return hash_key;
-
-}
-
-static inline u64 eth_get_key (ethernet_header_t *h0)
-{
- u64 hash_key;
-
-
- if (PREDICT_TRUE(h0->type) == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) {
- hash_key = ipv4_get_key((ip4_header_t *)(h0+1));
- } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) {
- hash_key = ipv6_get_key((ip6_header_t *)(h0+1));
- } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
- hash_key = mpls_get_key((mpls_unicast_header_t *)(h0+1));
- } else if ((h0->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ||
- (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_DOT1AD))) {
- ethernet_vlan_header_t * outer = (ethernet_vlan_header_t *)(h0 + 1);
-
- outer = (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ?
- outer+1 : outer;
- if (PREDICT_TRUE(outer->type) == clib_host_to_net_u16(ETHERNET_TYPE_IP4)) {
- hash_key = ipv4_get_key((ip4_header_t *)(outer+1));
- } else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)) {
- hash_key = ipv6_get_key((ip6_header_t *)(outer+1));
- } else if (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
- hash_key = mpls_get_key((mpls_unicast_header_t *)(outer+1));
- } else {
- hash_key = outer->type;
- }
- } else {
- hash_key = 0;
- }
-
- return hash_key;
-}
-
/*
* This function is used when dedicated IO threads feed the worker threads.
*
@@ -1395,7 +1006,7 @@ void dpdk_io_thread (vlib_worker_thread_t * w,
vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0;
- vnet_buffer(b0)->io_handoff.next_index = next0;
+ vnet_buffer(b0)->handoff.next_index = next0;
n_rx_bytes += mb->pkt_len;
/* Process subsequent segments of multi-segment packets */
@@ -1796,7 +1407,7 @@ dpdk_io_input (vlib_main_t * vm,
vnet_buffer(b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
vnet_buffer(b0)->sw_if_index[VLIB_TX] = (u32)~0;
- vnet_buffer(b0)->io_handoff.next_index = next0;
+ vnet_buffer(b0)->handoff.next_index = next0;
n_rx_bytes += mb->pkt_len;
/* Process subsequent segments of multi-segment packets */
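Handoff does not use ordinary next frames: buffer indices are staged into per-worker frame-queue elements and published with a release barrier. The element lifecycle, condensed from the worker_handoff_node_fn hunk below (elt_by_worker_index stands for the per-thread cache vector it keeps):

  vlib_frame_queue_elt_t *hf =
    dpdk_get_handoff_queue_elt (next_worker_index, elt_by_worker_index);

  hf->buffer_index[hf->n_vectors++] = bi0;  /* stage one buffer index */

  if (hf->n_vectors == VLIB_FRAME_SIZE)     /* frame full: publish to worker */
    {
      vlib_put_handoff_queue_elt (hf);      /* memory barrier, then valid = 1 */
      elt_by_worker_index[next_worker_index] = 0;
    }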
diff --git a/vnet/vnet/handoff.c b/vnet/vnet/handoff.c
new file mode 100644
index 00000000000..6cb8d02b431
--- /dev/null
+++ b/vnet/vnet/handoff.c
@@ -0,0 +1,529 @@
+
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/xxhash.h>
+#include <vlib/threads.h>
+#include <vnet/handoff.h>
+
+typedef struct {
+ uword * workers_bitmap;
+ u32 * workers;
+} per_interface_handoff_data_t;
+
+typedef struct {
+ u32 cached_next_index;
+ u32 num_workers;
+ u32 first_worker_index;
+
+ per_interface_handoff_data_t * if_data;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} handoff_main_t;
+
+handoff_main_t handoff_main;
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_worker_index;
+ u32 buffer_index;
+} worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 * format_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ worker_handoff_trace_t * t = va_arg (*args, worker_handoff_trace_t *);
+
+ s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
+ t->sw_if_index, t->next_worker_index, t->buffer_index);
+ return s;
+}
+
+vlib_node_registration_t handoff_node;
+
+static uword
+worker_handoff_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ handoff_main_t * hm = &handoff_main;
+ vlib_thread_main_t * tm = vlib_get_thread_main();
+ u32 n_left_from, * from;
+ static __thread vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t ** congested_handoff_queue_by_worker_index = 0;
+ vlib_frame_queue_elt_t * hf = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, * to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+
+ if (PREDICT_FALSE(handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ hm->first_worker_index + hm->num_workers - 1,
+ (vlib_frame_queue_t *)(~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 sw_if_index0;
+ u32 hash;
+ u64 hash_key;
+ per_interface_handoff_data_t * ihd0;
+ u32 index0;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ ASSERT (hm->if_data);
+ ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);
+
+ next_worker_index = hm->first_worker_index;
+
+ /*
+ * Force unknown traffic onto worker 0,
+ * and into ethernet-input. $$$$ add more hashes.
+ */
+
+ /* Compute ingress LB hash */
+ hash_key = eth_get_key ((ethernet_header_t *) b0->data);
+ hash = (u32) clib_xxhash (hash_key);
+
+ /* if input node did not specify next index, then packet
+ should go to ethernet-input */
+ if (PREDICT_FALSE ((b0->flags & BUFFER_HANDOFF_NEXT_VALID) == 0))
+ vnet_buffer(b0)->handoff.next_index = HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
+ else if (vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP4_INPUT ||
+ vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_IP6_INPUT ||
+ vnet_buffer(b0)->handoff.next_index == HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
+ vlib_buffer_advance (b0, (sizeof(ethernet_header_t)));
+
+ if (PREDICT_TRUE (is_pow2 (vec_len (ihd0->workers))))
+ index0 = hash & (vec_len (ihd0->workers) - 1);
+ else
+ index0 = hash % vec_len (ihd0->workers);
+
+ next_worker_index += ihd0->workers[index0];
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = dpdk_get_handoff_queue_elt(next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_handoff_queue_elt(hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_worker_index = next_worker_index - hm->first_worker_index;
+ t->buffer_index = bi0;
+ }
+
+ }
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_handoff_queue_elt(hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] = (vlib_frame_queue_t *)(~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (worker_handoff_node) = {
+ .function = worker_handoff_node_fn,
+ .name = "worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
+
+int interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
+ uword * bitmap, int enable_disable)
+{
+ handoff_main_t * hm = &handoff_main;
+ vnet_sw_interface_t * sw;
+ vnet_main_t * vnm = vnet_get_main();
+ per_interface_handoff_data_t * d;
+ int i, rv;
+ u32 node_index = enable_disable ? worker_handoff_node.index : ~0;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (clib_bitmap_last_set(bitmap) >= hm->num_workers)
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ vec_validate (hm->if_data, sw_if_index);
+ d = vec_elt_at_index(hm->if_data, sw_if_index);
+
+ vec_free (d->workers);
+ vec_free (d->workers_bitmap);
+
+ if (enable_disable)
+ {
+ d->workers_bitmap = bitmap;
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ vec_add1(d->workers, i);
+ }));
+ }
+
+ rv = vnet_hw_interface_rx_redirect_to_node (vnm, sw_if_index, node_index);
+ return rv;
+}
+
+static clib_error_t *
+set_interface_handoff_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 sw_if_index = ~0;
+ int enable_disable = 1;
+ uword * bitmap = 0;
+
+ int rv = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "disable"))
+ enable_disable = 0;
+ else if (unformat (input, "workers %U", unformat_bitmap_list,
+ &bitmap))
+ ;
+ else if (unformat (input, "%U", unformat_vnet_sw_interface,
+ vnet_get_main(), &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Please specify an interface...");
+
+ if (bitmap == 0)
+ return clib_error_return (0, "Please specify list of workers...");
+
+ rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, enable_disable);
+
+ switch(rv) {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+ return clib_error_return (0, "Invalid interface");
+ break;
+
+ case VNET_API_ERROR_INVALID_WORKER:
+ return clib_error_return (0, "Invalid worker(s)");
+ break;
+
+ case VNET_API_ERROR_UNIMPLEMENTED:
+ return clib_error_return (0, "Device driver doesn't support redirection");
+ break;
+
+ default:
+ return clib_error_return (0, "unknown return value %d", rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
+ .path = "set interface handoff",
+ .short_help =
+ "set interface handoff <interface-name> workers <workers-list>",
+ .function = set_interface_handoff_command_fn,
+};
+
+typedef struct {
+ u32 buffer_index;
+ u32 next_index;
+ u32 sw_if_index;
+} handoff_dispatch_trace_t;
+
+/* packet trace format function */
+static u8 * format_handoff_dispatch_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ handoff_dispatch_trace_t * t = va_arg (*args, handoff_dispatch_trace_t *);
+
+ s = format (s, "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
+ t->sw_if_index,
+ t->next_index,
+ t->buffer_index);
+ return s;
+}
+
+
+vlib_node_registration_t handoff_dispatch_node;
+
+#define foreach_handoff_dispatch_error \
+_(EXAMPLE, "example packets")
+
+typedef enum {
+#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
+ foreach_handoff_dispatch_error
+#undef _
+ HANDOFF_DISPATCH_N_ERROR,
+} handoff_dispatch_error_t;
+
+static char * handoff_dispatch_error_strings[] = {
+#define _(sym,string) string,
+ foreach_handoff_dispatch_error
+#undef _
+};
+
+static uword
+handoff_dispatch_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ handoff_dispatch_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ next0 = vnet_buffer(b0)->handoff.next_index;
+ next1 = vnet_buffer(b1)->handoff.next_index;
+
+ if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
+ {
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->buffer_index = bi0;
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->buffer_index = bi1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = vnet_buffer(b0)->handoff.next_index;
+
+ if (PREDICT_FALSE(vm->trace_main.trace_active_hint))
+ {
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
+ handoff_dispatch_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->buffer_index = bi0;
+ }
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (handoff_dispatch_node) = {
+ .function = handoff_dispatch_node_fn,
+ .name = "handoff-dispatch",
+ .vector_size = sizeof (u32),
+ .format_trace = format_handoff_dispatch_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .flags = VLIB_NODE_FLAG_IS_HANDOFF,
+
+ .n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
+ .error_strings = handoff_dispatch_error_strings,
+
+ .n_next_nodes = HANDOFF_DISPATCH_N_NEXT,
+
+ .next_nodes = {
+ [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
+ [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
+ [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
+ [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
+
+clib_error_t *handoff_init (vlib_main_t *vm)
+{
+ handoff_main_t * hm = &handoff_main;
+ vlib_thread_main_t * tm = vlib_get_thread_main();
+ uword * p;
+
+ vlib_thread_registration_t * tr;
+ /* Only the standard vnet worker threads are supported */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr)
+ {
+ hm->num_workers = tr->count;
+ hm->first_worker_index = tr->first_index;
+ }
+
+ hm->vlib_main = vm;
+ hm->vnet_main = &vnet_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (handoff_init);
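A hedged usage sketch for the new entry point: the bitmap is what unformat_bitmap_list builds from a spec such as "3-6,9-10", worker ids being zero-based relative to the first vnet worker thread; vm and sw_if_index are assumed to be in hand:

  uword *bitmap = 0;
  bitmap = clib_bitmap_set (bitmap, 3, 1);  /* enable workers 3 and 4 */
  bitmap = clib_bitmap_set (bitmap, 4, 1);

  int rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap,
                                             1 /* enable */);
  if (rv == VNET_API_ERROR_INVALID_WORKER)
    clib_warning ("worker id exceeds the configured worker count");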
diff --git a/vnet/vnet/handoff.h b/vnet/vnet/handoff.h
new file mode 100644
index 00000000000..e0938ebfb2e
--- /dev/null
+++ b/vnet/vnet/handoff.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_handoff_h
+#define included_vnet_handoff_h
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/mpls-gre/packet.h>
+
+typedef enum {
+ HANDOFF_DISPATCH_NEXT_IP4_INPUT,
+ HANDOFF_DISPATCH_NEXT_IP6_INPUT,
+ HANDOFF_DISPATCH_NEXT_MPLS_INPUT,
+ HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT,
+ HANDOFF_DISPATCH_NEXT_DROP,
+ HANDOFF_DISPATCH_N_NEXT,
+} handoff_dispatch_next_t;
+
+static inline
+void vlib_put_handoff_queue_elt (vlib_frame_queue_elt_t * hf)
+{
+ CLIB_MEMORY_BARRIER();
+ hf->valid = 1;
+}
+
+static inline vlib_frame_queue_elt_t *
+vlib_get_handoff_queue_elt (u32 vlib_worker_index)
+{
+ vlib_frame_queue_t *fq;
+ vlib_frame_queue_elt_t *elt;
+ u64 new_tail;
+
+ fq = vlib_frame_queues[vlib_worker_index];
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head_hint + fq->nelts)
+ vlib_worker_thread_barrier_check ();
+
+ elt = fq->elts + (new_tail & (fq->nelts-1));
+
+ /* this would be very bad... */
+ while (elt->valid)
+ ;
+
+ elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
+ elt->last_n_vectors = elt->n_vectors = 0;
+
+ return elt;
+}
+
+static inline vlib_frame_queue_t *
+is_vlib_handoff_queue_congested (
+ u32 vlib_worker_index,
+ u32 queue_hi_thresh,
+ vlib_frame_queue_t ** handoff_queue_by_worker_index)
+{
+ vlib_frame_queue_t *fq;
+
+ fq = handoff_queue_by_worker_index [vlib_worker_index];
+ if (fq != (vlib_frame_queue_t *)(~0))
+ return fq;
+
+ fq = vlib_frame_queues[vlib_worker_index];
+ ASSERT (fq);
+
+ if (PREDICT_FALSE(fq->tail >= (fq->head_hint + queue_hi_thresh))) {
+ /* a valid entry in the array will indicate the queue has reached
+ * the specified threshold and is congested
+ */
+ handoff_queue_by_worker_index [vlib_worker_index] = fq;
+ fq->enqueue_full_events++;
+ return fq;
+ }
+
+ return NULL;
+}
+
+static inline vlib_frame_queue_elt_t *
+dpdk_get_handoff_queue_elt (u32 vlib_worker_index,
+ vlib_frame_queue_elt_t **
+ handoff_queue_elt_by_worker_index)
+{
+ vlib_frame_queue_elt_t *elt;
+
+ if (handoff_queue_elt_by_worker_index [vlib_worker_index])
+ return handoff_queue_elt_by_worker_index [vlib_worker_index];
+
+ elt = vlib_get_handoff_queue_elt (vlib_worker_index);
+
+ handoff_queue_elt_by_worker_index [vlib_worker_index] = elt;
+
+ return elt;
+}
+
+static inline u64 ipv4_get_key (ip4_header_t *ip)
+{
+ u64 hash_key;
+
+ hash_key = *((u64*)(&ip->address_pair)) ^ ip->protocol;
+
+ return hash_key;
+}
+
+static inline u64 ipv6_get_key (ip6_header_t *ip)
+{
+ u64 hash_key;
+
+ hash_key = ip->src_address.as_u64[0] ^
+ rotate_left(ip->src_address.as_u64[1],13) ^
+ rotate_left(ip->dst_address.as_u64[0],26) ^
+ rotate_left(ip->dst_address.as_u64[1],39) ^
+ ip->protocol;
+
+ return hash_key;
+}
+
+#define MPLS_BOTTOM_OF_STACK_BIT_MASK 0x00000100U
+#define MPLS_LABEL_MASK 0xFFFFF000U
+
+static inline u64 mpls_get_key (mpls_unicast_header_t *m)
+{
+ u64 hash_key;
+ u8 ip_ver;
+
+
+ /* find the bottom of the MPLS label stack. */
+ if (PREDICT_TRUE(m->label_exp_s_ttl &
+ clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (PREDICT_TRUE(m->label_exp_s_ttl &
+ clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK))) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+ m++;
+
+ if (m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_BOTTOM_OF_STACK_BIT_MASK)) {
+ goto bottom_lbl_found;
+ }
+
+ /* the bottom label was not found - use the last label */
+ hash_key = m->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+
+ return hash_key;
+
+bottom_lbl_found:
+ m++;
+ ip_ver = (*((u8 *)m) >> 4);
+
+ /* find out if it is IPV4 or IPV6 header */
+ if (PREDICT_TRUE(ip_ver == 4)) {
+ hash_key = ipv4_get_key((ip4_header_t *)m);
+ } else if (PREDICT_TRUE(ip_ver == 6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)m);
+ } else {
+ /* use the bottom label */
+ hash_key = (m-1)->label_exp_s_ttl & clib_net_to_host_u32(MPLS_LABEL_MASK);
+ }
+
+ return hash_key;
+
+}
+
+
+static inline u64
+eth_get_key (ethernet_header_t *h0)
+{
+ u64 hash_key;
+
+ if (PREDICT_TRUE(h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+ hash_key = ipv4_get_key((ip4_header_t *)(h0+1));
+ } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)(h0+1));
+ } else if (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+ hash_key = mpls_get_key((mpls_unicast_header_t *)(h0+1));
+ } else if ((h0->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ||
+ (h0->type == clib_host_to_net_u16(ETHERNET_TYPE_DOT1AD))) {
+ ethernet_vlan_header_t * outer = (ethernet_vlan_header_t *)(h0 + 1);
+
+ outer = (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_VLAN)) ?
+ outer+1 : outer;
+ if (PREDICT_TRUE(outer->type == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
+ hash_key = ipv4_get_key((ip4_header_t *)(outer+1));
+ } else if (outer->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)) {
+ hash_key = ipv6_get_key((ip6_header_t *)(outer+1));
+ } else if (outer->type == clib_host_to_net_u16(ETHERNET_TYPE_MPLS_UNICAST)) {
+ hash_key = mpls_get_key((mpls_unicast_header_t *)(outer+1));
+ } else {
+ hash_key = outer->type;
+ }
+ } else {
+ hash_key = 0;
+ }
+
+ return hash_key;
+}
+
+#endif /* included_vnet_handoff_h */
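Note that the key functions are deliberately cheap and directional: ipv4_get_key XORs the raw 8-byte (src, dst) address pair with the protocol, so the two directions of a flow may land on different workers, and clib_xxhash supplies the avalanche the raw key lacks. A small sketch of the intended composition (example_ip4_flow_hash is a hypothetical name):

  static inline u32
  example_ip4_flow_hash (ip4_header_t * ip4)
  {
    /* raw key: (src,dst) address pair XOR protocol, then xxhash avalanche */
    return (u32) clib_xxhash (ipv4_get_key (ip4));
  }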
diff --git a/vppinfra/vppinfra/bitmap.h b/vppinfra/vppinfra/bitmap.h
index e69851b60be..986c322e86c 100644
--- a/vppinfra/vppinfra/bitmap.h
+++ b/vppinfra/vppinfra/bitmap.h
@@ -338,6 +338,26 @@ always_inline uword clib_bitmap_first_set (uword * ai)
return ~0;
}
+/* Return highest numbered set bit in bitmap.
+
+ Return infinity (~0) if bitmap is zero. */
+always_inline uword clib_bitmap_last_set (uword * ai)
+{
+ uword i;
+
+ /* uword is unsigned, so test i > 0 and index ai[i - 1] */
+ for (i = vec_len (ai); i > 0; i--)
+ {
+ uword x = ai[i - 1];
+ if (x != 0)
+ {
+ uword first_bit;
+ count_leading_zeros (first_bit, x);
+ return i * BITS (ai[0]) - first_bit - 1;
+ }
+ }
+ return ~0;
+}
+
/* Return lowest numbered clear bit in bitmap. */
always_inline uword
clib_bitmap_first_clear (uword * ai)
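A quick sketch of the new primitive's contract, as the INVALID_WORKER validation in handoff.c relies on it:

  uword *b = 0;
  b = clib_bitmap_set (b, 9, 1);
  b = clib_bitmap_set (b, 10, 1);

  ASSERT (clib_bitmap_last_set (b) == 10);         /* highest set bit */
  ASSERT (clib_bitmap_last_set (0) == (uword) ~0); /* empty bitmap */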