aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/dpo
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/dpo')
-rw-r--r--vnet/vnet/dpo/classify_dpo.c120
-rw-r--r--vnet/vnet/dpo/classify_dpo.h56
-rw-r--r--vnet/vnet/dpo/dpo.c424
-rw-r--r--vnet/vnet/dpo/dpo.h354
-rw-r--r--vnet/vnet/dpo/drop_dpo.c100
-rw-r--r--vnet/vnet/dpo/drop_dpo.h37
-rw-r--r--vnet/vnet/dpo/load_balance.c760
-rw-r--r--vnet/vnet/dpo/load_balance.h203
-rw-r--r--vnet/vnet/dpo/load_balance_map.c566
-rw-r--r--vnet/vnet/dpo/load_balance_map.h78
-rw-r--r--vnet/vnet/dpo/lookup_dpo.c802
-rw-r--r--vnet/vnet/dpo/lookup_dpo.h108
-rw-r--r--vnet/vnet/dpo/mpls_label_dpo.c263
-rw-r--r--vnet/vnet/dpo/mpls_label_dpo.h66
-rw-r--r--vnet/vnet/dpo/punt_dpo.c100
-rw-r--r--vnet/vnet/dpo/punt_dpo.h30
-rw-r--r--vnet/vnet/dpo/receive_dpo.c155
-rw-r--r--vnet/vnet/dpo/receive_dpo.h62
18 files changed, 4284 insertions, 0 deletions
diff --git a/vnet/vnet/dpo/classify_dpo.c b/vnet/vnet/dpo/classify_dpo.c
new file mode 100644
index 00000000..3b7b98f9
--- /dev/null
+++ b/vnet/vnet/dpo/classify_dpo.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS Label DPOs
+ */
+classify_dpo_t *classify_dpo_pool;
+
+static classify_dpo_t *
+classify_dpo_alloc (void)
+{
+ classify_dpo_t *cd;
+
+ pool_get_aligned(classify_dpo_pool, cd, CLIB_CACHE_LINE_BYTES);
+ memset(cd, 0, sizeof(*cd));
+
+ return (cd);
+}
+
+static index_t
+classify_dpo_get_index (classify_dpo_t *cd)
+{
+ return (cd - classify_dpo_pool);
+}
+
+index_t
+classify_dpo_create (fib_protocol_t proto,
+ u32 classify_table_index)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_alloc();
+ cd->cd_proto = proto;
+ cd->cd_table_index = classify_table_index;
+
+ return (classify_dpo_get_index(cd));
+}
+
+u8*
+format_classify_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(index);
+
+ return (format(s, "classify:[%d]:table:%d",
+ index, cd->cd_table_index));
+}
+
+static void
+classify_dpo_lock (dpo_id_t *dpo)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(dpo->dpoi_index);
+
+ cd->cd_locks++;
+}
+
+static void
+classify_dpo_unlock (dpo_id_t *dpo)
+{
+ classify_dpo_t *cd;
+
+ cd = classify_dpo_get(dpo->dpoi_index);
+
+ cd->cd_locks--;
+
+ if (0 == cd->cd_locks)
+ {
+ pool_put(classify_dpo_pool, cd);
+ }
+}
+
+const static dpo_vft_t cd_vft = {
+ .dv_lock = classify_dpo_lock,
+ .dv_unlock = classify_dpo_unlock,
+ .dv_format = format_classify_dpo,
+};
+
+const static char* const classify_ip4_nodes[] =
+{
+ "ip4-classify",
+ NULL,
+};
+const static char* const classify_ip6_nodes[] =
+{
+ "ip6-classify",
+ NULL,
+};
+const static char* const * const classify_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = classify_ip4_nodes,
+ [DPO_PROTO_IP6] = classify_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+classify_dpo_module_init (void)
+{
+ dpo_register(DPO_CLASSIFY, &cd_vft, classify_nodes);
+}
diff --git a/vnet/vnet/dpo/classify_dpo.h b/vnet/vnet/dpo/classify_dpo.h
new file mode 100644
index 00000000..cd35c3c4
--- /dev/null
+++ b/vnet/vnet/dpo/classify_dpo.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CLASSIFY_DPO_H__
+#define __CLASSIFY_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of an MPLS label for imposition in the data-path
+ */
+typedef struct classify_dpo_t
+{
+ fib_protocol_t cd_proto;
+
+ u32 cd_table_index;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 cd_locks;
+} classify_dpo_t;
+
+extern index_t classify_dpo_create(fib_protocol_t proto,
+ u32 classify_table_index);
+
+extern u8* format_classify_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern classify_dpo_t *classify_dpo_pool;
+
+static inline classify_dpo_t *
+classify_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(classify_dpo_pool, index));
+}
+
+extern void classify_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/dpo.c b/vnet/vnet/dpo/dpo.c
new file mode 100644
index 00000000..5eff52b7
--- /dev/null
+++ b/vnet/vnet/dpo/dpo.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets are they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concreate actions
+ *
+ * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances.
+ */
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+
+/**
+ * Array of char* names for the DPO types and protos
+ */
+static const char* dpo_type_names[] = DPO_TYPES;
+static const char* dpo_proto_names[] = DPO_PROTOS;
+
+/**
+ * @brief Vector of virtual function tables for the DPO types
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static dpo_vft_t *dpo_vfts;
+
+/**
+ * @brief vector of graph node names associated with each DPO type and protocol.
+ *
+ * dpo_nodes[child_type][child_proto][node_X] = node_name;
+ * i.e.
+ * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup"
+ * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance"
+ *
+ * This is a vector so we can dynamically register new DPO types in plugins.
+ */
+static const char* const * const ** dpo_nodes;
+
+/**
+ * @brief Vector of edge indicies from parent DPO nodes to child
+ *
+ * dpo_edges[child_type][child_proto][parent_type] = edge_index
+ *
+ * This array is derived at init time from the dpo_nodes above. Note that
+ * the third dimension in dpo_nodes is lost, hence, the edge index from each
+ * node MUST be the same.
+ *
+ * Note that this array is child type specific, not child instance specific.
+ */
+static u32 ***dpo_edges;
+
+/**
+ * @brief The DPO type value that can be assigend to the next dynamic
+ * type registration.
+ */
+static dpo_type_t dpo_dynamic = DPO_LAST;
+
+u8 *
+format_dpo_type (u8 * s, va_list * args)
+{
+ dpo_type_t type = va_arg (*args, int);
+
+ s = format(s, "%s", dpo_type_names[type]);
+
+ return (s);
+}
+
+u8 *
+format_dpo_id (u8 * s, va_list * args)
+{
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t*);
+ u32 indent = va_arg (*args, u32);
+
+ s = format(s, "[@%d]: ", dpo->dpoi_next_node);
+
+ if (NULL != dpo_vfts[dpo->dpoi_type].dv_format)
+ {
+ return (format(s, "%U",
+ dpo_vfts[dpo->dpoi_type].dv_format,
+ dpo->dpoi_index,
+ indent));
+ }
+
+ switch (dpo->dpoi_type)
+ {
+ case DPO_FIRST:
+ s = format(s, "unset");
+ break;
+ default:
+ s = format(s, "unknown");
+ break;
+ }
+ return (s);
+}
+
+u8 *
+format_dpo_proto (u8 * s, va_list * args)
+{
+ dpo_proto_t proto = va_arg (*args, int);
+
+ return (format(s, "%s", dpo_proto_names[proto]));
+}
+
+void
+dpo_set (dpo_id_t *dpo,
+ dpo_type_t type,
+ dpo_proto_t proto,
+ index_t index)
+{
+ dpo_id_t tmp = *dpo;
+
+ dpo->dpoi_type = type;
+ dpo->dpoi_proto = proto,
+ dpo->dpoi_index = index;
+
+ if (DPO_ADJACENCY == type)
+ {
+ /*
+ * set the adj subtype
+ */
+ ip_adjacency_t *adj;
+
+ adj = adj_get(index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE;
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
+ break;
+ default:
+ break;
+ }
+ }
+ dpo_lock(dpo);
+ dpo_unlock(&tmp);
+}
+
+void
+dpo_reset (dpo_id_t *dpo)
+{
+ dpo_set(dpo, DPO_FIRST, DPO_PROTO_NONE, INDEX_INVALID);
+}
+
+/**
+ * \brief
+ * Compare two Data-path objects
+ *
+ * like memcmp, return 0 is matching, !0 otherwise.
+ */
+int
+dpo_cmp (const dpo_id_t *dpo1,
+ const dpo_id_t *dpo2)
+{
+ int res;
+
+ res = dpo1->dpoi_type - dpo2->dpoi_type;
+
+ if (0 != res) return (res);
+
+ return (dpo1->dpoi_index - dpo2->dpoi_index);
+}
+
+void
+dpo_copy (dpo_id_t *dst,
+ const dpo_id_t *src)
+{
+ dpo_id_t tmp = *dst;
+
+ /*
+ * the destination is written in a single u64 write - hence atomically w.r.t
+ * any packets inflight.
+ */
+ *((u64*)dst) = *(u64*)src;
+
+ dpo_lock(dst);
+ dpo_unlock(&tmp);
+}
+
+int
+dpo_is_adj (const dpo_id_t *dpo)
+{
+ return ((dpo->dpoi_type == DPO_ADJACENCY) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_INCOMPLETE) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_MIDCHAIN) ||
+ (dpo->dpoi_type == DPO_ADJACENCY_GLEAN));
+}
+
+void
+dpo_register (dpo_type_t type,
+ const dpo_vft_t *vft,
+ const char * const * const * nodes)
+{
+ vec_validate(dpo_vfts, type);
+ dpo_vfts[type] = *vft;
+
+ vec_validate(dpo_nodes, type);
+ dpo_nodes[type] = nodes;
+}
+
+dpo_type_t
+dpo_register_new_type (const dpo_vft_t *vft,
+ const char * const * const * nodes)
+{
+ dpo_type_t type = dpo_dynamic++;
+
+ dpo_register(type, vft, nodes);
+
+ return (type);
+}
+
+void
+dpo_lock (dpo_id_t *dpo)
+{
+ if (!dpo_id_is_valid(dpo))
+ return;
+
+ dpo_vfts[dpo->dpoi_type].dv_lock(dpo);
+}
+
+void
+dpo_unlock (dpo_id_t *dpo)
+{
+ if (!dpo_id_is_valid(dpo))
+ return;
+
+ dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
+}
+
+
+static u32
+dpo_get_next_node (dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ const dpo_id_t *parent_dpo)
+{
+ dpo_proto_t parent_proto;
+ dpo_type_t parent_type;
+
+ parent_type = parent_dpo->dpoi_type;
+ parent_proto = parent_dpo->dpoi_proto;
+
+ vec_validate(dpo_edges, child_type);
+ vec_validate(dpo_edges[child_type], child_proto);
+ vec_validate_init_empty(dpo_edges[child_type][child_proto],
+ parent_dpo->dpoi_type, ~0);
+
+ /*
+ * if the edge index has not yet been created for this node to node transistion
+ */
+ if (~0 == dpo_edges[child_type][child_proto][parent_type])
+ {
+ vlib_node_t *parent_node, *child_node;
+ vlib_main_t *vm;
+ u32 edge ,pp, cc;
+
+ vm = vlib_get_main();
+
+ ASSERT(NULL != dpo_nodes[child_type]);
+ ASSERT(NULL != dpo_nodes[child_type][child_proto]);
+ ASSERT(NULL != dpo_nodes[parent_type]);
+ ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+
+ pp = 0;
+
+ /*
+ * create a graph arc from each of the parent's registered node types,
+ * to each of the childs.
+ */
+ while (NULL != dpo_nodes[child_type][child_proto][pp])
+ {
+ parent_node =
+ vlib_get_node_by_name(vm,
+ (u8*) dpo_nodes[child_type][child_proto][pp]);
+
+ cc = 0;
+
+ while (NULL != dpo_nodes[parent_type][child_proto][cc])
+ {
+ child_node =
+ vlib_get_node_by_name(vm,
+ (u8*) dpo_nodes[parent_type][parent_proto][cc]);
+
+ edge = vlib_node_add_next(vm,
+ parent_node->index,
+ child_node->index);
+
+ if (~0 == dpo_edges[child_type][child_proto][parent_type])
+ {
+ dpo_edges[child_type][child_proto][parent_type] = edge;
+ }
+ else
+ {
+ ASSERT(dpo_edges[child_type][child_proto][parent_type] == edge);
+ }
+ cc++;
+ }
+ pp++;
+ }
+ }
+
+ return (dpo_edges[child_type][child_proto][parent_type]);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. The VLIB graph arc used is taken from the parent and child types
+ * passed.
+ */
+static void
+dpo_stack_i (u32 edge,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ /*
+ * in order to get an atomic update of the parent we create a temporary,
+ * from a copy of the child, and add the next_node. then we copy to the parent
+ */
+ dpo_id_t tmp = DPO_NULL;
+ dpo_copy(&tmp, parent);
+
+ /*
+ * get the edge index for the parent to child VLIB graph transisition
+ */
+ tmp.dpoi_next_node = edge;
+
+ /*
+ * this update is atomic.
+ */
+ dpo_copy(dpo, &tmp);
+
+ dpo_reset(&tmp);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child-parent
+ * relationship. The VLIB graph arc used is taken from the parent and child types
+ * passed.
+ */
+void
+dpo_stack (dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent);
+}
+
+/**
+ * @brief Stack one DPO object on another, and thus establish a child parent
+ * relationship. A new VLIB graph arc is created from the child node passed
+ * to the nodes registered by the parent. The VLIB infra will ensure this arc
+ * is added only once.
+ */
+void
+dpo_stack_from_node (u32 child_node_index,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent)
+{
+ dpo_proto_t parent_proto;
+ vlib_node_t *parent_node;
+ dpo_type_t parent_type;
+ vlib_main_t *vm;
+ u32 edge;
+
+ parent_type = parent->dpoi_type;
+ parent_proto = parent->dpoi_proto;
+
+ vm = vlib_get_main();
+
+ ASSERT(NULL != dpo_nodes[parent_type]);
+ ASSERT(NULL != dpo_nodes[parent_type][parent_proto]);
+
+ parent_node =
+ vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]);
+
+ edge = vlib_node_add_next(vm,
+ child_node_index,
+ parent_node->index);
+
+ dpo_stack_i(edge, dpo, parent);
+}
+
+static clib_error_t *
+dpo_module_init (vlib_main_t * vm)
+{
+ drop_dpo_module_init();
+ punt_dpo_module_init();
+ receive_dpo_module_init();
+ load_balance_module_init();
+ mpls_label_dpo_module_init();
+ classify_dpo_module_init();
+ lookup_dpo_module_init();
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION(dpo_module_init);
diff --git a/vnet/vnet/dpo/dpo.h b/vnet/vnet/dpo/dpo.h
new file mode 100644
index 00000000..8c22f00b
--- /dev/null
+++ b/vnet/vnet/dpo/dpo.h
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets are they are switched through VPP's data-path.
+ *
+ * The DPO can be considered to be like is a base class that is specialised
+ * by other objects to provide concreate actions
+ *
+ * The VLIB graph nodes are graph of DPO types, the DPO graph is a graph of
+ * instances.
+ */
+
+#ifndef __DPO_H__
+#define __DPO_H__
+
+#include <vnet/vnet.h>
+
+/**
+ * @brief An index for adjacencies.
+ * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of
+ * an index_t. However, for us humans, we can glean much more intent
+ * from the declaration
+ * foo barindex_t t);
+ * than we can from
+ * foo bar(u32 t);
+ */
+typedef u32 index_t;
+
+/**
+ * @brief Invalid index - used when no index is known
+ * blazoned capitals INVALID speak volumes where ~0 does not.
+ */
+#define INDEX_INVALID ((index_t)(~0))
+
+/**
+ * @brief Data path protocol.
+ * Actions performed on packets in the data-plane can be described and represented
+ * by protocol independent objects, i.e. ADJACENCY, but the spceifics actions
+ * required during ADJACENCY processing can be protocol dependent. For example,
+ * the adjacency rewrite node performs a ip4 checksum calculation, ip6 and MPLS
+ * do not, all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol
+ * dependent, and thus each graph edge/arc is too.
+ * When programming a DPO's next node arc from child to parent it is thus required
+ * to know the parent's data-path protocol so the correct arc index can be used.
+ */
+typedef enum dpo_proto_t_
+{
+#if CLIB_DEBUG > 0
+ DPO_PROTO_IP4 = 1,
+#else
+ DPO_PROTO_IP4 = 0,
+#endif
+ DPO_PROTO_IP6,
+ DPO_PROTO_MPLS,
+} __attribute__((packed)) dpo_proto_t;
+
+#define DPO_PROTO_NUM (DPO_PROTO_MPLS+1)
+#define DPO_PROTO_NONE (DPO_PROTO_NUM+1)
+
+#define DPO_PROTOS { \
+ [DPO_PROTO_IP4] = "ip4", \
+ [DPO_PROTO_IP6] = "ip6", \
+ [DPO_PROTO_MPLS] = "mpls", \
+}
+
+/**
+ * @brief Common types of data-path objects
+ * New types can be dynamically added using dpo_register_new_type()
+ */
+typedef enum dpo_type_t_ {
+ /**
+ * A non-zero value first so we can spot unitialisation errors
+ */
+ DPO_FIRST,
+ DPO_DROP,
+ DPO_PUNT,
+ /**
+ * @brief load-balancing over a choice of [un]equal cost paths
+ */
+ DPO_LOAD_BALANCE,
+ DPO_ADJACENCY,
+ DPO_ADJACENCY_INCOMPLETE,
+ DPO_ADJACENCY_MIDCHAIN,
+ DPO_ADJACENCY_GLEAN,
+ DPO_RECEIVE,
+ DPO_LOOKUP,
+ DPO_LISP_CP,
+ DPO_CLASSIFY,
+ DPO_MPLS_LABEL,
+ DPO_LAST,
+} __attribute__((packed)) dpo_type_t;
+
+#define DPO_TYPE_NUM DPO_LAST
+
+#define DPO_TYPES { \
+ [DPO_FIRST] = "dpo-invalid", \
+ [DPO_DROP] = "dpo-drop", \
+ [DPO_PUNT] = "dpo-punt", \
+ [DPO_ADJACENCY] = "dpo-adjacency", \
+ [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \
+ [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \
+ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \
+ [DPO_RECEIVE] = "dpo-receive", \
+ [DPO_LOOKUP] = "dpo-lookup", \
+ [DPO_LOAD_BALANCE] = "dpo-load-balance", \
+ [DPO_LISP_CP] = "dpo-lisp-cp", \
+ [DPO_CLASSIFY] = "dpo-classify", \
+ [DPO_MPLS_LABEL] = "dpo-mpls-label", \
+}
+
+/**
+ * @brief The identity of a DPO is a combination of its type and its
+ * instance number/index of objects of that type
+ */
+typedef struct dpo_id_t_ {
+ /**
+ * the type
+ */
+ dpo_type_t dpoi_type;
+ /**
+ * the data-path protocol of the type.
+ */
+ dpo_proto_t dpoi_proto;
+ /**
+ * The next VLIB node to follow.
+ */
+ u16 dpoi_next_node;
+ /**
+ * the index of objects of that type
+ */
+ index_t dpoi_index;
+} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t;
+
+_Static_assert(sizeof(dpo_id_t) <= sizeof(u64),
+ "DPO ID is greater than sizeof u64 "
+ "atomic updates need to be revisited");
+
+/**
+ * @brief An initialiser for DPos declared on the stack.
+ */
+#define DPO_NULL {0}
+
+/**
+ * @brief Return true if the DPO object is valid, i.e. has been initialised.
+ */
+static inline int
+dpo_id_is_valid (const dpo_id_t *dpoi)
+{
+ return (dpoi->dpoi_type != DPO_FIRST &&
+ dpoi->dpoi_index != INDEX_INVALID);
+}
+
+/**
+ * @brief
+ * Take a reference counting lock on the DPO
+ */
+extern void dpo_lock(dpo_id_t *dpo);
+
+/**
+ * @brief
+ * Release a reference counting lock on the DPO
+ */
+extern void dpo_unlock(dpo_id_t *dpo);
+
+/**
+ * @brief Set/create a DPO ID
+ * The DPO will be locked.
+ *
+ * @param dpo
+ * The DPO object to configure
+ *
+ * @param type
+ * The dpo_type_t of the DPO
+ *
+ * @param proto
+ * The dpo_proto_t of the DPO
+ *
+ * @param index
+ * The type specific index of the DPO
+ */
+extern void dpo_set(dpo_id_t *dpo,
+ dpo_type_t type,
+ dpo_proto_t proto,
+ index_t index);
+
+/**
+ * @brief reset a DPO ID
+ * The DPO will be unlocked.
+ *
+ * @param dpo
+ * The DPO object to reset
+ */
+extern void dpo_reset(dpo_id_t *dpo);
+
+/**
+ * @brief compare two DPOs for equality
+ */
+extern int dpo_cmp(const dpo_id_t *dpo1,
+ const dpo_id_t *dpo2);
+
+/**
+ * @brief
+ * atomic copy a data-plane object.
+ * This is safe to use when the dst DPO is currently switching packets
+ */
+extern void dpo_copy(dpo_id_t *dst,
+ const dpo_id_t *src);
+
+/**
+ * @brief Return TRUE is the DPO is any type of adjacency
+ */
+extern int dpo_is_adj(const dpo_id_t *dpo);
+
+/**
+ * @biref Format a DPO_id_t oject
+ */
+extern u8 *format_dpo_id(u8 * s, va_list * args);
+
+/**
+ * @biref format a DPO type
+ */
+extern u8 *format_dpo_type(u8 * s, va_list * args);
+
+/**
+ * @brief format a DPO protocol
+ */
+extern u8 *format_dpo_proto(u8 * s, va_list * args);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created. The child_type and child_proto
+ * are used to get the VLID nodes from which the arcs are added.
+ *
+ * @param child_type
+ * Child DPO type.
+ *
+ * @param child_proto
+ * Child DPO proto
+ *
+ * @parem dpo
+ * This is the DPO to stack and set.
+ *
+ * @paren parent_dpo
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack(dpo_type_t child_type,
+ dpo_proto_t child_proto,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent_dpo);
+
+/**
+ * @brief
+ * Set and stack a DPO.
+ * The DPO passed is set to the parent DPO and the necessary
+ * VLIB graph arcs are created, from the child_node passed.
+ *
+ * @param child_node
+ * The VLIB grpah node index to create an arc from to the parent
+ *
+ * @parem dpo
+ * This is the DPO to stack and set.
+ *
+ * @paren parent_dpo
+ * The parent DPO to stack onto.
+ */
+extern void dpo_stack_from_node(u32 child_node,
+ dpo_id_t *dpo,
+ const dpo_id_t *parent);
+
+/**
+ * @brief A lock function registered for a DPO type
+ */
+typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief An unlock function registered for a DPO type
+ */
+typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo);
+
+/**
+ * @brief A virtual function table regisitered for a DPO type
+ */
+typedef struct dpo_vft_t_
+{
+ /**
+ * A reference counting lock function
+ */
+ dpo_lock_fn_t dv_lock;
+ /**
+ * A reference counting unlock function
+ */
+ dpo_lock_fn_t dv_unlock;
+ /**
+ * A format function
+ */
+ format_function_t *dv_format;
+} dpo_vft_t;
+
+
+/**
+ * @brief For a given DPO type Register:
+ * - a virtual function table
+ * - a NULL terminated array of graph nodes from which that object type
+ * will originate packets, i.e. the nodes in which the object type will be
+ * the parent DPO in the DP graph. The ndoes are per-data-path protocol
+ * (see above).
+ *
+ * @param type
+ * The type being registered.
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ */
+void dpo_register(dpo_type_t type,
+ const dpo_vft_t *vft,
+ const char * const * const * nodes);
+
+/**
+ * @brief Create and register a new DPO type.
+ *
+ * This can be used by plugins to create new DPO types that are not listed
+ * in dpo_type_t enum
+ *
+ * @param vft
+ * The virtual function table to register for the type.
+ *
+ * @param nodes
+ * The string description of the per-protocol VLIB graph nodes.
+ *
+ * @return The new dpo_type_t
+ */
+dpo_type_t dpo_register_new_type(const dpo_vft_t *vft,
+ const char * const * const * nodes);
+
+#endif
diff --git a/vnet/vnet/dpo/drop_dpo.c b/vnet/vnet/dpo/drop_dpo.c
new file mode 100644
index 00000000..62f56488
--- /dev/null
+++ b/vnet/vnet/dpo/drop_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing dropping the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t drop_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+drop_dpo_get (dpo_proto_t proto)
+{
+ dpo_set(&drop_dpos[proto], DPO_DROP, proto, 1);
+
+ return (&drop_dpos[proto]);
+}
+
+int
+dpo_is_drop (const dpo_id_t *dpo)
+{
+ return (dpo->dpoi_type == DPO_DROP);
+}
+
+static void
+drop_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the drop
+ * more trouble than it's worth.
+ * There always needs to be one around. no point it managaing its lifetime
+ */
+}
+static void
+drop_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_drop_dpo (u8 *s, va_list *ap)
+{
+ CLIB_UNUSED(index_t index) = va_arg(ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+
+ return (format(s, "dpo-drop"));
+}
+
+const static dpo_vft_t drop_vft = {
+ .dv_lock = drop_dpo_lock,
+ .dv_unlock = drop_dpo_unlock,
+ .dv_format = format_drop_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a drop
+ * object.
+ *
+ * this means that these graph nodes are ones from which a drop is the
+ * parent object in the DPO-graph.
+ */
+const static char* const drop_ip4_nodes[] =
+{
+ "ip4-drop",
+ NULL,
+};
+const static char* const drop_ip6_nodes[] =
+{
+ "ip6-drop",
+ NULL,
+};
+const static char* const drop_mpls_nodes[] =
+{
+ "mpls-drop",
+ NULL,
+};
+const static char* const * const drop_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = drop_ip4_nodes,
+ [DPO_PROTO_IP6] = drop_ip6_nodes,
+ [DPO_PROTO_MPLS] = drop_mpls_nodes,
+};
+
+void
+drop_dpo_module_init (void)
+{
+ dpo_register(DPO_DROP, &drop_vft, drop_nodes);
+}
diff --git a/vnet/vnet/dpo/drop_dpo.h b/vnet/vnet/dpo/drop_dpo.h
new file mode 100644
index 00000000..e7bd8f51
--- /dev/null
+++ b/vnet/vnet/dpo/drop_dpo.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * A Data-Path Object is an object that represents actions that are
+ * applied to packets are they are switched through VPP.
+ *
+ * The DPO is a base class that is specialised by other objects to provide
+ * concreate actions
+ *
+ * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances.
+ */
+
+#ifndef __DROP_DPO_H__
+#define __DROP_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_drop(const dpo_id_t *dpo);
+
+extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto);
+
+extern void drop_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/load_balance.c b/vnet/vnet/dpo/load_balance.c
new file mode 100644
index 00000000..963ff0ba
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance.c
@@ -0,0 +1,760 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vppinfra/math.h> /* for fabs */
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_alloc.h>
+#include <vnet/adj/adj_internal.h>
+
+/*
+ * distribution error tolerance for load-balancing
+ */
+const f64 multipath_next_hop_error_tolerance = 0.1;
+
+#undef LB_DEBUG
+
+#ifdef LB_DEBUG
+#define LB_DBG(_lb, _fmt, _args...) \
+{ \
+ u8* _tmp =NULL; \
+ clib_warning("lb:[%s]:" _fmt, \
+ load_balance_format(load_balance_get_index((_lb)), \
+ 0, _tmp), \
+ ##_args); \
+ vec_free(_tmp); \
+}
+#else
+#define LB_DBG(_p, _fmt, _args...)
+#endif
+
+
+/**
+ * Pool of all DPOs. It's not static so the DP can have fast access
+ */
+load_balance_t *load_balance_pool;
+
+/**
+ * The one instance of load-balance main
+ */
+load_balance_main_t load_balance_main;
+
+f64
+load_balance_get_multipath_tolerance (void)
+{
+ return (multipath_next_hop_error_tolerance);
+}
+
+static inline index_t
+load_balance_get_index (const load_balance_t *lb)
+{
+ return (lb - load_balance_pool);
+}
+
+static inline dpo_id_t*
+load_balance_get_buckets (load_balance_t *lb)
+{
+ if (LB_HAS_INLINE_BUCKETS(lb))
+ {
+ return (lb->lb_buckets_inline);
+ }
+ else
+ {
+ return (lb->lb_buckets);
+ }
+}
+
+static load_balance_t *
+load_balance_alloc_i (void)
+{
+ load_balance_t *lb;
+
+ pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
+ memset(lb, 0, sizeof(*lb));
+
+ lb->lb_map = INDEX_INVALID;
+ vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
+ load_balance_get_index(lb));
+ vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
+ load_balance_get_index(lb));
+ vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
+ load_balance_get_index(lb));
+ vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
+ load_balance_get_index(lb));
+
+ return (lb);
+}
+
+static u8*
+load_balance_format (index_t lbi,
+ load_balance_format_flags_t flags,
+ u32 indent,
+ u8 *s)
+{
+ vlib_counter_t to, via;
+ load_balance_t *lb;
+ dpo_id_t *buckets;
+ u32 i;
+
+ lb = load_balance_get(lbi);
+ vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
+ vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
+ buckets = load_balance_get_buckets(lb);
+
+ s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
+ s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets);
+ s = format(s, "locks:%d ", lb->lb_locks);
+ s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
+ if (0 != via.packets)
+ {
+ s = format(s, " via:[%Ld:%Ld]",
+ via.packets, via.bytes);
+ }
+ s = format(s, "]");
+
+ if (INDEX_INVALID != lb->lb_map)
+ {
+ s = format(s, "\n%U%U",
+ format_white_space, indent+4,
+ format_load_balance_map, lb->lb_map, indent+4);
+ }
+ for (i = 0; i < lb->lb_n_buckets; i++)
+ {
+ s = format(s, "\n%U[%d] %U",
+ format_white_space, indent+2,
+ i,
+ format_dpo_id,
+ &buckets[i], indent+6);
+ }
+ return (s);
+}
+
+u8*
+format_load_balance (u8 * s, va_list * args)
+{
+ index_t lbi = va_arg(args, index_t);
+ load_balance_format_flags_t flags = va_arg(args, load_balance_format_flags_t);
+
+ return (load_balance_format(lbi, flags, 0, s));
+}
+static u8*
+format_load_balance_dpo (u8 * s, va_list * args)
+{
+ index_t lbi = va_arg(args, index_t);
+ u32 indent = va_arg(args, u32);
+
+ return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
+}
+
+
+static load_balance_t *
+load_balance_create_i (u32 num_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_alloc_i();
+ lb->lb_hash_config = fhc;
+ lb->lb_n_buckets = num_buckets;
+ lb->lb_n_buckets_minus_1 = num_buckets-1;
+ lb->lb_proto = lb_proto;
+
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ {
+ vec_validate_aligned(lb->lb_buckets,
+ lb->lb_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+ }
+
+ LB_DBG(lb, "create");
+
+ return (lb);
+}
+
+index_t
+load_balance_create (u32 n_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc)
+{
+ return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
+}
+
+static inline void
+load_balance_set_bucket_i (load_balance_t *lb,
+ u32 bucket,
+ dpo_id_t *buckets,
+ const dpo_id_t *next)
+{
+ dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
+}
+
+void
+load_balance_set_bucket (index_t lbi,
+ u32 bucket,
+ const dpo_id_t *next)
+{
+ load_balance_t *lb;
+ dpo_id_t *buckets;
+
+ lb = load_balance_get(lbi);
+ buckets = load_balance_get_buckets(lb);
+
+ ASSERT(bucket < lb->lb_n_buckets);
+
+ load_balance_set_bucket_i(lb, bucket, buckets, next);
+}
+
+int
+load_balance_is_drop (const dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ if (DPO_LOAD_BALANCE != dpo->dpoi_type)
+ return (0);
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ if (1 == lb->lb_n_buckets)
+ {
+ return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
+ }
+ return (0);
+}
+
+const dpo_id_t *
+load_balance_get_bucket (index_t lbi,
+ u32 bucket)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(lbi);
+
+ return (load_balance_get_bucket_i(lb, bucket));
+}
+
+static int
+next_hop_sort_by_weight (load_balance_path_t * n1,
+ load_balance_path_t * n2)
+{
+ return ((int) n1->path_weight - (int) n2->path_weight);
+}
+
+/* Given next hop vector is over-written with normalized one with sorted weights and
+ with weights corresponding to the number of adjacencies for each next hop.
+ Returns number of adjacencies in block. */
+u32
+ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops,
+ load_balance_path_t ** normalized_next_hops,
+ u32 *sum_weight_in,
+ f64 multipath_next_hop_error_tolerance)
+{
+ load_balance_path_t * nhs;
+ uword n_nhs, n_adj, n_adj_left, i, sum_weight;
+ f64 norm, error;
+
+ n_nhs = vec_len (raw_next_hops);
+ ASSERT (n_nhs > 0);
+ if (n_nhs == 0)
+ return 0;
+
+ /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
+ nhs = *normalized_next_hops;
+ vec_validate (nhs, 2*n_nhs - 1);
+
+ /* Fast path: 1 next hop in block. */
+ n_adj = n_nhs;
+ if (n_nhs == 1)
+ {
+ nhs[0] = raw_next_hops[0];
+ nhs[0].path_weight = 1;
+ _vec_len (nhs) = 1;
+ sum_weight = 1;
+ goto done;
+ }
+
+ else if (n_nhs == 2)
+ {
+ int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
+
+ /* Fast sort. */
+ nhs[0] = raw_next_hops[cmp];
+ nhs[1] = raw_next_hops[cmp ^ 1];
+
+ /* Fast path: equal cost multipath with 2 next hops. */
+ if (nhs[0].path_weight == nhs[1].path_weight)
+ {
+ nhs[0].path_weight = nhs[1].path_weight = 1;
+ _vec_len (nhs) = 2;
+ sum_weight = 2;
+ goto done;
+ }
+ }
+ else
+ {
+ clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+ qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
+ }
+
+ /* Find total weight to normalize weights. */
+ sum_weight = 0;
+ for (i = 0; i < n_nhs; i++)
+ sum_weight += nhs[i].path_weight;
+
+ /* In the unlikely case that all weights are given as 0, set them all to 1. */
+ if (sum_weight == 0)
+ {
+ for (i = 0; i < n_nhs; i++)
+ nhs[i].path_weight = 1;
+ sum_weight = n_nhs;
+ }
+
+ /* Save copies of all next hop weights to avoid being overwritten in loop below. */
+ for (i = 0; i < n_nhs; i++)
+ nhs[n_nhs + i].path_weight = nhs[i].path_weight;
+
+ /* Try larger and larger power of 2 sized adjacency blocks until we
+ find one where traffic flows to within 1% of specified weights. */
+ for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ {
+ error = 0;
+
+ norm = n_adj / ((f64) sum_weight);
+ n_adj_left = n_adj;
+ for (i = 0; i < n_nhs; i++)
+ {
+ f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
+ word n = flt_round_nearest (nf);
+
+ n = n > n_adj_left ? n_adj_left : n;
+ n_adj_left -= n;
+ error += fabs (nf - n);
+ nhs[i].path_weight = n;
+ }
+
+ nhs[0].path_weight += n_adj_left;
+
+ /* Less than 5% average error per adjacency with this size adjacency block? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj)
+ {
+ /* Truncate any next hops with zero weight. */
+ _vec_len (nhs) = i;
+ break;
+ }
+ }
+
+done:
+ /* Save vector for next call. */
+ *normalized_next_hops = nhs;
+ *sum_weight_in = sum_weight;
+ return n_adj;
+}
+
+static load_balance_path_t *
+load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs,
+ dpo_proto_t drop_proto)
+{
+ if (0 == vec_len(nhs))
+ {
+ load_balance_path_t *nh;
+
+ /*
+ * we need something for the load-balance. so use the drop
+ */
+ vec_add2(nhs, nh, 1);
+
+ nh->path_weight = 1;
+ dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
+ }
+
+ return (nhs);
+}
+
+/*
+ * Fill in adjacencies in block based on corresponding
+ * next hop adjacencies.
+ */
+static void
+load_balance_fill_buckets (load_balance_t *lb,
+ load_balance_path_t *nhs,
+ dpo_id_t *buckets,
+ u32 n_buckets)
+{
+ load_balance_path_t * nh;
+ u16 ii, bucket;
+
+ bucket = 0;
+
+ /*
+ * the next-hops have normalised weights. that means their sum is the number
+ * of buckets we need to fill.
+ */
+ vec_foreach (nh, nhs)
+ {
+ for (ii = 0; ii < nh->path_weight; ii++)
+ {
+ ASSERT(bucket < n_buckets);
+ load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
+ }
+ }
+}
+
+static inline void
+load_balance_set_n_buckets (load_balance_t *lb,
+ u32 n_buckets)
+{
+ lb->lb_n_buckets = n_buckets;
+ lb->lb_n_buckets_minus_1 = n_buckets-1;
+}
+
+void
+load_balance_multipath_update (const dpo_id_t *dpo,
+ load_balance_path_t * raw_next_hops,
+ load_balance_flags_t flags)
+{
+ u32 sum_of_weights,n_buckets, ii;
+ load_balance_path_t * nh, * nhs;
+ index_t lbmi, old_lbmi;
+ load_balance_t *lb;
+ dpo_id_t *tmp_dpo;
+
+ nhs = NULL;
+
+ ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
+ lb = load_balance_get(dpo->dpoi_index);
+ raw_next_hops =
+ load_balance_multipath_next_hop_fixup(raw_next_hops,
+ lb->lb_proto);
+ n_buckets =
+ ip_multipath_normalize_next_hops(raw_next_hops,
+ &nhs,
+ &sum_of_weights,
+ multipath_next_hop_error_tolerance);
+
+ ASSERT (n_buckets >= vec_len (raw_next_hops));
+
+ /*
+ * Save the old load-balance map used, and get a new one if required.
+ */
+ old_lbmi = lb->lb_map;
+ if (flags & LOAD_BALANCE_FLAG_USES_MAP)
+ {
+ lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
+ }
+ else
+ {
+ lbmi = INDEX_INVALID;
+ }
+
+ if (0 == lb->lb_n_buckets)
+ {
+ /*
+ * first time initialisation. no packets inflight, so we can write
+ * at leisure.
+ */
+ load_balance_set_n_buckets(lb, n_buckets);
+
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ vec_validate_aligned(lb->lb_buckets,
+ lb->lb_n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ lb->lb_map = lbmi;
+ }
+ else
+ {
+ /*
+ * This is a modification of an existing load-balance.
+ * We need to ensure that packets inflight see a consistent state, that
+ * is the number of reported buckets the LB has (read from
+ * lb_n_buckets_minus_1) is not more than it actually has. So if the
+ * number of buckets is increasing, we must update the bucket array first,
+ * then the reported number. vice-versa if the number of buckets goes down.
+ */
+ if (n_buckets == lb->lb_n_buckets)
+ {
+ /*
+ * no change in the number of buckets. we can simply fill what
+ * is new over what is old.
+ */
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ lb->lb_map = lbmi;
+ }
+ else if (n_buckets > lb->lb_n_buckets)
+ {
+ /*
+ * we have more buckets. the old load-balance map (if there is one)
+ * will remain valid, i.e. mapping to indices within range, so we
+ * update it last.
+ */
+ if (n_buckets > LB_NUM_INLINE_BUCKETS &&
+ lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * the new increased number of buckets is crossing the threshold
+ * from the inline storage to out-line. Alloc the outline buckets
+ * first, then fixup the number. then reset the inlines.
+ */
+ ASSERT(NULL == lb->lb_buckets);
+ vec_validate_aligned(lb->lb_buckets,
+ n_buckets - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ load_balance_fill_buckets(lb, nhs,
+ lb->lb_buckets,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+
+ CLIB_MEMORY_BARRIER();
+
+ for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
+ {
+ dpo_reset(&lb->lb_buckets_inline[ii]);
+ }
+ }
+ else
+ {
+ /*
+ * we are not crossing the threshold. we can write the new on the
+ * old, whether they be inline or not.
+ */
+ load_balance_fill_buckets(lb, nhs,
+ load_balance_get_buckets(lb),
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+ }
+
+ /*
+ * buckets fixed. ready for the MAP update.
+ */
+ lb->lb_map = lbmi;
+ }
+ else
+ {
+ /*
+ * bucket size shrinkage.
+ * Any map we have will be based on the old
+ * larger number of buckets, so will be translating to indices
+ * out of range. So the new MAP must be installed first.
+ */
+ lb->lb_map = lbmi;
+ CLIB_MEMORY_BARRIER();
+
+
+ if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
+ lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
+ {
+ /*
+ * the new decreased number of buckets is crossing the threshold
+ * from out-line storage to inline:
+ * 1 - Fill the inline buckets,
+ * 2 - fixup the number (and this point the inline buckets are
+ * used).
+ * 3 - free the outline buckets
+ */
+ load_balance_fill_buckets(lb, nhs,
+ lb->lb_buckets_inline,
+ n_buckets);
+ CLIB_MEMORY_BARRIER();
+ load_balance_set_n_buckets(lb, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ vec_foreach(tmp_dpo, lb->lb_buckets)
+ {
+ dpo_reset(tmp_dpo);
+ }
+ vec_free(lb->lb_buckets);
+ }
+ else
+ {
+ /*
+ * not crossing the threshold.
+ * 1 - update the number to the smaller size
+ * 2 - write the new buckets
+ * 3 - reset those no longer used.
+ */
+ dpo_id_t *buckets;
+ u32 old_n_buckets;
+
+ old_n_buckets = lb->lb_n_buckets;
+ buckets = load_balance_get_buckets(lb);
+
+ load_balance_set_n_buckets(lb, n_buckets);
+ CLIB_MEMORY_BARRIER();
+
+ load_balance_fill_buckets(lb, nhs,
+ buckets,
+ n_buckets);
+
+ for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++)
+ {
+ dpo_reset(&buckets[ii]);
+ }
+ }
+ }
+ }
+
+ vec_foreach (nh, nhs)
+ {
+ dpo_reset(&nh->path_dpo);
+ }
+
+ load_balance_map_unlock(old_lbmi);
+}
+
+static void
+load_balance_lock (dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ lb->lb_locks++;
+}
+
+static void
+load_balance_destroy (load_balance_t *lb)
+{
+ dpo_id_t *buckets;
+ int i;
+
+ buckets = load_balance_get_buckets(lb);
+
+ for (i = 0; i < lb->lb_n_buckets; i++)
+ {
+ dpo_reset(&buckets[i]);
+ }
+
+ LB_DBG(lb, "destroy");
+ if (!LB_HAS_INLINE_BUCKETS(lb))
+ {
+ vec_free(lb->lb_buckets);
+ }
+
+ pool_put(load_balance_pool, lb);
+}
+
+static void
+load_balance_unlock (dpo_id_t *dpo)
+{
+ load_balance_t *lb;
+
+ lb = load_balance_get(dpo->dpoi_index);
+
+ lb->lb_locks--;
+
+ if (0 == lb->lb_locks)
+ {
+ load_balance_destroy(lb);
+ }
+}
+
+const static dpo_vft_t lb_vft = {
+ .dv_lock = load_balance_lock,
+ .dv_unlock = load_balance_unlock,
+ .dv_format = format_load_balance_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
+ * object.
+ *
+ * this means that these graph nodes are ones from which a load-balance is the
+ * parent object in the DPO-graph.
+ *
+ * We do not list all the load-balance nodes, such as the *-lookup. instead
+ * we are relying on the correct use of the .sibling_of field when setting
+ * up these sibling nodes.
+ */
+const static char* const load_balance_ip4_nodes[] =
+{
+ "ip4-load-balance",
+ NULL,
+};
+const static char* const load_balance_ip6_nodes[] =
+{
+ "ip6-load-balance",
+ NULL,
+};
+const static char* const load_balance_mpls_nodes[] =
+{
+ "mpls-load-balance",
+ NULL,
+};
+const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = load_balance_ip4_nodes,
+ [DPO_PROTO_IP6] = load_balance_ip6_nodes,
+ [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
+};
+
+void
+load_balance_module_init (void)
+{
+ dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
+
+ load_balance_map_module_init();
+}
+
+static clib_error_t *
+load_balance_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t lbi = INDEX_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &lbi))
+ ;
+ else
+ break;
+ }
+
+ if (INDEX_INVALID != lbi)
+ {
+ vlib_cli_output (vm, "%U", format_load_balance, lbi,
+ LOAD_BALANCE_FORMAT_DETAIL);
+ }
+ else
+ {
+ load_balance_t *lb;
+
+ pool_foreach(lb, load_balance_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_load_balance,
+ load_balance_get_index(lb),
+ LOAD_BALANCE_FORMAT_NONE);
+ }));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (load_balance_show_command, static) = {
+ .path = "show load-balance",
+ .short_help = "show load-balance [<index>]",
+ .function = load_balance_show,
+};
diff --git a/vnet/vnet/dpo/load_balance.h b/vnet/vnet/dpo/load_balance.h
new file mode 100644
index 00000000..d630a2c2
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * \brief
+ * The load-balance object represents an ECMP choice. The buckets of a load
+ * balance object point to the sub-graph after the choice is made.
+ * THe load-balance object is also object type returned from a FIB table lookup.
+ * As such it needs to represent the case where there is only one coice. It may
+ * seem like overkill to use a load-balance object in this case, but the reason
+ * is for performance. If the load-balance object were not the result of the FIB
+ * lookup, then some other object would be. The case where there was ECMP
+ * this other object would need a load-balance as a parent and hence just add
+ * an unnecessary indirection.
+ *
+ * It is also the object in the DP that represents a via-fib-entry in a recursive
+ * route.
+ *
+ */
+
+#ifndef __LOAD_BALANCE_H__
+#define __LOAD_BALANCE_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_types.h>
+
+/**
+ * Load-balance main
+ */
+typedef struct load_balance_main_t_
+{
+ vlib_combined_counter_main_t lbm_to_counters;
+ vlib_combined_counter_main_t lbm_via_counters;
+} load_balance_main_t;
+
+extern load_balance_main_t load_balance_main;
+
+/**
+ * The number of buckets that a load-balance object can have and still
+ * fit in one cache-line
+ */
+#define LB_NUM_INLINE_BUCKETS 4
+
+/**
+ * @brief One path from an [EU]CMP set that the client wants to add to a
+ * load-balance object
+ */
+typedef struct load_balance_path_t_ {
+ /**
+ * ID of the Data-path object.
+ */
+ dpo_id_t path_dpo;
+
+ /**
+ * The index of the FIB path
+ */
+ fib_node_index_t path_index;
+
+ /**
+ * weight for the path.
+ */
+ u32 path_weight;
+} load_balance_path_t;
+
+/**
+ * The FIB DPO provieds;
+ * - load-balancing over the next DPOs in the chain/graph
+ * - per-route counters
+ */
+typedef struct load_balance_t_ {
+ /**
+ * number of buckets in the load-balance. always a power of 2.
+ */
+ u16 lb_n_buckets;
+ /**
+ * number of buckets in the load-balance - 1. used in the switch path
+ * as part of the hash calculation.
+ */
+ u16 lb_n_buckets_minus_1;
+
+ /**
+ * The protocol of packets that traverse this LB.
+ * need in combination with the flow hash config to determine how to hash.
+ * u8.
+ */
+ dpo_proto_t lb_proto;
+
+ /**
+ * The number of locks, which is approximately the number of users,
+ * of this load-balance.
+ * Load-balance objects of via-entries are heavily shared by recursives,
+ * so the lock count is a u32.
+ */
+ u32 lb_locks;
+
+ /**
+ * index of the load-balance map, INVALID if this LB does not use one
+ */
+ index_t lb_map;
+
+ /**
+ * the hash config to use when selecting a bucket. this is a u16
+ */
+ flow_hash_config_t lb_hash_config;
+
+ /**
+ * Vector of buckets containing the next DPOs, sized as lbo_num
+ */
+ dpo_id_t *lb_buckets;
+
+ /**
+ * The rest of the cache line is used for buckets. In the common case
+ * where there there are less than 4 buckets, then the buckets are
+ * on the same cachlie and we save ourselves a pointer dereferance in
+ * the data-path.
+ */
+ dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS];
+} load_balance_t;
+
+_Static_assert(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
+ "A load_balance object size exceeds one cachline");
+
+/**
+ * Flags controlling load-balance formatting/display
+ */
+typedef enum load_balance_format_flags_t_ {
+ LOAD_BALANCE_FORMAT_NONE,
+ LOAD_BALANCE_FORMAT_DETAIL = (1 << 0),
+} load_balance_format_flags_t;
+
+/**
+ * Flags controlling load-balance creation and modification
+ */
+typedef enum load_balance_flags_t_ {
+ LOAD_BALANCE_FLAG_NONE = 0,
+ LOAD_BALANCE_FLAG_USES_MAP = (1 << 0),
+} load_balance_flags_t;
+
+extern index_t load_balance_create(u32 num_buckets,
+ dpo_proto_t lb_proto,
+ flow_hash_config_t fhc);
+extern void load_balance_multipath_update(
+ const dpo_id_t *dpo,
+ load_balance_path_t * raw_next_hops,
+ load_balance_flags_t flags);
+
+extern void load_balance_set_bucket(index_t lbi,
+ u32 bucket,
+ const dpo_id_t *next);
+
+extern u8* format_load_balance(u8 * s, va_list * args);
+
+extern const dpo_id_t *load_balance_get_bucket(index_t lbi,
+ u32 bucket);
+extern int load_balance_is_drop(const dpo_id_t *dpo);
+
+extern f64 load_balance_get_multipath_tolerance(void);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_t *load_balance_pool;
+static inline load_balance_t*
+load_balance_get (index_t lbi)
+{
+ return (pool_elt_at_index(load_balance_pool, lbi));
+}
+
+#define LB_HAS_INLINE_BUCKETS(_lb) \
+ ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
+
+static inline const dpo_id_t *
+load_balance_get_bucket_i (const load_balance_t *lb,
+ u32 bucket)
+{
+ ASSERT(bucket < lb->lb_n_buckets);
+
+ if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb)))
+ {
+ return (&lb->lb_buckets_inline[bucket]);
+ }
+ else
+ {
+ return (&lb->lb_buckets[bucket]);
+ }
+}
+
+extern void load_balance_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/load_balance_map.c b/vnet/vnet/dpo/load_balance_map.c
new file mode 100644
index 00000000..f08801f1
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance_map.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ */
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node_list.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * A hash-table of load-balance maps by path index.
+ * this provides the fast lookup of the LB map when a path goes down
+ */
+static uword *lb_maps_by_path_index;
+
+/**
+ * A hash-table of load-balance maps by set of paths.
+ * This provides the LB map sharing.
+ * LB maps do not necessarily use all the paths in the list, since
+ * the entry that is requesting the map, may not have an out-going
+ * label for each of the paths.
+ */
+static uword *load_balance_map_db;
+
+typedef enum load_balance_map_path_flags_t_
+{
+ LOAD_BALANCE_MAP_PATH_UP = (1 << 0),
+ LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
+} __attribute__ ((packed)) load_balance_map_path_flags_t;
+
+typedef struct load_balance_map_path_t_ {
+ /**
+ * Index of the path
+ */
+ fib_node_index_t lbmp_index;
+
+ /**
+ * Sibling Index in the list of all maps with this path index
+ */
+ fib_node_index_t lbmp_sibling;
+
+ /**
+ * the normalised wegiht of the path
+ */
+ u32 lbmp_weight;
+
+ /**
+ * The sate of the path
+ */
+ load_balance_map_path_flags_t lbmp_flags;
+} load_balance_map_path_t;
+
+/**
+ * The global pool of LB maps
+ */
+load_balance_map_t *load_balance_map_pool;
+
+/*
+ * Debug macro
+ */
+#ifdef FIB_DEBUG
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) \
+ { \
+ clib_warning("lbm: FIXME" _fmt, \
+ ##_args); \
+ }
+#else
+#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
+#endif
+
+static index_t
+load_balance_map_get_index (load_balance_map_t *lbm)
+{
+ return (lbm - load_balance_map_pool);
+}
+
+u8*
+format_load_balance_map (u8 *s, va_list ap)
+{
+ index_t lbmi = va_arg(ap, index_t);
+ u32 indent = va_arg(ap, u32);
+ load_balance_map_t *lbm;
+ u32 n_buckets, ii;
+
+ lbm = load_balance_map_get(lbmi);
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
+ s = format(s, "\n%U index:", format_white_space, indent+2);
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", ii);
+ }
+ s = format(s, "\n%U map:", format_white_space, indent+2);
+ for (ii = 0; ii < n_buckets; ii++)
+ {
+ s = format(s, "%5d", lbm->lbm_buckets[ii]);
+ }
+
+ return (s);
+}
+
+
+static uword
+load_balance_map_hash (load_balance_map_t *lbm)
+{
+ u32 old_lbm_hash, new_lbm_hash, hash;
+ load_balance_map_path_t *lb_path;
+
+ new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths);
+
+ vec_foreach (lb_path, lbm->lbm_paths)
+ {
+ hash = lb_path->lbmp_index;
+ hash_mix32(hash, old_lbm_hash, new_lbm_hash);
+ }
+
+ return (new_lbm_hash);
+}
+
+always_inline uword
+load_balance_map_db_hash_key_from_index (uword index)
+{
+ return 1 + 2*index;
+}
+
+always_inline uword
+load_balance_map_db_hash_key_is_index (uword key)
+{
+ return key & 1;
+}
+
+always_inline uword
+load_balance_map_db_hash_key_2_index (uword key)
+{
+ ASSERT (load_balance_map_db_hash_key_is_index (key));
+ return key / 2;
+}
+
+static load_balance_map_t*
+load_balance_map_db_get_from_hash_key (uword key)
+{
+ load_balance_map_t *lbm;
+
+ if (load_balance_map_db_hash_key_is_index (key))
+ {
+ index_t lbm_index;
+
+ lbm_index = load_balance_map_db_hash_key_2_index(key);
+ lbm = load_balance_map_get(lbm_index);
+ }
+ else
+ {
+ lbm = uword_to_pointer (key, load_balance_map_t *);
+ }
+
+ return (lbm);
+}
+
+static uword
+load_balance_map_db_hash_key_sum (hash_t * h,
+ uword key)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_db_get_from_hash_key(key);
+
+ return (load_balance_map_hash(lbm));
+}
+
+static uword
+load_balance_map_db_hash_key_equal (hash_t * h,
+ uword key1,
+ uword key2)
+{
+ load_balance_map_t *lbm1, *lbm2;
+
+ lbm1 = load_balance_map_db_get_from_hash_key(key1);
+ lbm2 = load_balance_map_db_get_from_hash_key(key2);
+
+ return (load_balance_map_hash(lbm1) ==
+ load_balance_map_hash(lbm2));
+}
+
+static index_t
+load_balance_map_db_find (load_balance_map_t *lbm)
+{
+ uword *p;
+
+ p = hash_get(load_balance_map_db, lbm);
+
+ if (NULL != p)
+ {
+ return p[0];
+ }
+
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+static void
+load_balance_map_db_insert (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ fib_node_list_t list;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));
+
+ /*
+ * insert into the DB based on the set of paths.
+ */
+ hash_set (load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)),
+ load_balance_map_get_index(lbm));
+
+ /*
+ * insert into each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ if (NULL == p)
+ {
+ list = fib_node_list_create();
+ hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
+ }
+ else
+ {
+ list = p[0];
+ }
+
+ lbmp->lbmp_sibling =
+ fib_node_list_push_front(list,
+ 0, FIB_NODE_TYPE_FIRST,
+ load_balance_map_get_index(lbm));
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
+}
+
+static void
+load_balance_map_db_remove (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ uword *p;
+
+ ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));
+
+ hash_unset(load_balance_map_db,
+ load_balance_map_db_hash_key_from_index(
+ load_balance_map_get_index(lbm)));
+
+ /*
+ * remove from each per-path list.
+ */
+ vec_foreach(lbmp, lbm->lbm_paths)
+ {
+ p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);
+
+ ASSERT(NULL != p);
+
+ fib_node_list_remove(p[0], lbmp->lbmp_sibling);
+ }
+
+ LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
+}
+
+/**
+ * @brief from the paths that are usable, fill the Map.
+ */
+static void
+load_balance_map_fill (load_balance_map_t *lbm)
+{
+ load_balance_map_path_t *lbmp;
+ u32 n_buckets, bucket, ii, jj;
+ u16 *tmp_buckets;
+
+ tmp_buckets = NULL;
+ n_buckets = vec_len(lbm->lbm_buckets);
+
+ /*
+ * run throught the set of paths once, and build a vector of the
+ * indices that are usable. we do this is a scratch space, since we
+ * need to refer to it multiple times as we build the real buckets.
+ */
+ vec_validate(tmp_buckets, n_buckets-1);
+
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ tmp_buckets[jj++] = bucket++;
+ }
+ }
+ else
+ {
+ bucket += lbmp->lbmp_weight;
+ }
+ }
+ _vec_len(tmp_buckets) = jj;
+
+ /*
+ * If the number of temporaries written is as many as we need, implying
+ * all paths were up, then we can simply copy the scratch area over the
+ * actual buckets' memory
+ */
+ if (jj == n_buckets)
+ {
+ memcpy(lbm->lbm_buckets,
+ tmp_buckets,
+ sizeof(lbm->lbm_buckets[0]) * n_buckets);
+ }
+ else
+ {
+ /*
+ * one or more paths are down.
+ */
+ if (0 == vec_len(tmp_buckets))
+ {
+ /*
+ * if the scratch area is empty, then no paths are usable.
+ * they will all drop. so use them all, lest we account drops
+ * against only one.
+ */
+ for (bucket = 0; bucket < n_buckets; bucket++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ }
+ }
+ else
+ {
+ bucket = jj = 0;
+ vec_foreach (lbmp, lbm->lbm_paths)
+ {
+ if (fib_path_is_resolved(lbmp->lbmp_index))
+ {
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = bucket;
+ bucket++;
+ }
+ }
+ else
+ {
+ /*
+ * path is unusable
+ * cycle through the scratch space selecting a index.
+ * this means we load balance, in the intended ratio,
+ * over the paths that are still usable.
+ */
+ for (ii = 0; ii < lbmp->lbmp_weight; ii++)
+ {
+ lbm->lbm_buckets[bucket] = tmp_buckets[jj];
+ jj = (jj + 1) % vec_len(tmp_buckets);
+ bucket++;
+ }
+ }
+ }
+ }
+ }
+
+ vec_free(tmp_buckets);
+}
+
+static load_balance_map_t*
+load_balance_map_alloc (const load_balance_path_t *paths)
+{
+ load_balance_map_t *lbm;
+ u32 ii;
+
+ pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
+ memset(lbm, 0, sizeof(*lbm));
+
+ vec_validate(lbm->lbm_paths, vec_len(paths)-1);
+
+ vec_foreach_index(ii, paths)
+ {
+ lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index;
+ lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
+ }
+
+ return (lbm);
+}
+
+static load_balance_map_t *
+load_balance_map_init (load_balance_map_t *lbm,
+ u32 n_buckets,
+ u32 sum_of_weights)
+{
+ lbm->lbm_sum_of_norm_weights = sum_of_weights;
+ vec_validate(lbm->lbm_buckets, n_buckets-1);
+
+ load_balance_map_db_insert(lbm);
+
+ load_balance_map_fill(lbm);
+
+ return (lbm);
+}
+
+index_t
+load_balance_map_add_or_lock (u32 n_buckets,
+ u32 sum_of_weights,
+ const load_balance_path_t *paths)
+{
+ load_balance_map_t *tmp, *lbm;
+ index_t lbmi;
+
+ tmp = load_balance_map_alloc(paths);
+
+ lbmi = load_balance_map_db_find(tmp);
+
+ if (INDEX_INVALID == lbmi)
+ {
+ lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
+ }
+ else
+ {
+ lbm = load_balance_map_get(lbmi);
+ }
+
+ lbm->lbm_locks++;
+
+ return (load_balance_map_get_index(lbm));
+}
+
+void
+load_balance_map_lock (index_t lbmi)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_get(lbmi);
+
+ lbm->lbm_locks++;
+}
+
+void
+load_balance_map_unlock (index_t lbmi)
+{
+ load_balance_map_t *lbm;
+
+ if (INDEX_INVALID == lbmi)
+ {
+ return;
+ }
+
+ lbm = load_balance_map_get(lbmi);
+
+ lbm->lbm_locks--;
+
+ if (0 == lbm->lbm_locks)
+ {
+ load_balance_map_db_remove(lbm);
+ vec_free(lbm->lbm_paths);
+ vec_free(lbm->lbm_buckets);
+ pool_put(load_balance_map_pool, lbm);
+ }
+}
+
+static int
+load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
+ void *ctx)
+{
+ load_balance_map_t *lbm;
+
+ lbm = load_balance_map_get(fptr->fnp_index);
+
+ load_balance_map_fill(lbm);
+
+ return (!0);
+}
+
+/**
+ * @brief the state of a path has changed (it has no doubt gone down).
+ * This is the trigger to perform a PIC edge cutover and update the maps
+ * to exclude this path.
+ */
+void
+load_balance_map_path_state_change (fib_node_index_t path_index)
+{
+ uword *p;
+
+ /*
+ * re-stripe the buckets for each affect MAP
+ */
+ p = hash_get(lb_maps_by_path_index, path_index);
+
+ if (NULL == p)
+ return;
+
+ fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL);
+}
+
+/**
+ * @brief Make/add a new or lock an existing Load-balance map
+ */
+void
+load_balance_map_module_init (void)
+{
+ load_balance_map_db =
+ hash_create2 (/* elts */ 0,
+ /* user */ 0,
+ /* value_bytes */ sizeof (index_t),
+ load_balance_map_db_hash_key_sum,
+ load_balance_map_db_hash_key_equal,
+ /* format pair/arg */
+ 0, 0);
+
+ lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
+}
+
+static clib_error_t *
+load_balance_map_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ index_t lbmi = INDEX_INVALID;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &lbmi))
+ ;
+ else
+ break;
+ }
+
+ if (INDEX_INVALID != lbmi)
+ {
+ vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
+ }
+ else
+ {
+ load_balance_map_t *lbm;
+
+ pool_foreach(lbm, load_balance_map_pool,
+ ({
+ vlib_cli_output (vm, "%U", format_load_balance_map,
+ load_balance_map_get_index(lbm), 0);
+ }));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
+ .path = "show load-balance-map",
+ .short_help = "show load-balance-map [<index>]",
+ .function = load_balance_map_show,
+};
diff --git a/vnet/vnet/dpo/load_balance_map.h b/vnet/vnet/dpo/load_balance_map.h
new file mode 100644
index 00000000..f080e97c
--- /dev/null
+++ b/vnet/vnet/dpo/load_balance_map.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ */
+
+#ifndef __LOAD_BALANCE_MAP_H__
+#define __LOAD_BALANCE_MAP_H__
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/load_balance.h>
+
+struct load_balance_map_path_t_;
+
+/**
+ */
+typedef struct load_balance_map_t_ {
+ /**
+ * The buckets of the map that provide the index to index translation.
+ * In the first cacheline.
+ */
+ u16 *lbm_buckets;
+
+ /**
+ * the vector of paths this MAP represents
+ */
+ struct load_balance_map_path_t_ *lbm_paths;
+
+ /**
+ * the sum of the normalised weights. cache for convenience
+ */
+ u32 lbm_sum_of_norm_weights;
+
+ /**
+ * Number of locks. Maps are shared by a large number of recrusvie fib_entry_ts
+ */
+ u32 lbm_locks;
+} load_balance_map_t;
+
+extern index_t load_balance_map_add_or_lock(u32 n_buckets,
+ u32 sum_of_weights,
+ const load_balance_path_t *norm_paths);
+
+extern void load_balance_map_lock(index_t lmbi);
+extern void load_balance_map_unlock(index_t lbmi);
+
+extern void load_balance_map_path_state_change(fib_node_index_t path_index);
+
+extern u8* format_load_balance_map(u8 *s, va_list ap);
+
+/**
+ * The encapsulation breakages are for fast DP access
+ */
+extern load_balance_map_t *load_balance_map_pool;
+
+static inline load_balance_map_t*
+load_balance_map_get (index_t lbmi)
+{
+ return (pool_elt_at_index(load_balance_map_pool, lbmi));
+}
+
+
+extern void load_balance_map_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/lookup_dpo.c b/vnet/vnet/dpo/lookup_dpo.c
new file mode 100644
index 00000000..0bfc0651
--- /dev/null
+++ b/vnet/vnet/dpo/lookup_dpo.c
@@ -0,0 +1,802 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+static const char *const lookup_input_names[] = LOOKUP_INPUTS;
+
+/**
+ * @brief Enumeration of the lookup subtypes
+ */
+typedef enum lookup_sub_type_t_
+{
+ LOOKUP_SUB_TYPE_SRC,
+ LOOKUP_SUB_TYPE_DST,
+ LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE,
+} lookup_sub_type_t;
+#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1)
+
+#define FOR_EACH_LOOKUP_SUB_TYPE(_st) \
+ for (_st = LOOKUP_SUB_TYPE_IP4_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++)
+
+/**
+ * @brief pool of all MPLS Label DPOs
+ */
+lookup_dpo_t *lookup_dpo_pool;
+
+/**
+ * @brief An array of registered DPO type values for the sub-types
+ */
+static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM];
+
+static lookup_dpo_t *
+lookup_dpo_alloc (void)
+{
+ lookup_dpo_t *lkd;
+
+ pool_get_aligned(lookup_dpo_pool, lkd, CLIB_CACHE_LINE_BYTES);
+
+ return (lkd);
+}
+
+static index_t
+lookup_dpo_get_index (lookup_dpo_t *lkd)
+{
+ return (lkd - lookup_dpo_pool);
+}
+
+static void
+lookup_dpo_add_or_lock_i (fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+ dpo_type_t type;
+
+ lkd = lookup_dpo_alloc();
+ lkd->lkd_fib_index = fib_index;
+ lkd->lkd_proto = proto;
+ lkd->lkd_input = input;
+ lkd->lkd_table = table_config;
+
+ /*
+ * use the input type to select the lookup sub-type
+ */
+ type = 0;
+
+ switch (input)
+ {
+ case LOOKUP_INPUT_SRC_ADDR:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC];
+ break;
+ case LOOKUP_INPUT_DST_ADDR:
+ switch (table_config)
+ {
+ case LOOKUP_TABLE_FROM_INPUT_INTERFACE:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE];
+ break;
+ case LOOKUP_TABLE_FROM_CONFIG:
+ type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST];
+ break;
+ }
+ }
+
+ if (0 == type)
+ {
+ dpo_reset(dpo);
+ }
+ else
+ {
+ dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd));
+ }
+}
+
+void
+lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ fib_table_lock(fib_index, dpo_proto_to_fib(proto));
+ }
+ lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+void
+lookup_dpo_add_or_lock_w_table_id (u32 table_id,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table_config,
+ dpo_id_t *dpo)
+{
+ fib_node_index_t fib_index = FIB_NODE_INDEX_INVALID;
+
+ if (LOOKUP_TABLE_FROM_CONFIG == table_config)
+ {
+ fib_index =
+ fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto),
+ table_id);
+ }
+
+ ASSERT(FIB_NODE_INDEX_INVALID != fib_index);
+ lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo);
+}
+
+u8*
+format_lookup_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(index);
+
+ if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table)
+ {
+ s = format(s, "%s lookup in interface's %U table",
+ lookup_input_names[lkd->lkd_input],
+ format_dpo_proto, lkd->lkd_proto);
+ }
+ else
+ {
+ s = format(s, "%s lookup in %U",
+ lookup_input_names[lkd->lkd_input],
+ format_fib_table_name, lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto));
+ }
+ return (s);
+}
+
+static void
+lookup_dpo_lock (dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ lkd->lkd_locks++;
+}
+
+static void
+lookup_dpo_unlock (dpo_id_t *dpo)
+{
+ lookup_dpo_t *lkd;
+
+ lkd = lookup_dpo_get(dpo->dpoi_index);
+
+ lkd->lkd_locks--;
+
+ if (0 == lkd->lkd_locks)
+ {
+ if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table)
+ {
+ fib_table_unlock(lkd->lkd_fib_index,
+ dpo_proto_to_fib(lkd->lkd_proto));
+ }
+ pool_put(lookup_dpo_pool, lkd);
+ }
+}
+
+always_inline void
+ip4_src_fib_lookup_one (u32 src_fib_index0,
+ const ip4_address_t * addr0,
+ u32 * src_adj_index0)
+{
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_fib_mtrie_t * mtrie0;
+
+ mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+}
+
+always_inline void
+ip4_src_fib_lookup_two (u32 src_fib_index0,
+ u32 src_fib_index1,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ u32 * src_adj_index0,
+ u32 * src_adj_index1)
+{
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+
+ mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
+
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+ src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+}
+
+/**
+ * @brief Lookup trace data
+ */
+typedef struct lookup_trace_t_
+{
+ union {
+ ip46_address_t addr;
+ mpls_unicast_header_t hdr;
+ };
+ fib_node_index_t fib_index;
+ index_t lbi;
+} lookup_trace_t;
+
+
+always_inline uword
+lookup_dpo_ip4_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const ip4_address_t *input_addr;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip4_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip4 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ ip4_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr = &ip0->src_address;
+ }
+ else
+ {
+ input_addr = &ip0->dst_address;
+ }
+
+ /* do lookup */
+ ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0);
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip4 = *input_addr;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lookup_trace_t * t = va_arg (*args, lookup_trace_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "%U fib-index:%d addr:%U load-balance:%d",
+ format_white_space, indent,
+ t->fib_index,
+ format_ip46_address, &t->addr, IP46_TYPE_ANY,
+ t->lbi);
+ return s;
+}
+
+always_inline uword
+lookup_ip4_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_node) = {
+ .function = lookup_ip4_dst,
+ .name = "lookup-ip4-dst",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst)
+
+always_inline uword
+lookup_ip4_dst_itf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = {
+ .function = lookup_ip4_dst_itf,
+ .name = "lookup-ip4-dst-itf",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip4-lookup",
+ .format_trace = format_lookup_trace,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf)
+
+always_inline uword
+lookup_ip4_src (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip4_inline(vm, node, from_frame, 1, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_ip4_src_node) = {
+ .function = lookup_ip4_src,
+ .name = "lookup-ip4-src",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip4-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src)
+
+always_inline uword
+lookup_dpo_ip6_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int input_src_addr)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const ip6_address_t *input_addr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const ip6_header_t * ip0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by ip6 lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+ fib_index0 = lkd0->lkd_fib_index;
+
+ /*
+ * choose between a source or destination address lookup in the table
+ */
+ if (input_src_addr)
+ {
+ input_addr0 = &ip0->src_address;
+ }
+ else
+ {
+ input_addr0 = &ip0->dst_address;
+ }
+
+ /* do src lookup */
+ lbi0 = ip6_fib_table_fwding_lookup(&ip6_main,
+ fib_index0,
+ input_addr0);
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->addr.ip6 = *input_addr0;
+ }
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+always_inline uword
+lookup_ip6_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /*use src*/));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_dst_node) = {
+ .function = lookup_ip6_dst,
+ .name = "lookup-ip6-dst",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst)
+
+always_inline uword
+lookup_ip6_src (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_ip6_inline(vm, node, from_frame, 1 /*use src*/));
+}
+
+VLIB_REGISTER_NODE (lookup_ip6_src_node) = {
+ .function = lookup_ip6_src,
+ .name = "lookup-ip6-src",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lookup_trace,
+ .sibling_of = "ip6-lookup",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src)
+
+always_inline uword
+lookup_dpo_mpls_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int table_from_interface)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ u32 cpu_index = os_get_cpu_number();
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0, lkdi0, lbi0, fib_index0, next0;
+ const mpls_unicast_header_t * hdr0;
+ const load_balance_t *lb0;
+ const lookup_dpo_t * lkd0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ hdr0 = vlib_buffer_get_current (b0);
+
+ /* dst lookup was done by mpls lookup */
+ lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ lkd0 = lookup_dpo_get(lkdi0);
+
+ /*
+ * choose between a lookup using the fib index in the DPO
+ * or getting the FIB index from the interface.
+ */
+ if (table_from_interface)
+ {
+ fib_index0 =
+ mpls_fib_table_get_index_for_sw_if_index(
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ fib_index0 = lkd0->lkd_fib_index;
+ }
+
+ /* do lookup */
+ lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0);
+ lb0 = load_balance_get(lbi0);
+ dpo0 = load_balance_get_bucket_i(lb0, 0);
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->fib_index = fib_index0;
+ tr->lbi = lbi0;
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_lookup_mpls_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lookup_trace_t * t = va_arg (*args, lookup_trace_t *);
+ uword indent = format_get_indent (s);
+ mpls_unicast_header_t hdr;
+
+ hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl);
+
+ s = format (s, "%U fib-index:%d hdr:%U load-balance:%d",
+ format_white_space, indent,
+ t->fib_index,
+ format_mpls_header, hdr,
+ t->lbi);
+ return s;
+}
+
+always_inline uword
+lookup_mpls_dst (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_mpls_inline(vm, node, from_frame, 0));
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_node) = {
+ .function = lookup_mpls_dst,
+ .name = "lookup-mpls-dst",
+ .vector_size = sizeof (u32),
+ .sibling_of = "mpls-lookup",
+ .format_trace = format_lookup_mpls_trace,
+ .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, lookup_mpls_dst)
+
+always_inline uword
+lookup_mpls_dst_itf (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (lookup_dpo_mpls_inline(vm, node, from_frame, 1));
+}
+
+VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = {
+ .function = lookup_mpls_dst_itf,
+ .name = "lookup-mpls-dst-itf",
+ .vector_size = sizeof (u32),
+ .sibling_of = "mpls-lookup",
+ .format_trace = format_lookup_mpls_trace,
+ .n_next_nodes = 0,
+};
+VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf)
+
+const static dpo_vft_t lkd_vft = {
+ .dv_lock = lookup_dpo_lock,
+ .dv_unlock = lookup_dpo_unlock,
+ .dv_format = format_lookup_dpo,
+};
+
+const static char* const lookup_src_ip4_nodes[] =
+{
+ "lookup-ip4-src",
+ NULL,
+};
+const static char* const lookup_src_ip6_nodes[] =
+{
+ "lookup-ip6-src",
+ NULL,
+};
+const static char* const * const lookup_src_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_src_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_src_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+const static char* const lookup_dst_ip4_nodes[] =
+{
+ "lookup-ip4-dst",
+ NULL,
+};
+const static char* const lookup_dst_ip6_nodes[] =
+{
+ "lookup-ip6-dst",
+ NULL,
+};
+const static char* const lookup_dst_mpls_nodes[] =
+{
+ "lookup-mpls-dst",
+ NULL,
+};
+const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_dst_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_dst_ip6_nodes,
+ [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes,
+};
+
+const static char* const lookup_dst_from_interface_ip4_nodes[] =
+{
+ "lookup-ip4-dst-itf",
+ NULL,
+};
+const static char* const lookup_dst_from_interface_ip6_nodes[] =
+{
+ "lookup-ip6-dst-itf",
+ NULL,
+};
+const static char* const lookup_dst_from_interface_mpls_nodes[] =
+{
+ "lookup-mpls-dst-itf",
+ NULL,
+};
+const static char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = lookup_dst_from_interface_ip4_nodes,
+ [DPO_PROTO_IP6] = lookup_dst_from_interface_ip6_nodes,
+ [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes,
+};
+
+
+void
+lookup_dpo_module_init (void)
+{
+ dpo_register(DPO_LOOKUP, &lkd_vft, NULL);
+
+ /*
+ * There are various sorts of lookup; src or dst addr v4 /v6 etc.
+ * there isn't an object type for each (there is only the lookup_dpo_t),
+ * but, for performance reasons, there is a data plane function, and hence
+ * VLIB node for each. VLIB graph node construction is based on DPO types
+ * so we create sub-types.
+ */
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] =
+ dpo_register_new_type(&lkd_vft, lookup_src_nodes);
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] =
+ dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+ lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] =
+ dpo_register_new_type(&lkd_vft, lookup_dst_nodes);
+}
diff --git a/vnet/vnet/dpo/lookup_dpo.h b/vnet/vnet/dpo/lookup_dpo.h
new file mode 100644
index 00000000..ff283388
--- /dev/null
+++ b/vnet/vnet/dpo/lookup_dpo.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOOKUP_DPO_H__
+#define __LOOKUP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * Switch to use the packet's source or destination address for lookup
+ */
+typedef enum lookup_input_t_ {
+ LOOKUP_INPUT_SRC_ADDR,
+ LOOKUP_INPUT_DST_ADDR,
+} __attribute__ ((packed)) lookup_input_t;
+
+#define LOOKUP_INPUTS { \
+ [LOOKUP_INPUT_SRC_ADDR] = "src-address", \
+ [LOOKUP_INPUT_DST_ADDR] = "dst-address", \
+}
+
+/**
+ * Switch to use the packet's source or destination address for lookup
+ */
+typedef enum lookup_table_t_ {
+ LOOKUP_TABLE_FROM_INPUT_INTERFACE,
+ LOOKUP_TABLE_FROM_CONFIG,
+} __attribute__ ((packed)) lookup_table_t;
+
+#define LOOKUP_TABLES { \
+ [LOOKUP_INPUT_SRC_ADDR] = "table-input-interface", \
+ [LOOKUP_INPUT_DST_ADDR] = "table-configured", \
+}
+
+/**
+ * A representation of an MPLS label for imposition in the data-path
+ */
+typedef struct lookup_dpo_t
+{
+ /**
+ * The FIB, or interface from which to get a FIB, in which to perform
+ * the next lookup;
+ */
+ fib_node_index_t lkd_fib_index;
+
+ /**
+ * The protocol of the FIB for the lookup, and hence
+ * the protocol of the packet
+ */
+ dpo_proto_t lkd_proto;
+
+ /**
+ * Switch to use src or dst address
+ */
+ lookup_input_t lkd_input;
+
+ /**
+ * Switch to use the table index passed, or the table of the input interface
+ */
+ lookup_table_t lkd_table;
+
+ /**
+ * Number of locks
+ */
+ u16 lkd_locks;
+} lookup_dpo_t;
+
+extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
+extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id,
+ dpo_proto_t proto,
+ lookup_input_t input,
+ lookup_table_t table,
+ dpo_id_t *dpo);
+
+extern u8* format_lookup_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern lookup_dpo_t *lookup_dpo_pool;
+
+static inline lookup_dpo_t *
+lookup_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(lookup_dpo_pool, index));
+}
+
+extern void lookup_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/mpls_label_dpo.c b/vnet/vnet/dpo/mpls_label_dpo.c
new file mode 100644
index 00000000..0ec840ec
--- /dev/null
+++ b/vnet/vnet/dpo/mpls_label_dpo.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/mpls_label_dpo.h>
+#include <vnet/mpls/mpls.h>
+
+/*
+ * pool of all MPLS Label DPOs
+ */
+mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static mpls_label_dpo_t *
+mpls_label_dpo_alloc (void)
+{
+ mpls_label_dpo_t *mld;
+
+ pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES);
+ memset(mld, 0, sizeof(*mld));
+
+ dpo_reset(&mld->mld_dpo);
+
+ return (mld);
+}
+
+static index_t
+mpls_label_dpo_get_index (mpls_label_dpo_t *mld)
+{
+ return (mld - mpls_label_dpo_pool);
+}
+
+index_t
+mpls_label_dpo_create (mpls_label_t label,
+ mpls_eos_bit_t eos,
+ u8 ttl,
+ u8 exp,
+ const dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_alloc();
+
+ vnet_mpls_uc_set_label(&mld->mld_hdr.label_exp_s_ttl, label);
+ vnet_mpls_uc_set_ttl(&mld->mld_hdr.label_exp_s_ttl, ttl);
+ vnet_mpls_uc_set_exp(&mld->mld_hdr.label_exp_s_ttl, exp);
+ vnet_mpls_uc_set_s(&mld->mld_hdr.label_exp_s_ttl, eos);
+
+ /*
+ * get the header in network byte order since we will paint it
+ * on a packet in the data-plane
+ */
+ mld->mld_hdr.label_exp_s_ttl =
+ clib_host_to_net_u32(mld->mld_hdr.label_exp_s_ttl);
+
+ dpo_stack(DPO_MPLS_LABEL, DPO_PROTO_MPLS, &mld->mld_dpo, dpo);
+
+ return (mpls_label_dpo_get_index(mld));
+}
+
+u8*
+format_mpls_label_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ u32 indent = va_arg (*args, u32);
+ mpls_unicast_header_t hdr;
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_get(index);
+
+ hdr.label_exp_s_ttl =
+ clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl);
+
+ return (format(s, "mpls-label:[%d]:%U\n%U%U",
+ index,
+ format_mpls_header, hdr,
+ format_white_space, indent,
+ format_dpo_id, &mld->mld_dpo, indent+2));
+}
+
+static void
+mpls_label_dpo_lock (dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+ mld->mld_locks++;
+}
+
+static void
+mpls_label_dpo_unlock (dpo_id_t *dpo)
+{
+ mpls_label_dpo_t *mld;
+
+ mld = mpls_label_dpo_get(dpo->dpoi_index);
+
+ mld->mld_locks--;
+
+ if (0 == mld->mld_locks)
+ {
+ dpo_reset(&mld->mld_dpo);
+ pool_put(mpls_label_dpo_pool, mld);
+ }
+}
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_label_imposition_trace_t_
+{
+ /**
+ * The MPLS header imposed
+ */
+ mpls_unicast_header_t hdr;
+} mpls_label_imposition_trace_t;
+
+always_inline uword
+mpls_label_imposition (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ mpls_unicast_header_t *hdr0;
+ mpls_label_dpo_t *mld0;
+ vlib_buffer_t * b0;
+ u32 bi0, mldi0;
+ u32 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* dst lookup was done by ip4 lookup */
+ mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ mld0 = mpls_label_dpo_get(mldi0);
+
+ /* Paint the MPLS header */
+ vlib_buffer_advance(b0, -sizeof(*hdr0));
+ hdr0 = vlib_buffer_get_current(b0);
+
+ // FIXME.
+ // need to copy the TTL from the correct place.
+ // for IPvX imposition from the IP header
+ // so we need a deidcated ipx-to-mpls-label-imp-node
+ // for mpls switch and stack another solution is required.
+ *hdr0 = mld0->mld_hdr;
+
+ next0 = mld0->mld_dpo.dpoi_next_node;
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->hdr = *hdr0;
+ }
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_label_imposition_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_label_imposition_trace_t * t;
+ mpls_unicast_header_t hdr;
+ uword indent;
+
+ t = va_arg (*args, mpls_label_imposition_trace_t *);
+ indent = format_get_indent (s);
+ hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl);
+
+ s = format (s, "%Umpls-header:%U",
+ format_white_space, indent,
+ format_mpls_header, hdr);
+ return (s);
+}
+
+VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
+ .function = mpls_label_imposition,
+ .name = "mpls-label-imposition",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_imposition_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ }
+};
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, mpls_label_imposition)
+
+const static dpo_vft_t mld_vft = {
+ .dv_lock = mpls_label_dpo_lock,
+ .dv_unlock = mpls_label_dpo_unlock,
+ .dv_format = format_mpls_label_dpo,
+};
+
+const static char* const mpls_label_imp_ip4_nodes[] =
+{
+ "mpls-label-imposition",
+ NULL,
+};
+const static char* const mpls_label_imp_ip6_nodes[] =
+{
+ "mpls-label-imposition",
+ NULL,
+};
+const static char* const mpls_label_imp_mpls_nodes[] =
+{
+ "mpls-label-imposition",
+ NULL,
+};
+const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes,
+ [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes,
+ [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
+};
+
+
+void
+mpls_label_dpo_module_init (void)
+{
+ dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes);
+}
diff --git a/vnet/vnet/dpo/mpls_label_dpo.h b/vnet/vnet/dpo/mpls_label_dpo.h
new file mode 100644
index 00000000..47ee3449
--- /dev/null
+++ b/vnet/vnet/dpo/mpls_label_dpo.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LABEL_DPO_H__
+#define __MPLS_LABEL_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of an MPLS label for imposition in the data-path
+ */
+typedef struct mpls_label_dpo_t
+{
+ /**
+ * The MPLS label header to impose
+ */
+ mpls_unicast_header_t mld_hdr;
+
+ /**
+ * Next DPO in the graph
+ */
+ dpo_id_t mld_dpo;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 mld_locks;
+} mpls_label_dpo_t;
+
+extern index_t mpls_label_dpo_create(mpls_label_t label,
+ mpls_eos_bit_t eos,
+ u8 ttl,
+ u8 exp,
+ const dpo_id_t *dpo);
+
+extern u8* format_mpls_label_dpo(u8 *s, va_list *args);
+
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern mpls_label_dpo_t *mpls_label_dpo_pool;
+
+static inline mpls_label_dpo_t *
+mpls_label_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(mpls_label_dpo_pool, index));
+}
+
+extern void mpls_label_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/punt_dpo.c b/vnet/vnet/dpo/punt_dpo.c
new file mode 100644
index 00000000..e27a8ff3
--- /dev/null
+++ b/vnet/vnet/dpo/punt_dpo.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing puntping the packet
+ */
+
+#include <vnet/dpo/dpo.h>
+
+static dpo_id_t punt_dpos[DPO_PROTO_NUM];
+
+const dpo_id_t *
+punt_dpo_get (dpo_proto_t proto)
+{
+ dpo_set(&punt_dpos[proto], DPO_PUNT, proto, 1);
+
+ return (&punt_dpos[proto]);
+}
+
+int
+dpo_is_punt (const dpo_id_t *dpo)
+{
+ return (dpo->dpoi_type == DPO_PUNT);
+}
+
+static void
+punt_dpo_lock (dpo_id_t *dpo)
+{
+ /*
+ * not maintaining a lock count on the punt
+ * more trouble than it's worth.
+ * There always needs to be one around. no point it managaing its lifetime
+ */
+}
+static void
+punt_dpo_unlock (dpo_id_t *dpo)
+{
+}
+
+static u8*
+format_punt_dpo (u8 *s, va_list *ap)
+{
+ CLIB_UNUSED(index_t index) = va_arg(ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+
+ return (format(s, "dpo-punt"));
+}
+
+const static dpo_vft_t punt_vft = {
+ .dv_lock = punt_dpo_lock,
+ .dv_unlock = punt_dpo_unlock,
+ .dv_format = format_punt_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a punt
+ * object.
+ *
+ * this means that these graph nodes are ones from which a punt is the
+ * parent object in the DPO-graph.
+ */
+const static char* const punt_ip4_nodes[] =
+{
+ "ip4-punt",
+ NULL,
+};
+const static char* const punt_ip6_nodes[] =
+{
+ "ip6-punt",
+ NULL,
+};
+const static char* const punt_mpls_nodes[] =
+{
+ "mpls-punt",
+ NULL,
+};
+const static char* const * const punt_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = punt_ip4_nodes,
+ [DPO_PROTO_IP6] = punt_ip6_nodes,
+ [DPO_PROTO_MPLS] = punt_mpls_nodes,
+};
+
+void
+punt_dpo_module_init (void)
+{
+ dpo_register(DPO_PUNT, &punt_vft, punt_nodes);
+}
diff --git a/vnet/vnet/dpo/punt_dpo.h b/vnet/vnet/dpo/punt_dpo.h
new file mode 100644
index 00000000..370547c1
--- /dev/null
+++ b/vnet/vnet/dpo/punt_dpo.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief A DPO to punt packets to the Control-plane
+ */
+
+#ifndef __PUNT_DPO_H__
+#define __PUNT_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+
+extern int dpo_is_punt(const dpo_id_t *dpo);
+
+extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto);
+
+extern void punt_dpo_module_init(void);
+
+#endif
diff --git a/vnet/vnet/dpo/receive_dpo.c b/vnet/vnet/dpo/receive_dpo.c
new file mode 100644
index 00000000..ee7d82b0
--- /dev/null
+++ b/vnet/vnet/dpo/receive_dpo.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiveing the packet, i.e. it's for-us
+ */
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/receive_dpo.h>
+
+/**
+ * @brief pool of all receive DPOs
+ */
+receive_dpo_t *receive_dpo_pool;
+
+static receive_dpo_t *
+receive_dpo_alloc (void)
+{
+ receive_dpo_t *rd;
+
+ pool_get_aligned(receive_dpo_pool, rd, CLIB_CACHE_LINE_BYTES);
+ memset(rd, 0, sizeof(*rd));
+
+ return (rd);
+}
+
+static receive_dpo_t *
+receive_dpo_get_from_dpo (const dpo_id_t *dpo)
+{
+ ASSERT(DPO_RECEIVE == dpo->dpoi_type);
+
+ return (receive_dpo_get(dpo->dpoi_index));
+}
+
+
+/*
+ * receive_dpo_add_or_lock
+ *
+ * The next_hop address here is used for source address selection in the DP.
+ * The local adj is added to an interface's receive prefix, the next-hop
+ * passed here is the local prefix on the same interface.
+ */
+void
+receive_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr,
+ dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_alloc();
+
+ rd->rd_sw_if_index = sw_if_index;
+ if (NULL != nh_addr)
+ {
+ rd->rd_addr = *nh_addr;
+ }
+
+ dpo_set(dpo, DPO_RECEIVE, proto, (rd - receive_dpo_pool));
+}
+
+static void
+receive_dpo_lock (dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_get_from_dpo(dpo);
+ rd->rd_locks++;
+}
+
+static void
+receive_dpo_unlock (dpo_id_t *dpo)
+{
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_get_from_dpo(dpo);
+ rd->rd_locks--;
+
+ if (0 == rd->rd_locks)
+ {
+ pool_put(receive_dpo_pool, rd);
+ }
+}
+
+static u8*
+format_receive_dpo (u8 *s, va_list *ap)
+{
+ CLIB_UNUSED(index_t index) = va_arg(ap, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg(ap, u32);
+ vnet_main_t * vnm = vnet_get_main();
+ receive_dpo_t *rd;
+
+ rd = receive_dpo_get(index);
+
+ if (~0 != rd->rd_sw_if_index)
+ {
+ return (format(s, "dpo-receive: %U on %U",
+ format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+ }
+ else
+ {
+ return (format(s, "dpo-receive"));
+ }
+}
+
+const static dpo_vft_t receive_vft = {
+ .dv_lock = receive_dpo_lock,
+ .dv_unlock = receive_dpo_unlock,
+ .dv_format = format_receive_dpo,
+};
+
+/**
+ * @brief The per-protocol VLIB graph nodes that are assigned to a receive
+ * object.
+ *
+ * this means that these graph nodes are ones from which a receive is the
+ * parent object in the DPO-graph.
+ */
+const static char* const receive_ip4_nodes[] =
+{
+ "ip4-local",
+ NULL,
+};
+const static char* const receive_ip6_nodes[] =
+{
+ "ip6-local",
+ NULL,
+};
+
+const static char* const * const receive_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = receive_ip4_nodes,
+ [DPO_PROTO_IP6] = receive_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+receive_dpo_module_init (void)
+{
+ dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes);
+}
diff --git a/vnet/vnet/dpo/receive_dpo.h b/vnet/vnet/dpo/receive_dpo.h
new file mode 100644
index 00000000..2420fd78
--- /dev/null
+++ b/vnet/vnet/dpo/receive_dpo.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief
+ * The data-path object representing receiveing the packet, i.e. it's for-us
+ */
+
+#ifndef __RECEIVE_DPO_H__
+#define __RECEIVE_DPO_H__
+
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip6.h>
+
+typedef struct receive_dpo_t_
+{
+ /**
+ * The Software interface index on which traffic is received
+ */
+ u32 rd_sw_if_index;
+
+ /**
+ * The address on the receive interface. packet are destined to this address
+ */
+ ip46_address_t rd_addr;
+
+ /**
+ * number oflocks.
+ */
+ u16 rd_locks;
+} receive_dpo_t;
+
+extern void receive_dpo_add_or_lock (dpo_proto_t proto,
+ u32 sw_if_index,
+ const ip46_address_t *nh_addr,
+ dpo_id_t *dpo);
+
+extern void receive_dpo_module_init(void);
+
+/**
+ * @brief pool of all receive DPOs
+ */
+receive_dpo_t *receive_dpo_pool;
+
+static inline receive_dpo_t *
+receive_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(receive_dpo_pool, index));
+}
+
+#endif