diff options
Diffstat (limited to 'vnet/vnet/dpo')
-rw-r--r-- | vnet/vnet/dpo/classify_dpo.c | 120 | ||||
-rw-r--r-- | vnet/vnet/dpo/classify_dpo.h | 56 | ||||
-rw-r--r-- | vnet/vnet/dpo/dpo.c | 424 | ||||
-rw-r--r-- | vnet/vnet/dpo/dpo.h | 354 | ||||
-rw-r--r-- | vnet/vnet/dpo/drop_dpo.c | 100 | ||||
-rw-r--r-- | vnet/vnet/dpo/drop_dpo.h | 37 | ||||
-rw-r--r-- | vnet/vnet/dpo/load_balance.c | 760 | ||||
-rw-r--r-- | vnet/vnet/dpo/load_balance.h | 203 | ||||
-rw-r--r-- | vnet/vnet/dpo/load_balance_map.c | 566 | ||||
-rw-r--r-- | vnet/vnet/dpo/load_balance_map.h | 78 | ||||
-rw-r--r-- | vnet/vnet/dpo/lookup_dpo.c | 802 | ||||
-rw-r--r-- | vnet/vnet/dpo/lookup_dpo.h | 108 | ||||
-rw-r--r-- | vnet/vnet/dpo/mpls_label_dpo.c | 263 | ||||
-rw-r--r-- | vnet/vnet/dpo/mpls_label_dpo.h | 66 | ||||
-rw-r--r-- | vnet/vnet/dpo/punt_dpo.c | 100 | ||||
-rw-r--r-- | vnet/vnet/dpo/punt_dpo.h | 30 | ||||
-rw-r--r-- | vnet/vnet/dpo/receive_dpo.c | 155 | ||||
-rw-r--r-- | vnet/vnet/dpo/receive_dpo.h | 62 |
18 files changed, 4284 insertions, 0 deletions
diff --git a/vnet/vnet/dpo/classify_dpo.c b/vnet/vnet/dpo/classify_dpo.c new file mode 100644 index 00000000000..3b7b98f9da8 --- /dev/null +++ b/vnet/vnet/dpo/classify_dpo.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/mpls/mpls.h> + +/* + * pool of all MPLS Label DPOs + */ +classify_dpo_t *classify_dpo_pool; + +static classify_dpo_t * +classify_dpo_alloc (void) +{ + classify_dpo_t *cd; + + pool_get_aligned(classify_dpo_pool, cd, CLIB_CACHE_LINE_BYTES); + memset(cd, 0, sizeof(*cd)); + + return (cd); +} + +static index_t +classify_dpo_get_index (classify_dpo_t *cd) +{ + return (cd - classify_dpo_pool); +} + +index_t +classify_dpo_create (fib_protocol_t proto, + u32 classify_table_index) +{ + classify_dpo_t *cd; + + cd = classify_dpo_alloc(); + cd->cd_proto = proto; + cd->cd_table_index = classify_table_index; + + return (classify_dpo_get_index(cd)); +} + +u8* +format_classify_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + classify_dpo_t *cd; + + cd = classify_dpo_get(index); + + return (format(s, "classify:[%d]:table:%d", + index, cd->cd_table_index)); +} + +static void +classify_dpo_lock (dpo_id_t *dpo) +{ + classify_dpo_t *cd; + + cd = classify_dpo_get(dpo->dpoi_index); + + cd->cd_locks++; +} + +static void +classify_dpo_unlock (dpo_id_t *dpo) +{ + classify_dpo_t *cd; + + cd = classify_dpo_get(dpo->dpoi_index); + + cd->cd_locks--; + + if (0 == cd->cd_locks) + { + pool_put(classify_dpo_pool, cd); + } +} + +const static dpo_vft_t cd_vft = { + .dv_lock = classify_dpo_lock, + .dv_unlock = classify_dpo_unlock, + .dv_format = format_classify_dpo, +}; + +const static char* const classify_ip4_nodes[] = +{ + "ip4-classify", + NULL, +}; +const static char* const classify_ip6_nodes[] = +{ + "ip6-classify", + NULL, +}; +const static char* const * const classify_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = classify_ip4_nodes, + [DPO_PROTO_IP6] = classify_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +classify_dpo_module_init (void) +{ + dpo_register(DPO_CLASSIFY, &cd_vft, classify_nodes); +} diff --git a/vnet/vnet/dpo/classify_dpo.h b/vnet/vnet/dpo/classify_dpo.h new file mode 100644 index 00000000000..cd35c3c440b --- /dev/null +++ b/vnet/vnet/dpo/classify_dpo.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CLASSIFY_DPO_H__ +#define __CLASSIFY_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of an MPLS label for imposition in the data-path + */ +typedef struct classify_dpo_t +{ + fib_protocol_t cd_proto; + + u32 cd_table_index; + + /** + * Number of locks/users of the label + */ + u16 cd_locks; +} classify_dpo_t; + +extern index_t classify_dpo_create(fib_protocol_t proto, + u32 classify_table_index); + +extern u8* format_classify_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern classify_dpo_t *classify_dpo_pool; + +static inline classify_dpo_t * +classify_dpo_get (index_t index) +{ + return (pool_elt_at_index(classify_dpo_pool, index)); +} + +extern void classify_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/dpo.c b/vnet/vnet/dpo/dpo.c new file mode 100644 index 00000000000..5eff52b7b8a --- /dev/null +++ b/vnet/vnet/dpo/dpo.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets are they are switched through VPP. + * + * The DPO is a base class that is specialised by other objects to provide + * concreate actions + * + * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances. + */ + +#include <vnet/dpo/dpo.h> +#include <vnet/ip/lookup.h> +#include <vnet/ip/format.h> +#include <vnet/adj/adj.h> + +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/classify_dpo.h> + +/** + * Array of char* names for the DPO types and protos + */ +static const char* dpo_type_names[] = DPO_TYPES; +static const char* dpo_proto_names[] = DPO_PROTOS; + +/** + * @brief Vector of virtual function tables for the DPO types + * + * This is a vector so we can dynamically register new DPO types in plugins. + */ +static dpo_vft_t *dpo_vfts; + +/** + * @brief vector of graph node names associated with each DPO type and protocol. + * + * dpo_nodes[child_type][child_proto][node_X] = node_name; + * i.e. + * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup" + * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance" + * + * This is a vector so we can dynamically register new DPO types in plugins. + */ +static const char* const * const ** dpo_nodes; + +/** + * @brief Vector of edge indicies from parent DPO nodes to child + * + * dpo_edges[child_type][child_proto][parent_type] = edge_index + * + * This array is derived at init time from the dpo_nodes above. Note that + * the third dimension in dpo_nodes is lost, hence, the edge index from each + * node MUST be the same. + * + * Note that this array is child type specific, not child instance specific. + */ +static u32 ***dpo_edges; + +/** + * @brief The DPO type value that can be assigend to the next dynamic + * type registration. + */ +static dpo_type_t dpo_dynamic = DPO_LAST; + +u8 * +format_dpo_type (u8 * s, va_list * args) +{ + dpo_type_t type = va_arg (*args, int); + + s = format(s, "%s", dpo_type_names[type]); + + return (s); +} + +u8 * +format_dpo_id (u8 * s, va_list * args) +{ + dpo_id_t *dpo = va_arg (*args, dpo_id_t*); + u32 indent = va_arg (*args, u32); + + s = format(s, "[@%d]: ", dpo->dpoi_next_node); + + if (NULL != dpo_vfts[dpo->dpoi_type].dv_format) + { + return (format(s, "%U", + dpo_vfts[dpo->dpoi_type].dv_format, + dpo->dpoi_index, + indent)); + } + + switch (dpo->dpoi_type) + { + case DPO_FIRST: + s = format(s, "unset"); + break; + default: + s = format(s, "unknown"); + break; + } + return (s); +} + +u8 * +format_dpo_proto (u8 * s, va_list * args) +{ + dpo_proto_t proto = va_arg (*args, int); + + return (format(s, "%s", dpo_proto_names[proto])); +} + +void +dpo_set (dpo_id_t *dpo, + dpo_type_t type, + dpo_proto_t proto, + index_t index) +{ + dpo_id_t tmp = *dpo; + + dpo->dpoi_type = type; + dpo->dpoi_proto = proto, + dpo->dpoi_index = index; + + if (DPO_ADJACENCY == type) + { + /* + * set the adj subtype + */ + ip_adjacency_t *adj; + + adj = adj_get(index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE; + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN; + break; + default: + break; + } + } + dpo_lock(dpo); + dpo_unlock(&tmp); +} + +void +dpo_reset (dpo_id_t *dpo) +{ + dpo_set(dpo, DPO_FIRST, DPO_PROTO_NONE, INDEX_INVALID); +} + +/** + * \brief + * Compare two Data-path objects + * + * like memcmp, return 0 is matching, !0 otherwise. + */ +int +dpo_cmp (const dpo_id_t *dpo1, + const dpo_id_t *dpo2) +{ + int res; + + res = dpo1->dpoi_type - dpo2->dpoi_type; + + if (0 != res) return (res); + + return (dpo1->dpoi_index - dpo2->dpoi_index); +} + +void +dpo_copy (dpo_id_t *dst, + const dpo_id_t *src) +{ + dpo_id_t tmp = *dst; + + /* + * the destination is written in a single u64 write - hence atomically w.r.t + * any packets inflight. + */ + *((u64*)dst) = *(u64*)src; + + dpo_lock(dst); + dpo_unlock(&tmp); +} + +int +dpo_is_adj (const dpo_id_t *dpo) +{ + return ((dpo->dpoi_type == DPO_ADJACENCY) || + (dpo->dpoi_type == DPO_ADJACENCY_INCOMPLETE) || + (dpo->dpoi_type == DPO_ADJACENCY_MIDCHAIN) || + (dpo->dpoi_type == DPO_ADJACENCY_GLEAN)); +} + +void +dpo_register (dpo_type_t type, + const dpo_vft_t *vft, + const char * const * const * nodes) +{ + vec_validate(dpo_vfts, type); + dpo_vfts[type] = *vft; + + vec_validate(dpo_nodes, type); + dpo_nodes[type] = nodes; +} + +dpo_type_t +dpo_register_new_type (const dpo_vft_t *vft, + const char * const * const * nodes) +{ + dpo_type_t type = dpo_dynamic++; + + dpo_register(type, vft, nodes); + + return (type); +} + +void +dpo_lock (dpo_id_t *dpo) +{ + if (!dpo_id_is_valid(dpo)) + return; + + dpo_vfts[dpo->dpoi_type].dv_lock(dpo); +} + +void +dpo_unlock (dpo_id_t *dpo) +{ + if (!dpo_id_is_valid(dpo)) + return; + + dpo_vfts[dpo->dpoi_type].dv_unlock(dpo); +} + + +static u32 +dpo_get_next_node (dpo_type_t child_type, + dpo_proto_t child_proto, + const dpo_id_t *parent_dpo) +{ + dpo_proto_t parent_proto; + dpo_type_t parent_type; + + parent_type = parent_dpo->dpoi_type; + parent_proto = parent_dpo->dpoi_proto; + + vec_validate(dpo_edges, child_type); + vec_validate(dpo_edges[child_type], child_proto); + vec_validate_init_empty(dpo_edges[child_type][child_proto], + parent_dpo->dpoi_type, ~0); + + /* + * if the edge index has not yet been created for this node to node transistion + */ + if (~0 == dpo_edges[child_type][child_proto][parent_type]) + { + vlib_node_t *parent_node, *child_node; + vlib_main_t *vm; + u32 edge ,pp, cc; + + vm = vlib_get_main(); + + ASSERT(NULL != dpo_nodes[child_type]); + ASSERT(NULL != dpo_nodes[child_type][child_proto]); + ASSERT(NULL != dpo_nodes[parent_type]); + ASSERT(NULL != dpo_nodes[parent_type][parent_proto]); + + pp = 0; + + /* + * create a graph arc from each of the parent's registered node types, + * to each of the childs. + */ + while (NULL != dpo_nodes[child_type][child_proto][pp]) + { + parent_node = + vlib_get_node_by_name(vm, + (u8*) dpo_nodes[child_type][child_proto][pp]); + + cc = 0; + + while (NULL != dpo_nodes[parent_type][child_proto][cc]) + { + child_node = + vlib_get_node_by_name(vm, + (u8*) dpo_nodes[parent_type][parent_proto][cc]); + + edge = vlib_node_add_next(vm, + parent_node->index, + child_node->index); + + if (~0 == dpo_edges[child_type][child_proto][parent_type]) + { + dpo_edges[child_type][child_proto][parent_type] = edge; + } + else + { + ASSERT(dpo_edges[child_type][child_proto][parent_type] == edge); + } + cc++; + } + pp++; + } + } + + return (dpo_edges[child_type][child_proto][parent_type]); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child parent + * relationship. The VLIB graph arc used is taken from the parent and child types + * passed. + */ +static void +dpo_stack_i (u32 edge, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + /* + * in order to get an atomic update of the parent we create a temporary, + * from a copy of the child, and add the next_node. then we copy to the parent + */ + dpo_id_t tmp = DPO_NULL; + dpo_copy(&tmp, parent); + + /* + * get the edge index for the parent to child VLIB graph transisition + */ + tmp.dpoi_next_node = edge; + + /* + * this update is atomic. + */ + dpo_copy(dpo, &tmp); + + dpo_reset(&tmp); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child-parent + * relationship. The VLIB graph arc used is taken from the parent and child types + * passed. + */ +void +dpo_stack (dpo_type_t child_type, + dpo_proto_t child_proto, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child parent + * relationship. A new VLIB graph arc is created from the child node passed + * to the nodes registered by the parent. The VLIB infra will ensure this arc + * is added only once. + */ +void +dpo_stack_from_node (u32 child_node_index, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + dpo_proto_t parent_proto; + vlib_node_t *parent_node; + dpo_type_t parent_type; + vlib_main_t *vm; + u32 edge; + + parent_type = parent->dpoi_type; + parent_proto = parent->dpoi_proto; + + vm = vlib_get_main(); + + ASSERT(NULL != dpo_nodes[parent_type]); + ASSERT(NULL != dpo_nodes[parent_type][parent_proto]); + + parent_node = + vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]); + + edge = vlib_node_add_next(vm, + child_node_index, + parent_node->index); + + dpo_stack_i(edge, dpo, parent); +} + +static clib_error_t * +dpo_module_init (vlib_main_t * vm) +{ + drop_dpo_module_init(); + punt_dpo_module_init(); + receive_dpo_module_init(); + load_balance_module_init(); + mpls_label_dpo_module_init(); + classify_dpo_module_init(); + lookup_dpo_module_init(); + + return (NULL); +} + +VLIB_INIT_FUNCTION(dpo_module_init); diff --git a/vnet/vnet/dpo/dpo.h b/vnet/vnet/dpo/dpo.h new file mode 100644 index 00000000000..8c22f00b091 --- /dev/null +++ b/vnet/vnet/dpo/dpo.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets are they are switched through VPP's data-path. + * + * The DPO can be considered to be like is a base class that is specialised + * by other objects to provide concreate actions + * + * The VLIB graph nodes are graph of DPO types, the DPO graph is a graph of + * instances. + */ + +#ifndef __DPO_H__ +#define __DPO_H__ + +#include <vnet/vnet.h> + +/** + * @brief An index for adjacencies. + * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of + * an index_t. However, for us humans, we can glean much more intent + * from the declaration + * foo barindex_t t); + * than we can from + * foo bar(u32 t); + */ +typedef u32 index_t; + +/** + * @brief Invalid index - used when no index is known + * blazoned capitals INVALID speak volumes where ~0 does not. + */ +#define INDEX_INVALID ((index_t)(~0)) + +/** + * @brief Data path protocol. + * Actions performed on packets in the data-plane can be described and represented + * by protocol independent objects, i.e. ADJACENCY, but the spceifics actions + * required during ADJACENCY processing can be protocol dependent. For example, + * the adjacency rewrite node performs a ip4 checksum calculation, ip6 and MPLS + * do not, all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol + * dependent, and thus each graph edge/arc is too. + * When programming a DPO's next node arc from child to parent it is thus required + * to know the parent's data-path protocol so the correct arc index can be used. + */ +typedef enum dpo_proto_t_ +{ +#if CLIB_DEBUG > 0 + DPO_PROTO_IP4 = 1, +#else + DPO_PROTO_IP4 = 0, +#endif + DPO_PROTO_IP6, + DPO_PROTO_MPLS, +} __attribute__((packed)) dpo_proto_t; + +#define DPO_PROTO_NUM (DPO_PROTO_MPLS+1) +#define DPO_PROTO_NONE (DPO_PROTO_NUM+1) + +#define DPO_PROTOS { \ + [DPO_PROTO_IP4] = "ip4", \ + [DPO_PROTO_IP6] = "ip6", \ + [DPO_PROTO_MPLS] = "mpls", \ +} + +/** + * @brief Common types of data-path objects + * New types can be dynamically added using dpo_register_new_type() + */ +typedef enum dpo_type_t_ { + /** + * A non-zero value first so we can spot unitialisation errors + */ + DPO_FIRST, + DPO_DROP, + DPO_PUNT, + /** + * @brief load-balancing over a choice of [un]equal cost paths + */ + DPO_LOAD_BALANCE, + DPO_ADJACENCY, + DPO_ADJACENCY_INCOMPLETE, + DPO_ADJACENCY_MIDCHAIN, + DPO_ADJACENCY_GLEAN, + DPO_RECEIVE, + DPO_LOOKUP, + DPO_LISP_CP, + DPO_CLASSIFY, + DPO_MPLS_LABEL, + DPO_LAST, +} __attribute__((packed)) dpo_type_t; + +#define DPO_TYPE_NUM DPO_LAST + +#define DPO_TYPES { \ + [DPO_FIRST] = "dpo-invalid", \ + [DPO_DROP] = "dpo-drop", \ + [DPO_PUNT] = "dpo-punt", \ + [DPO_ADJACENCY] = "dpo-adjacency", \ + [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \ + [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ + [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ + [DPO_RECEIVE] = "dpo-receive", \ + [DPO_LOOKUP] = "dpo-lookup", \ + [DPO_LOAD_BALANCE] = "dpo-load-balance", \ + [DPO_LISP_CP] = "dpo-lisp-cp", \ + [DPO_CLASSIFY] = "dpo-classify", \ + [DPO_MPLS_LABEL] = "dpo-mpls-label", \ +} + +/** + * @brief The identity of a DPO is a combination of its type and its + * instance number/index of objects of that type + */ +typedef struct dpo_id_t_ { + /** + * the type + */ + dpo_type_t dpoi_type; + /** + * the data-path protocol of the type. + */ + dpo_proto_t dpoi_proto; + /** + * The next VLIB node to follow. + */ + u16 dpoi_next_node; + /** + * the index of objects of that type + */ + index_t dpoi_index; +} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t; + +_Static_assert(sizeof(dpo_id_t) <= sizeof(u64), + "DPO ID is greater than sizeof u64 " + "atomic updates need to be revisited"); + +/** + * @brief An initialiser for DPos declared on the stack. + */ +#define DPO_NULL {0} + +/** + * @brief Return true if the DPO object is valid, i.e. has been initialised. + */ +static inline int +dpo_id_is_valid (const dpo_id_t *dpoi) +{ + return (dpoi->dpoi_type != DPO_FIRST && + dpoi->dpoi_index != INDEX_INVALID); +} + +/** + * @brief + * Take a reference counting lock on the DPO + */ +extern void dpo_lock(dpo_id_t *dpo); + +/** + * @brief + * Release a reference counting lock on the DPO + */ +extern void dpo_unlock(dpo_id_t *dpo); + +/** + * @brief Set/create a DPO ID + * The DPO will be locked. + * + * @param dpo + * The DPO object to configure + * + * @param type + * The dpo_type_t of the DPO + * + * @param proto + * The dpo_proto_t of the DPO + * + * @param index + * The type specific index of the DPO + */ +extern void dpo_set(dpo_id_t *dpo, + dpo_type_t type, + dpo_proto_t proto, + index_t index); + +/** + * @brief reset a DPO ID + * The DPO will be unlocked. + * + * @param dpo + * The DPO object to reset + */ +extern void dpo_reset(dpo_id_t *dpo); + +/** + * @brief compare two DPOs for equality + */ +extern int dpo_cmp(const dpo_id_t *dpo1, + const dpo_id_t *dpo2); + +/** + * @brief + * atomic copy a data-plane object. + * This is safe to use when the dst DPO is currently switching packets + */ +extern void dpo_copy(dpo_id_t *dst, + const dpo_id_t *src); + +/** + * @brief Return TRUE is the DPO is any type of adjacency + */ +extern int dpo_is_adj(const dpo_id_t *dpo); + +/** + * @biref Format a DPO_id_t oject + */ +extern u8 *format_dpo_id(u8 * s, va_list * args); + +/** + * @biref format a DPO type + */ +extern u8 *format_dpo_type(u8 * s, va_list * args); + +/** + * @brief format a DPO protocol + */ +extern u8 *format_dpo_proto(u8 * s, va_list * args); + +/** + * @brief + * Set and stack a DPO. + * The DPO passed is set to the parent DPO and the necessary + * VLIB graph arcs are created. The child_type and child_proto + * are used to get the VLID nodes from which the arcs are added. + * + * @param child_type + * Child DPO type. + * + * @param child_proto + * Child DPO proto + * + * @parem dpo + * This is the DPO to stack and set. + * + * @paren parent_dpo + * The parent DPO to stack onto. + */ +extern void dpo_stack(dpo_type_t child_type, + dpo_proto_t child_proto, + dpo_id_t *dpo, + const dpo_id_t *parent_dpo); + +/** + * @brief + * Set and stack a DPO. + * The DPO passed is set to the parent DPO and the necessary + * VLIB graph arcs are created, from the child_node passed. + * + * @param child_node + * The VLIB grpah node index to create an arc from to the parent + * + * @parem dpo + * This is the DPO to stack and set. + * + * @paren parent_dpo + * The parent DPO to stack onto. + */ +extern void dpo_stack_from_node(u32 child_node, + dpo_id_t *dpo, + const dpo_id_t *parent); + +/** + * @brief A lock function registered for a DPO type + */ +typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo); + +/** + * @brief An unlock function registered for a DPO type + */ +typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo); + +/** + * @brief A virtual function table regisitered for a DPO type + */ +typedef struct dpo_vft_t_ +{ + /** + * A reference counting lock function + */ + dpo_lock_fn_t dv_lock; + /** + * A reference counting unlock function + */ + dpo_lock_fn_t dv_unlock; + /** + * A format function + */ + format_function_t *dv_format; +} dpo_vft_t; + + +/** + * @brief For a given DPO type Register: + * - a virtual function table + * - a NULL terminated array of graph nodes from which that object type + * will originate packets, i.e. the nodes in which the object type will be + * the parent DPO in the DP graph. The ndoes are per-data-path protocol + * (see above). + * + * @param type + * The type being registered. + * + * @param vft + * The virtual function table to register for the type. + * + * @param nodes + * The string description of the per-protocol VLIB graph nodes. + */ +void dpo_register(dpo_type_t type, + const dpo_vft_t *vft, + const char * const * const * nodes); + +/** + * @brief Create and register a new DPO type. + * + * This can be used by plugins to create new DPO types that are not listed + * in dpo_type_t enum + * + * @param vft + * The virtual function table to register for the type. + * + * @param nodes + * The string description of the per-protocol VLIB graph nodes. + * + * @return The new dpo_type_t + */ +dpo_type_t dpo_register_new_type(const dpo_vft_t *vft, + const char * const * const * nodes); + +#endif diff --git a/vnet/vnet/dpo/drop_dpo.c b/vnet/vnet/dpo/drop_dpo.c new file mode 100644 index 00000000000..62f56488a01 --- /dev/null +++ b/vnet/vnet/dpo/drop_dpo.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing dropping the packet + */ + +#include <vnet/dpo/dpo.h> + +static dpo_id_t drop_dpos[DPO_PROTO_NUM]; + +const dpo_id_t * +drop_dpo_get (dpo_proto_t proto) +{ + dpo_set(&drop_dpos[proto], DPO_DROP, proto, 1); + + return (&drop_dpos[proto]); +} + +int +dpo_is_drop (const dpo_id_t *dpo) +{ + return (dpo->dpoi_type == DPO_DROP); +} + +static void +drop_dpo_lock (dpo_id_t *dpo) +{ + /* + * not maintaining a lock count on the drop + * more trouble than it's worth. + * There always needs to be one around. no point it managaing its lifetime + */ +} +static void +drop_dpo_unlock (dpo_id_t *dpo) +{ +} + +static u8* +format_drop_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + + return (format(s, "dpo-drop")); +} + +const static dpo_vft_t drop_vft = { + .dv_lock = drop_dpo_lock, + .dv_unlock = drop_dpo_unlock, + .dv_format = format_drop_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a drop + * object. + * + * this means that these graph nodes are ones from which a drop is the + * parent object in the DPO-graph. + */ +const static char* const drop_ip4_nodes[] = +{ + "ip4-drop", + NULL, +}; +const static char* const drop_ip6_nodes[] = +{ + "ip6-drop", + NULL, +}; +const static char* const drop_mpls_nodes[] = +{ + "mpls-drop", + NULL, +}; +const static char* const * const drop_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = drop_ip4_nodes, + [DPO_PROTO_IP6] = drop_ip6_nodes, + [DPO_PROTO_MPLS] = drop_mpls_nodes, +}; + +void +drop_dpo_module_init (void) +{ + dpo_register(DPO_DROP, &drop_vft, drop_nodes); +} diff --git a/vnet/vnet/dpo/drop_dpo.h b/vnet/vnet/dpo/drop_dpo.h new file mode 100644 index 00000000000..e7bd8f5156e --- /dev/null +++ b/vnet/vnet/dpo/drop_dpo.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets are they are switched through VPP. + * + * The DPO is a base class that is specialised by other objects to provide + * concreate actions + * + * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances. + */ + +#ifndef __DROP_DPO_H__ +#define __DROP_DPO_H__ + +#include <vnet/dpo/dpo.h> + +extern int dpo_is_drop(const dpo_id_t *dpo); + +extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto); + +extern void drop_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/load_balance.c b/vnet/vnet/dpo/load_balance.c new file mode 100644 index 00000000000..963ff0ba160 --- /dev/null +++ b/vnet/vnet/dpo/load_balance.c @@ -0,0 +1,760 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/lookup.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/drop_dpo.h> +#include <vppinfra/math.h> /* for fabs */ +#include <vnet/adj/adj.h> +#include <vnet/adj/adj_alloc.h> +#include <vnet/adj/adj_internal.h> + +/* + * distribution error tolerance for load-balancing + */ +const f64 multipath_next_hop_error_tolerance = 0.1; + +#undef LB_DEBUG + +#ifdef LB_DEBUG +#define LB_DBG(_lb, _fmt, _args...) \ +{ \ + u8* _tmp =NULL; \ + clib_warning("lb:[%s]:" _fmt, \ + load_balance_format(load_balance_get_index((_lb)), \ + 0, _tmp), \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define LB_DBG(_p, _fmt, _args...) +#endif + + +/** + * Pool of all DPOs. It's not static so the DP can have fast access + */ +load_balance_t *load_balance_pool; + +/** + * The one instance of load-balance main + */ +load_balance_main_t load_balance_main; + +f64 +load_balance_get_multipath_tolerance (void) +{ + return (multipath_next_hop_error_tolerance); +} + +static inline index_t +load_balance_get_index (const load_balance_t *lb) +{ + return (lb - load_balance_pool); +} + +static inline dpo_id_t* +load_balance_get_buckets (load_balance_t *lb) +{ + if (LB_HAS_INLINE_BUCKETS(lb)) + { + return (lb->lb_buckets_inline); + } + else + { + return (lb->lb_buckets); + } +} + +static load_balance_t * +load_balance_alloc_i (void) +{ + load_balance_t *lb; + + pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES); + memset(lb, 0, sizeof(*lb)); + + lb->lb_map = INDEX_INVALID; + vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters), + load_balance_get_index(lb)); + vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters), + load_balance_get_index(lb)); + vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters), + load_balance_get_index(lb)); + vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters), + load_balance_get_index(lb)); + + return (lb); +} + +static u8* +load_balance_format (index_t lbi, + load_balance_format_flags_t flags, + u32 indent, + u8 *s) +{ + vlib_counter_t to, via; + load_balance_t *lb; + dpo_id_t *buckets; + u32 i; + + lb = load_balance_get(lbi); + vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to); + vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via); + buckets = load_balance_get_buckets(lb); + + s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE); + s = format(s, "[index:%d buckets:%d ", lbi, lb->lb_n_buckets); + s = format(s, "locks:%d ", lb->lb_locks); + s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes); + if (0 != via.packets) + { + s = format(s, " via:[%Ld:%Ld]", + via.packets, via.bytes); + } + s = format(s, "]"); + + if (INDEX_INVALID != lb->lb_map) + { + s = format(s, "\n%U%U", + format_white_space, indent+4, + format_load_balance_map, lb->lb_map, indent+4); + } + for (i = 0; i < lb->lb_n_buckets; i++) + { + s = format(s, "\n%U[%d] %U", + format_white_space, indent+2, + i, + format_dpo_id, + &buckets[i], indent+6); + } + return (s); +} + +u8* +format_load_balance (u8 * s, va_list * args) +{ + index_t lbi = va_arg(args, index_t); + load_balance_format_flags_t flags = va_arg(args, load_balance_format_flags_t); + + return (load_balance_format(lbi, flags, 0, s)); +} +static u8* +format_load_balance_dpo (u8 * s, va_list * args) +{ + index_t lbi = va_arg(args, index_t); + u32 indent = va_arg(args, u32); + + return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s)); +} + + +static load_balance_t * +load_balance_create_i (u32 num_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc) +{ + load_balance_t *lb; + + lb = load_balance_alloc_i(); + lb->lb_hash_config = fhc; + lb->lb_n_buckets = num_buckets; + lb->lb_n_buckets_minus_1 = num_buckets-1; + lb->lb_proto = lb_proto; + + if (!LB_HAS_INLINE_BUCKETS(lb)) + { + vec_validate_aligned(lb->lb_buckets, + lb->lb_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + } + + LB_DBG(lb, "create"); + + return (lb); +} + +index_t +load_balance_create (u32 n_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc) +{ + return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc))); +} + +static inline void +load_balance_set_bucket_i (load_balance_t *lb, + u32 bucket, + dpo_id_t *buckets, + const dpo_id_t *next) +{ + dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next); +} + +void +load_balance_set_bucket (index_t lbi, + u32 bucket, + const dpo_id_t *next) +{ + load_balance_t *lb; + dpo_id_t *buckets; + + lb = load_balance_get(lbi); + buckets = load_balance_get_buckets(lb); + + ASSERT(bucket < lb->lb_n_buckets); + + load_balance_set_bucket_i(lb, bucket, buckets, next); +} + +int +load_balance_is_drop (const dpo_id_t *dpo) +{ + load_balance_t *lb; + + if (DPO_LOAD_BALANCE != dpo->dpoi_type) + return (0); + + lb = load_balance_get(dpo->dpoi_index); + + if (1 == lb->lb_n_buckets) + { + return (dpo_is_drop(load_balance_get_bucket_i(lb, 0))); + } + return (0); +} + +const dpo_id_t * +load_balance_get_bucket (index_t lbi, + u32 bucket) +{ + load_balance_t *lb; + + lb = load_balance_get(lbi); + + return (load_balance_get_bucket_i(lb, bucket)); +} + +static int +next_hop_sort_by_weight (load_balance_path_t * n1, + load_balance_path_t * n2) +{ + return ((int) n1->path_weight - (int) n2->path_weight); +} + +/* Given next hop vector is over-written with normalized one with sorted weights and + with weights corresponding to the number of adjacencies for each next hop. + Returns number of adjacencies in block. */ +u32 +ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops, + load_balance_path_t ** normalized_next_hops, + u32 *sum_weight_in, + f64 multipath_next_hop_error_tolerance) +{ + load_balance_path_t * nhs; + uword n_nhs, n_adj, n_adj_left, i, sum_weight; + f64 norm, error; + + n_nhs = vec_len (raw_next_hops); + ASSERT (n_nhs > 0); + if (n_nhs == 0) + return 0; + + /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */ + nhs = *normalized_next_hops; + vec_validate (nhs, 2*n_nhs - 1); + + /* Fast path: 1 next hop in block. */ + n_adj = n_nhs; + if (n_nhs == 1) + { + nhs[0] = raw_next_hops[0]; + nhs[0].path_weight = 1; + _vec_len (nhs) = 1; + sum_weight = 1; + goto done; + } + + else if (n_nhs == 2) + { + int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0; + + /* Fast sort. */ + nhs[0] = raw_next_hops[cmp]; + nhs[1] = raw_next_hops[cmp ^ 1]; + + /* Fast path: equal cost multipath with 2 next hops. */ + if (nhs[0].path_weight == nhs[1].path_weight) + { + nhs[0].path_weight = nhs[1].path_weight = 1; + _vec_len (nhs) = 2; + sum_weight = 2; + goto done; + } + } + else + { + clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); + qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); + } + + /* Find total weight to normalize weights. */ + sum_weight = 0; + for (i = 0; i < n_nhs; i++) + sum_weight += nhs[i].path_weight; + + /* In the unlikely case that all weights are given as 0, set them all to 1. */ + if (sum_weight == 0) + { + for (i = 0; i < n_nhs; i++) + nhs[i].path_weight = 1; + sum_weight = n_nhs; + } + + /* Save copies of all next hop weights to avoid being overwritten in loop below. */ + for (i = 0; i < n_nhs; i++) + nhs[n_nhs + i].path_weight = nhs[i].path_weight; + + /* Try larger and larger power of 2 sized adjacency blocks until we + find one where traffic flows to within 1% of specified weights. */ + for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) + { + error = 0; + + norm = n_adj / ((f64) sum_weight); + n_adj_left = n_adj; + for (i = 0; i < n_nhs; i++) + { + f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */ + word n = flt_round_nearest (nf); + + n = n > n_adj_left ? n_adj_left : n; + n_adj_left -= n; + error += fabs (nf - n); + nhs[i].path_weight = n; + } + + nhs[0].path_weight += n_adj_left; + + /* Less than 5% average error per adjacency with this size adjacency block? */ + if (error <= multipath_next_hop_error_tolerance*n_adj) + { + /* Truncate any next hops with zero weight. */ + _vec_len (nhs) = i; + break; + } + } + +done: + /* Save vector for next call. */ + *normalized_next_hops = nhs; + *sum_weight_in = sum_weight; + return n_adj; +} + +static load_balance_path_t * +load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs, + dpo_proto_t drop_proto) +{ + if (0 == vec_len(nhs)) + { + load_balance_path_t *nh; + + /* + * we need something for the load-balance. so use the drop + */ + vec_add2(nhs, nh, 1); + + nh->path_weight = 1; + dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto)); + } + + return (nhs); +} + +/* + * Fill in adjacencies in block based on corresponding + * next hop adjacencies. + */ +static void +load_balance_fill_buckets (load_balance_t *lb, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t * nh; + u16 ii, bucket; + + bucket = 0; + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo); + } + } +} + +static inline void +load_balance_set_n_buckets (load_balance_t *lb, + u32 n_buckets) +{ + lb->lb_n_buckets = n_buckets; + lb->lb_n_buckets_minus_1 = n_buckets-1; +} + +void +load_balance_multipath_update (const dpo_id_t *dpo, + load_balance_path_t * raw_next_hops, + load_balance_flags_t flags) +{ + u32 sum_of_weights,n_buckets, ii; + load_balance_path_t * nh, * nhs; + index_t lbmi, old_lbmi; + load_balance_t *lb; + dpo_id_t *tmp_dpo; + + nhs = NULL; + + ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); + lb = load_balance_get(dpo->dpoi_index); + raw_next_hops = + load_balance_multipath_next_hop_fixup(raw_next_hops, + lb->lb_proto); + n_buckets = + ip_multipath_normalize_next_hops(raw_next_hops, + &nhs, + &sum_of_weights, + multipath_next_hop_error_tolerance); + + ASSERT (n_buckets >= vec_len (raw_next_hops)); + + /* + * Save the old load-balance map used, and get a new one if required. + */ + old_lbmi = lb->lb_map; + if (flags & LOAD_BALANCE_FLAG_USES_MAP) + { + lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs); + } + else + { + lbmi = INDEX_INVALID; + } + + if (0 == lb->lb_n_buckets) + { + /* + * first time initialisation. no packets inflight, so we can write + * at leisure. + */ + load_balance_set_n_buckets(lb, n_buckets); + + if (!LB_HAS_INLINE_BUCKETS(lb)) + vec_validate_aligned(lb->lb_buckets, + lb->lb_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + lb->lb_map = lbmi; + } + else + { + /* + * This is a modification of an existing load-balance. + * We need to ensure that packets inflight see a consistent state, that + * is the number of reported buckets the LB has (read from + * lb_n_buckets_minus_1) is not more than it actually has. So if the + * number of buckets is increasing, we must update the bucket array first, + * then the reported number. vice-versa if the number of buckets goes down. + */ + if (n_buckets == lb->lb_n_buckets) + { + /* + * no change in the number of buckets. we can simply fill what + * is new over what is old. + */ + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + lb->lb_map = lbmi; + } + else if (n_buckets > lb->lb_n_buckets) + { + /* + * we have more buckets. the old load-balance map (if there is one) + * will remain valid, i.e. mapping to indices within range, so we + * update it last. + */ + if (n_buckets > LB_NUM_INLINE_BUCKETS && + lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS) + { + /* + * the new increased number of buckets is crossing the threshold + * from the inline storage to out-line. Alloc the outline buckets + * first, then fixup the number. then reset the inlines. + */ + ASSERT(NULL == lb->lb_buckets); + vec_validate_aligned(lb->lb_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + load_balance_fill_buckets(lb, nhs, + lb->lb_buckets, + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + + CLIB_MEMORY_BARRIER(); + + for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++) + { + dpo_reset(&lb->lb_buckets_inline[ii]); + } + } + else + { + /* + * we are not crossing the threshold. we can write the new on the + * old, whether they be inline or not. + */ + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + } + + /* + * buckets fixed. ready for the MAP update. + */ + lb->lb_map = lbmi; + } + else + { + /* + * bucket size shrinkage. + * Any map we have will be based on the old + * larger number of buckets, so will be translating to indices + * out of range. So the new MAP must be installed first. + */ + lb->lb_map = lbmi; + CLIB_MEMORY_BARRIER(); + + + if (n_buckets <= LB_NUM_INLINE_BUCKETS && + lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS) + { + /* + * the new decreased number of buckets is crossing the threshold + * from out-line storage to inline: + * 1 - Fill the inline buckets, + * 2 - fixup the number (and this point the inline buckets are + * used). + * 3 - free the outline buckets + */ + load_balance_fill_buckets(lb, nhs, + lb->lb_buckets_inline, + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + CLIB_MEMORY_BARRIER(); + + vec_foreach(tmp_dpo, lb->lb_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(lb->lb_buckets); + } + else + { + /* + * not crossing the threshold. + * 1 - update the number to the smaller size + * 2 - write the new buckets + * 3 - reset those no longer used. + */ + dpo_id_t *buckets; + u32 old_n_buckets; + + old_n_buckets = lb->lb_n_buckets; + buckets = load_balance_get_buckets(lb); + + load_balance_set_n_buckets(lb, n_buckets); + CLIB_MEMORY_BARRIER(); + + load_balance_fill_buckets(lb, nhs, + buckets, + n_buckets); + + for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++) + { + dpo_reset(&buckets[ii]); + } + } + } + } + + vec_foreach (nh, nhs) + { + dpo_reset(&nh->path_dpo); + } + + load_balance_map_unlock(old_lbmi); +} + +static void +load_balance_lock (dpo_id_t *dpo) +{ + load_balance_t *lb; + + lb = load_balance_get(dpo->dpoi_index); + + lb->lb_locks++; +} + +static void +load_balance_destroy (load_balance_t *lb) +{ + dpo_id_t *buckets; + int i; + + buckets = load_balance_get_buckets(lb); + + for (i = 0; i < lb->lb_n_buckets; i++) + { + dpo_reset(&buckets[i]); + } + + LB_DBG(lb, "destroy"); + if (!LB_HAS_INLINE_BUCKETS(lb)) + { + vec_free(lb->lb_buckets); + } + + pool_put(load_balance_pool, lb); +} + +static void +load_balance_unlock (dpo_id_t *dpo) +{ + load_balance_t *lb; + + lb = load_balance_get(dpo->dpoi_index); + + lb->lb_locks--; + + if (0 == lb->lb_locks) + { + load_balance_destroy(lb); + } +} + +const static dpo_vft_t lb_vft = { + .dv_lock = load_balance_lock, + .dv_unlock = load_balance_unlock, + .dv_format = format_load_balance_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance + * object. + * + * this means that these graph nodes are ones from which a load-balance is the + * parent object in the DPO-graph. + * + * We do not list all the load-balance nodes, such as the *-lookup. instead + * we are relying on the correct use of the .sibling_of field when setting + * up these sibling nodes. + */ +const static char* const load_balance_ip4_nodes[] = +{ + "ip4-load-balance", + NULL, +}; +const static char* const load_balance_ip6_nodes[] = +{ + "ip6-load-balance", + NULL, +}; +const static char* const load_balance_mpls_nodes[] = +{ + "mpls-load-balance", + NULL, +}; +const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = load_balance_ip4_nodes, + [DPO_PROTO_IP6] = load_balance_ip6_nodes, + [DPO_PROTO_MPLS] = load_balance_mpls_nodes, +}; + +void +load_balance_module_init (void) +{ + dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); + + load_balance_map_module_init(); +} + +static clib_error_t * +load_balance_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t lbi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &lbi)) + ; + else + break; + } + + if (INDEX_INVALID != lbi) + { + vlib_cli_output (vm, "%U", format_load_balance, lbi, + LOAD_BALANCE_FORMAT_DETAIL); + } + else + { + load_balance_t *lb; + + pool_foreach(lb, load_balance_pool, + ({ + vlib_cli_output (vm, "%U", format_load_balance, + load_balance_get_index(lb), + LOAD_BALANCE_FORMAT_NONE); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (load_balance_show_command, static) = { + .path = "show load-balance", + .short_help = "show load-balance [<index>]", + .function = load_balance_show, +}; diff --git a/vnet/vnet/dpo/load_balance.h b/vnet/vnet/dpo/load_balance.h new file mode 100644 index 00000000000..d630a2c2d75 --- /dev/null +++ b/vnet/vnet/dpo/load_balance.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * \brief + * The load-balance object represents an ECMP choice. The buckets of a load + * balance object point to the sub-graph after the choice is made. + * THe load-balance object is also object type returned from a FIB table lookup. + * As such it needs to represent the case where there is only one coice. It may + * seem like overkill to use a load-balance object in this case, but the reason + * is for performance. If the load-balance object were not the result of the FIB + * lookup, then some other object would be. The case where there was ECMP + * this other object would need a load-balance as a parent and hence just add + * an unnecessary indirection. + * + * It is also the object in the DP that represents a via-fib-entry in a recursive + * route. + * + */ + +#ifndef __LOAD_BALANCE_H__ +#define __LOAD_BALANCE_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/lookup.h> +#include <vnet/dpo/dpo.h> +#include <vnet/fib/fib_types.h> + +/** + * Load-balance main + */ +typedef struct load_balance_main_t_ +{ + vlib_combined_counter_main_t lbm_to_counters; + vlib_combined_counter_main_t lbm_via_counters; +} load_balance_main_t; + +extern load_balance_main_t load_balance_main; + +/** + * The number of buckets that a load-balance object can have and still + * fit in one cache-line + */ +#define LB_NUM_INLINE_BUCKETS 4 + +/** + * @brief One path from an [EU]CMP set that the client wants to add to a + * load-balance object + */ +typedef struct load_balance_path_t_ { + /** + * ID of the Data-path object. + */ + dpo_id_t path_dpo; + + /** + * The index of the FIB path + */ + fib_node_index_t path_index; + + /** + * weight for the path. + */ + u32 path_weight; +} load_balance_path_t; + +/** + * The FIB DPO provieds; + * - load-balancing over the next DPOs in the chain/graph + * - per-route counters + */ +typedef struct load_balance_t_ { + /** + * number of buckets in the load-balance. always a power of 2. + */ + u16 lb_n_buckets; + /** + * number of buckets in the load-balance - 1. used in the switch path + * as part of the hash calculation. + */ + u16 lb_n_buckets_minus_1; + + /** + * The protocol of packets that traverse this LB. + * need in combination with the flow hash config to determine how to hash. + * u8. + */ + dpo_proto_t lb_proto; + + /** + * The number of locks, which is approximately the number of users, + * of this load-balance. + * Load-balance objects of via-entries are heavily shared by recursives, + * so the lock count is a u32. + */ + u32 lb_locks; + + /** + * index of the load-balance map, INVALID if this LB does not use one + */ + index_t lb_map; + + /** + * the hash config to use when selecting a bucket. this is a u16 + */ + flow_hash_config_t lb_hash_config; + + /** + * Vector of buckets containing the next DPOs, sized as lbo_num + */ + dpo_id_t *lb_buckets; + + /** + * The rest of the cache line is used for buckets. In the common case + * where there there are less than 4 buckets, then the buckets are + * on the same cachlie and we save ourselves a pointer dereferance in + * the data-path. + */ + dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS]; +} load_balance_t; + +_Static_assert(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES, + "A load_balance object size exceeds one cachline"); + +/** + * Flags controlling load-balance formatting/display + */ +typedef enum load_balance_format_flags_t_ { + LOAD_BALANCE_FORMAT_NONE, + LOAD_BALANCE_FORMAT_DETAIL = (1 << 0), +} load_balance_format_flags_t; + +/** + * Flags controlling load-balance creation and modification + */ +typedef enum load_balance_flags_t_ { + LOAD_BALANCE_FLAG_NONE = 0, + LOAD_BALANCE_FLAG_USES_MAP = (1 << 0), +} load_balance_flags_t; + +extern index_t load_balance_create(u32 num_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc); +extern void load_balance_multipath_update( + const dpo_id_t *dpo, + load_balance_path_t * raw_next_hops, + load_balance_flags_t flags); + +extern void load_balance_set_bucket(index_t lbi, + u32 bucket, + const dpo_id_t *next); + +extern u8* format_load_balance(u8 * s, va_list * args); + +extern const dpo_id_t *load_balance_get_bucket(index_t lbi, + u32 bucket); +extern int load_balance_is_drop(const dpo_id_t *dpo); + +extern f64 load_balance_get_multipath_tolerance(void); + +/** + * The encapsulation breakages are for fast DP access + */ +extern load_balance_t *load_balance_pool; +static inline load_balance_t* +load_balance_get (index_t lbi) +{ + return (pool_elt_at_index(load_balance_pool, lbi)); +} + +#define LB_HAS_INLINE_BUCKETS(_lb) \ + ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS) + +static inline const dpo_id_t * +load_balance_get_bucket_i (const load_balance_t *lb, + u32 bucket) +{ + ASSERT(bucket < lb->lb_n_buckets); + + if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb))) + { + return (&lb->lb_buckets_inline[bucket]); + } + else + { + return (&lb->lb_buckets[bucket]); + } +} + +extern void load_balance_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/load_balance_map.c b/vnet/vnet/dpo/load_balance_map.c new file mode 100644 index 00000000000..f08801f1ce7 --- /dev/null +++ b/vnet/vnet/dpo/load_balance_map.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + */ +#include <vnet/fib/fib_path.h> +#include <vnet/fib/fib_node_list.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/load_balance.h> + +/** + * A hash-table of load-balance maps by path index. + * this provides the fast lookup of the LB map when a path goes down + */ +static uword *lb_maps_by_path_index; + +/** + * A hash-table of load-balance maps by set of paths. + * This provides the LB map sharing. + * LB maps do not necessarily use all the paths in the list, since + * the entry that is requesting the map, may not have an out-going + * label for each of the paths. + */ +static uword *load_balance_map_db; + +typedef enum load_balance_map_path_flags_t_ +{ + LOAD_BALANCE_MAP_PATH_UP = (1 << 0), + LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1), +} __attribute__ ((packed)) load_balance_map_path_flags_t; + +typedef struct load_balance_map_path_t_ { + /** + * Index of the path + */ + fib_node_index_t lbmp_index; + + /** + * Sibling Index in the list of all maps with this path index + */ + fib_node_index_t lbmp_sibling; + + /** + * the normalised wegiht of the path + */ + u32 lbmp_weight; + + /** + * The sate of the path + */ + load_balance_map_path_flags_t lbmp_flags; +} load_balance_map_path_t; + +/** + * The global pool of LB maps + */ +load_balance_map_t *load_balance_map_pool; + +/* + * Debug macro + */ +#ifdef FIB_DEBUG +#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) \ + { \ + clib_warning("lbm: FIXME" _fmt, \ + ##_args); \ + } +#else +#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) +#endif + +static index_t +load_balance_map_get_index (load_balance_map_t *lbm) +{ + return (lbm - load_balance_map_pool); +} + +u8* +format_load_balance_map (u8 *s, va_list ap) +{ + index_t lbmi = va_arg(ap, index_t); + u32 indent = va_arg(ap, u32); + load_balance_map_t *lbm; + u32 n_buckets, ii; + + lbm = load_balance_map_get(lbmi); + n_buckets = vec_len(lbm->lbm_buckets); + + s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets); + s = format(s, "\n%U index:", format_white_space, indent+2); + for (ii = 0; ii < n_buckets; ii++) + { + s = format(s, "%5d", ii); + } + s = format(s, "\n%U map:", format_white_space, indent+2); + for (ii = 0; ii < n_buckets; ii++) + { + s = format(s, "%5d", lbm->lbm_buckets[ii]); + } + + return (s); +} + + +static uword +load_balance_map_hash (load_balance_map_t *lbm) +{ + u32 old_lbm_hash, new_lbm_hash, hash; + load_balance_map_path_t *lb_path; + + new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths); + + vec_foreach (lb_path, lbm->lbm_paths) + { + hash = lb_path->lbmp_index; + hash_mix32(hash, old_lbm_hash, new_lbm_hash); + } + + return (new_lbm_hash); +} + +always_inline uword +load_balance_map_db_hash_key_from_index (uword index) +{ + return 1 + 2*index; +} + +always_inline uword +load_balance_map_db_hash_key_is_index (uword key) +{ + return key & 1; +} + +always_inline uword +load_balance_map_db_hash_key_2_index (uword key) +{ + ASSERT (load_balance_map_db_hash_key_is_index (key)); + return key / 2; +} + +static load_balance_map_t* +load_balance_map_db_get_from_hash_key (uword key) +{ + load_balance_map_t *lbm; + + if (load_balance_map_db_hash_key_is_index (key)) + { + index_t lbm_index; + + lbm_index = load_balance_map_db_hash_key_2_index(key); + lbm = load_balance_map_get(lbm_index); + } + else + { + lbm = uword_to_pointer (key, load_balance_map_t *); + } + + return (lbm); +} + +static uword +load_balance_map_db_hash_key_sum (hash_t * h, + uword key) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_db_get_from_hash_key(key); + + return (load_balance_map_hash(lbm)); +} + +static uword +load_balance_map_db_hash_key_equal (hash_t * h, + uword key1, + uword key2) +{ + load_balance_map_t *lbm1, *lbm2; + + lbm1 = load_balance_map_db_get_from_hash_key(key1); + lbm2 = load_balance_map_db_get_from_hash_key(key2); + + return (load_balance_map_hash(lbm1) == + load_balance_map_hash(lbm2)); +} + +static index_t +load_balance_map_db_find (load_balance_map_t *lbm) +{ + uword *p; + + p = hash_get(load_balance_map_db, lbm); + + if (NULL != p) + { + return p[0]; + } + + return (FIB_NODE_INDEX_INVALID); +} + +static void +load_balance_map_db_insert (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + fib_node_list_t list; + uword *p; + + ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm)); + + /* + * insert into the DB based on the set of paths. + */ + hash_set (load_balance_map_db, + load_balance_map_db_hash_key_from_index( + load_balance_map_get_index(lbm)), + load_balance_map_get_index(lbm)); + + /* + * insert into each per-path list. + */ + vec_foreach(lbmp, lbm->lbm_paths) + { + p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index); + + if (NULL == p) + { + list = fib_node_list_create(); + hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list); + } + else + { + list = p[0]; + } + + lbmp->lbmp_sibling = + fib_node_list_push_front(list, + 0, FIB_NODE_TYPE_FIRST, + load_balance_map_get_index(lbm)); + } + + LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted"); +} + +static void +load_balance_map_db_remove (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + uword *p; + + ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm)); + + hash_unset(load_balance_map_db, + load_balance_map_db_hash_key_from_index( + load_balance_map_get_index(lbm))); + + /* + * remove from each per-path list. + */ + vec_foreach(lbmp, lbm->lbm_paths) + { + p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index); + + ASSERT(NULL != p); + + fib_node_list_remove(p[0], lbmp->lbmp_sibling); + } + + LOAD_BALANCE_MAP_DBG(lbm, "DB-removed"); +} + +/** + * @brief from the paths that are usable, fill the Map. + */ +static void +load_balance_map_fill (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + u32 n_buckets, bucket, ii, jj; + u16 *tmp_buckets; + + tmp_buckets = NULL; + n_buckets = vec_len(lbm->lbm_buckets); + + /* + * run throught the set of paths once, and build a vector of the + * indices that are usable. we do this is a scratch space, since we + * need to refer to it multiple times as we build the real buckets. + */ + vec_validate(tmp_buckets, n_buckets-1); + + bucket = jj = 0; + vec_foreach (lbmp, lbm->lbm_paths) + { + if (fib_path_is_resolved(lbmp->lbmp_index)) + { + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + tmp_buckets[jj++] = bucket++; + } + } + else + { + bucket += lbmp->lbmp_weight; + } + } + _vec_len(tmp_buckets) = jj; + + /* + * If the number of temporaries written is as many as we need, implying + * all paths were up, then we can simply copy the scratch area over the + * actual buckets' memory + */ + if (jj == n_buckets) + { + memcpy(lbm->lbm_buckets, + tmp_buckets, + sizeof(lbm->lbm_buckets[0]) * n_buckets); + } + else + { + /* + * one or more paths are down. + */ + if (0 == vec_len(tmp_buckets)) + { + /* + * if the scratch area is empty, then no paths are usable. + * they will all drop. so use them all, lest we account drops + * against only one. + */ + for (bucket = 0; bucket < n_buckets; bucket++) + { + lbm->lbm_buckets[bucket] = bucket; + } + } + else + { + bucket = jj = 0; + vec_foreach (lbmp, lbm->lbm_paths) + { + if (fib_path_is_resolved(lbmp->lbmp_index)) + { + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + lbm->lbm_buckets[bucket] = bucket; + bucket++; + } + } + else + { + /* + * path is unusable + * cycle through the scratch space selecting a index. + * this means we load balance, in the intended ratio, + * over the paths that are still usable. + */ + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + lbm->lbm_buckets[bucket] = tmp_buckets[jj]; + jj = (jj + 1) % vec_len(tmp_buckets); + bucket++; + } + } + } + } + } + + vec_free(tmp_buckets); +} + +static load_balance_map_t* +load_balance_map_alloc (const load_balance_path_t *paths) +{ + load_balance_map_t *lbm; + u32 ii; + + pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES); + memset(lbm, 0, sizeof(*lbm)); + + vec_validate(lbm->lbm_paths, vec_len(paths)-1); + + vec_foreach_index(ii, paths) + { + lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index; + lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight; + } + + return (lbm); +} + +static load_balance_map_t * +load_balance_map_init (load_balance_map_t *lbm, + u32 n_buckets, + u32 sum_of_weights) +{ + lbm->lbm_sum_of_norm_weights = sum_of_weights; + vec_validate(lbm->lbm_buckets, n_buckets-1); + + load_balance_map_db_insert(lbm); + + load_balance_map_fill(lbm); + + return (lbm); +} + +index_t +load_balance_map_add_or_lock (u32 n_buckets, + u32 sum_of_weights, + const load_balance_path_t *paths) +{ + load_balance_map_t *tmp, *lbm; + index_t lbmi; + + tmp = load_balance_map_alloc(paths); + + lbmi = load_balance_map_db_find(tmp); + + if (INDEX_INVALID == lbmi) + { + lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights); + } + else + { + lbm = load_balance_map_get(lbmi); + } + + lbm->lbm_locks++; + + return (load_balance_map_get_index(lbm)); +} + +void +load_balance_map_lock (index_t lbmi) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_get(lbmi); + + lbm->lbm_locks++; +} + +void +load_balance_map_unlock (index_t lbmi) +{ + load_balance_map_t *lbm; + + if (INDEX_INVALID == lbmi) + { + return; + } + + lbm = load_balance_map_get(lbmi); + + lbm->lbm_locks--; + + if (0 == lbm->lbm_locks) + { + load_balance_map_db_remove(lbm); + vec_free(lbm->lbm_paths); + vec_free(lbm->lbm_buckets); + pool_put(load_balance_map_pool, lbm); + } +} + +static int +load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr, + void *ctx) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_get(fptr->fnp_index); + + load_balance_map_fill(lbm); + + return (!0); +} + +/** + * @brief the state of a path has changed (it has no doubt gone down). + * This is the trigger to perform a PIC edge cutover and update the maps + * to exclude this path. + */ +void +load_balance_map_path_state_change (fib_node_index_t path_index) +{ + uword *p; + + /* + * re-stripe the buckets for each affect MAP + */ + p = hash_get(lb_maps_by_path_index, path_index); + + if (NULL == p) + return; + + fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL); +} + +/** + * @brief Make/add a new or lock an existing Load-balance map + */ +void +load_balance_map_module_init (void) +{ + load_balance_map_db = + hash_create2 (/* elts */ 0, + /* user */ 0, + /* value_bytes */ sizeof (index_t), + load_balance_map_db_hash_key_sum, + load_balance_map_db_hash_key_equal, + /* format pair/arg */ + 0, 0); + + lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t)); +} + +static clib_error_t * +load_balance_map_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t lbmi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &lbmi)) + ; + else + break; + } + + if (INDEX_INVALID != lbmi) + { + vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0); + } + else + { + load_balance_map_t *lbm; + + pool_foreach(lbm, load_balance_map_pool, + ({ + vlib_cli_output (vm, "%U", format_load_balance_map, + load_balance_map_get_index(lbm), 0); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (load_balance_map_show_command, static) = { + .path = "show load-balance-map", + .short_help = "show load-balance-map [<index>]", + .function = load_balance_map_show, +}; diff --git a/vnet/vnet/dpo/load_balance_map.h b/vnet/vnet/dpo/load_balance_map.h new file mode 100644 index 00000000000..f080e97ccad --- /dev/null +++ b/vnet/vnet/dpo/load_balance_map.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + */ + +#ifndef __LOAD_BALANCE_MAP_H__ +#define __LOAD_BALANCE_MAP_H__ + +#include <vlib/vlib.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/load_balance.h> + +struct load_balance_map_path_t_; + +/** + */ +typedef struct load_balance_map_t_ { + /** + * The buckets of the map that provide the index to index translation. + * In the first cacheline. + */ + u16 *lbm_buckets; + + /** + * the vector of paths this MAP represents + */ + struct load_balance_map_path_t_ *lbm_paths; + + /** + * the sum of the normalised weights. cache for convenience + */ + u32 lbm_sum_of_norm_weights; + + /** + * Number of locks. Maps are shared by a large number of recrusvie fib_entry_ts + */ + u32 lbm_locks; +} load_balance_map_t; + +extern index_t load_balance_map_add_or_lock(u32 n_buckets, + u32 sum_of_weights, + const load_balance_path_t *norm_paths); + +extern void load_balance_map_lock(index_t lmbi); +extern void load_balance_map_unlock(index_t lbmi); + +extern void load_balance_map_path_state_change(fib_node_index_t path_index); + +extern u8* format_load_balance_map(u8 *s, va_list ap); + +/** + * The encapsulation breakages are for fast DP access + */ +extern load_balance_map_t *load_balance_map_pool; + +static inline load_balance_map_t* +load_balance_map_get (index_t lbmi) +{ + return (pool_elt_at_index(load_balance_map_pool, lbmi)); +} + + +extern void load_balance_map_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/lookup_dpo.c b/vnet/vnet/dpo/lookup_dpo.c new file mode 100644 index 00000000000..0bfc0651a63 --- /dev/null +++ b/vnet/vnet/dpo/lookup_dpo.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/mpls_fib.h> + +static const char *const lookup_input_names[] = LOOKUP_INPUTS; + +/** + * @brief Enumeration of the lookup subtypes + */ +typedef enum lookup_sub_type_t_ +{ + LOOKUP_SUB_TYPE_SRC, + LOOKUP_SUB_TYPE_DST, + LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE, +} lookup_sub_type_t; +#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1) + +#define FOR_EACH_LOOKUP_SUB_TYPE(_st) \ + for (_st = LOOKUP_SUB_TYPE_IP4_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++) + +/** + * @brief pool of all MPLS Label DPOs + */ +lookup_dpo_t *lookup_dpo_pool; + +/** + * @brief An array of registered DPO type values for the sub-types + */ +static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM]; + +static lookup_dpo_t * +lookup_dpo_alloc (void) +{ + lookup_dpo_t *lkd; + + pool_get_aligned(lookup_dpo_pool, lkd, CLIB_CACHE_LINE_BYTES); + + return (lkd); +} + +static index_t +lookup_dpo_get_index (lookup_dpo_t *lkd) +{ + return (lkd - lookup_dpo_pool); +} + +static void +lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + dpo_type_t type; + + lkd = lookup_dpo_alloc(); + lkd->lkd_fib_index = fib_index; + lkd->lkd_proto = proto; + lkd->lkd_input = input; + lkd->lkd_table = table_config; + + /* + * use the input type to select the lookup sub-type + */ + type = 0; + + switch (input) + { + case LOOKUP_INPUT_SRC_ADDR: + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC]; + break; + case LOOKUP_INPUT_DST_ADDR: + switch (table_config) + { + case LOOKUP_TABLE_FROM_INPUT_INTERFACE: + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE]; + break; + case LOOKUP_TABLE_FROM_CONFIG: + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST]; + break; + } + } + + if (0 == type) + { + dpo_reset(dpo); + } + else + { + dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd)); + } +} + +void +lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + if (LOOKUP_TABLE_FROM_CONFIG == table_config) + { + fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } + lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); +} + +void +lookup_dpo_add_or_lock_w_table_id (u32 table_id, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + fib_node_index_t fib_index = FIB_NODE_INDEX_INVALID; + + if (LOOKUP_TABLE_FROM_CONFIG == table_config) + { + fib_index = + fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } + + ASSERT(FIB_NODE_INDEX_INVALID != fib_index); + lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); +} + +u8* +format_lookup_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(index); + + if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table) + { + s = format(s, "%s lookup in interface's %U table", + lookup_input_names[lkd->lkd_input], + format_dpo_proto, lkd->lkd_proto); + } + else + { + s = format(s, "%s lookup in %U", + lookup_input_names[lkd->lkd_input], + format_fib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + return (s); +} + +static void +lookup_dpo_lock (dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(dpo->dpoi_index); + + lkd->lkd_locks++; +} + +static void +lookup_dpo_unlock (dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(dpo->dpoi_index); + + lkd->lkd_locks--; + + if (0 == lkd->lkd_locks) + { + if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table) + { + fib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + pool_put(lookup_dpo_pool, lkd); + } +} + +always_inline void +ip4_src_fib_lookup_one (u32 src_fib_index0, + const ip4_address_t * addr0, + u32 * src_adj_index0) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0; + + mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); +} + +always_inline void +ip4_src_fib_lookup_two (u32 src_fib_index0, + u32 src_fib_index1, + const ip4_address_t * addr0, + const ip4_address_t * addr1, + u32 * src_adj_index0, + u32 * src_adj_index1) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + + mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); +} + +/** + * @brief Lookup trace data + */ +typedef struct lookup_trace_t_ +{ + union { + ip46_address_t addr; + mpls_unicast_header_t hdr; + }; + fib_node_index_t fib_index; + index_t lbi; +} lookup_trace_t; + + +always_inline uword +lookup_dpo_ip4_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int input_src_addr, + int table_from_interface) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const ip4_address_t *input_addr; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const ip4_header_t * ip0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip4 lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + + /* + * choose between a lookup using the fib index in the DPO + * or getting the FIB index from the interface. + */ + if (table_from_interface) + { + fib_index0 = + ip4_fib_table_get_index_for_sw_if_index( + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { + fib_index0 = lkd0->lkd_fib_index; + } + + /* + * choose between a source or destination address lookup in the table + */ + if (input_src_addr) + { + input_addr = &ip0->src_address; + } + else + { + input_addr = &ip0->dst_address; + } + + /* do lookup */ + ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->addr.ip4 = *input_addr; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lookup_trace_t * t = va_arg (*args, lookup_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U fib-index:%d addr:%U load-balance:%d", + format_white_space, indent, + t->fib_index, + format_ip46_address, &t->addr, IP46_TYPE_ANY, + t->lbi); + return s; +} + +always_inline uword +lookup_ip4_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 0)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_node) = { + .function = lookup_ip4_dst, + .name = "lookup-ip4-dst", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_lookup_trace, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst) + +always_inline uword +lookup_ip4_dst_itf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 1)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = { + .function = lookup_ip4_dst_itf, + .name = "lookup-ip4-dst-itf", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_lookup_trace, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf) + +always_inline uword +lookup_ip4_src (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 1, 0)); +} + +VLIB_REGISTER_NODE (lookup_ip4_src_node) = { + .function = lookup_ip4_src, + .name = "lookup-ip4-src", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip4-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src) + +always_inline uword +lookup_dpo_ip6_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int input_src_addr) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const ip6_address_t *input_addr0; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const ip6_header_t * ip0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip6 lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + fib_index0 = lkd0->lkd_fib_index; + + /* + * choose between a source or destination address lookup in the table + */ + if (input_src_addr) + { + input_addr0 = &ip0->src_address; + } + else + { + input_addr0 = &ip0->dst_address; + } + + /* do src lookup */ + lbi0 = ip6_fib_table_fwding_lookup(&ip6_main, + fib_index0, + input_addr0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->addr.ip6 = *input_addr0; + } + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +always_inline uword +lookup_ip6_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /*use src*/)); +} + +VLIB_REGISTER_NODE (lookup_ip6_dst_node) = { + .function = lookup_ip6_dst, + .name = "lookup-ip6-dst", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip6-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst) + +always_inline uword +lookup_ip6_src (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip6_inline(vm, node, from_frame, 1 /*use src*/)); +} + +VLIB_REGISTER_NODE (lookup_ip6_src_node) = { + .function = lookup_ip6_src, + .name = "lookup-ip6-src", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip6-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src) + +always_inline uword +lookup_dpo_mpls_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int table_from_interface) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const mpls_unicast_header_t * hdr0; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + hdr0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by mpls lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + + /* + * choose between a lookup using the fib index in the DPO + * or getting the FIB index from the interface. + */ + if (table_from_interface) + { + fib_index0 = + mpls_fib_table_get_index_for_sw_if_index( + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { + fib_index0 = lkd0->lkd_fib_index; + } + + /* do lookup */ + lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->hdr = *hdr0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_lookup_mpls_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lookup_trace_t * t = va_arg (*args, lookup_trace_t *); + uword indent = format_get_indent (s); + mpls_unicast_header_t hdr; + + hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); + + s = format (s, "%U fib-index:%d hdr:%U load-balance:%d", + format_white_space, indent, + t->fib_index, + format_mpls_header, hdr, + t->lbi); + return s; +} + +always_inline uword +lookup_mpls_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_mpls_inline(vm, node, from_frame, 0)); +} + +VLIB_REGISTER_NODE (lookup_mpls_dst_node) = { + .function = lookup_mpls_dst, + .name = "lookup-mpls-dst", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + .format_trace = format_lookup_mpls_trace, + .n_next_nodes = 0, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, lookup_mpls_dst) + +always_inline uword +lookup_mpls_dst_itf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_mpls_inline(vm, node, from_frame, 1)); +} + +VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = { + .function = lookup_mpls_dst_itf, + .name = "lookup-mpls-dst-itf", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + .format_trace = format_lookup_mpls_trace, + .n_next_nodes = 0, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf) + +const static dpo_vft_t lkd_vft = { + .dv_lock = lookup_dpo_lock, + .dv_unlock = lookup_dpo_unlock, + .dv_format = format_lookup_dpo, +}; + +const static char* const lookup_src_ip4_nodes[] = +{ + "lookup-ip4-src", + NULL, +}; +const static char* const lookup_src_ip6_nodes[] = +{ + "lookup-ip6-src", + NULL, +}; +const static char* const * const lookup_src_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_src_ip4_nodes, + [DPO_PROTO_IP6] = lookup_src_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +const static char* const lookup_dst_ip4_nodes[] = +{ + "lookup-ip4-dst", + NULL, +}; +const static char* const lookup_dst_ip6_nodes[] = +{ + "lookup-ip6-dst", + NULL, +}; +const static char* const lookup_dst_mpls_nodes[] = +{ + "lookup-mpls-dst", + NULL, +}; +const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_ip4_nodes, + [DPO_PROTO_IP6] = lookup_dst_ip6_nodes, + [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes, +}; + +const static char* const lookup_dst_from_interface_ip4_nodes[] = +{ + "lookup-ip4-dst-itf", + NULL, +}; +const static char* const lookup_dst_from_interface_ip6_nodes[] = +{ + "lookup-ip6-dst-itf", + NULL, +}; +const static char* const lookup_dst_from_interface_mpls_nodes[] = +{ + "lookup-mpls-dst-itf", + NULL, +}; +const static char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_from_interface_ip4_nodes, + [DPO_PROTO_IP6] = lookup_dst_from_interface_ip6_nodes, + [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes, +}; + + +void +lookup_dpo_module_init (void) +{ + dpo_register(DPO_LOOKUP, &lkd_vft, NULL); + + /* + * There are various sorts of lookup; src or dst addr v4 /v6 etc. + * there isn't an object type for each (there is only the lookup_dpo_t), + * but, for performance reasons, there is a data plane function, and hence + * VLIB node for each. VLIB graph node construction is based on DPO types + * so we create sub-types. + */ + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] = + dpo_register_new_type(&lkd_vft, lookup_src_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] = + dpo_register_new_type(&lkd_vft, lookup_dst_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] = + dpo_register_new_type(&lkd_vft, lookup_dst_nodes); +} diff --git a/vnet/vnet/dpo/lookup_dpo.h b/vnet/vnet/dpo/lookup_dpo.h new file mode 100644 index 00000000000..ff283388868 --- /dev/null +++ b/vnet/vnet/dpo/lookup_dpo.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LOOKUP_DPO_H__ +#define __LOOKUP_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/dpo.h> + +/** + * Switch to use the packet's source or destination address for lookup + */ +typedef enum lookup_input_t_ { + LOOKUP_INPUT_SRC_ADDR, + LOOKUP_INPUT_DST_ADDR, +} __attribute__ ((packed)) lookup_input_t; + +#define LOOKUP_INPUTS { \ + [LOOKUP_INPUT_SRC_ADDR] = "src-address", \ + [LOOKUP_INPUT_DST_ADDR] = "dst-address", \ +} + +/** + * Switch to use the packet's source or destination address for lookup + */ +typedef enum lookup_table_t_ { + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + LOOKUP_TABLE_FROM_CONFIG, +} __attribute__ ((packed)) lookup_table_t; + +#define LOOKUP_TABLES { \ + [LOOKUP_INPUT_SRC_ADDR] = "table-input-interface", \ + [LOOKUP_INPUT_DST_ADDR] = "table-configured", \ +} + +/** + * A representation of an MPLS label for imposition in the data-path + */ +typedef struct lookup_dpo_t +{ + /** + * The FIB, or interface from which to get a FIB, in which to perform + * the next lookup; + */ + fib_node_index_t lkd_fib_index; + + /** + * The protocol of the FIB for the lookup, and hence + * the protocol of the packet + */ + dpo_proto_t lkd_proto; + + /** + * Switch to use src or dst address + */ + lookup_input_t lkd_input; + + /** + * Switch to use the table index passed, or the table of the input interface + */ + lookup_table_t lkd_table; + + /** + * Number of locks + */ + u16 lkd_locks; +} lookup_dpo_t; + +extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table, + dpo_id_t *dpo); +extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table, + dpo_id_t *dpo); + +extern u8* format_lookup_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern lookup_dpo_t *lookup_dpo_pool; + +static inline lookup_dpo_t * +lookup_dpo_get (index_t index) +{ + return (pool_elt_at_index(lookup_dpo_pool, index)); +} + +extern void lookup_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/mpls_label_dpo.c b/vnet/vnet/dpo/mpls_label_dpo.c new file mode 100644 index 00000000000..0ec840ecfbd --- /dev/null +++ b/vnet/vnet/dpo/mpls_label_dpo.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/mpls/mpls.h> + +/* + * pool of all MPLS Label DPOs + */ +mpls_label_dpo_t *mpls_label_dpo_pool; + +static mpls_label_dpo_t * +mpls_label_dpo_alloc (void) +{ + mpls_label_dpo_t *mld; + + pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES); + memset(mld, 0, sizeof(*mld)); + + dpo_reset(&mld->mld_dpo); + + return (mld); +} + +static index_t +mpls_label_dpo_get_index (mpls_label_dpo_t *mld) +{ + return (mld - mpls_label_dpo_pool); +} + +index_t +mpls_label_dpo_create (mpls_label_t label, + mpls_eos_bit_t eos, + u8 ttl, + u8 exp, + const dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_alloc(); + + vnet_mpls_uc_set_label(&mld->mld_hdr.label_exp_s_ttl, label); + vnet_mpls_uc_set_ttl(&mld->mld_hdr.label_exp_s_ttl, ttl); + vnet_mpls_uc_set_exp(&mld->mld_hdr.label_exp_s_ttl, exp); + vnet_mpls_uc_set_s(&mld->mld_hdr.label_exp_s_ttl, eos); + + /* + * get the header in network byte order since we will paint it + * on a packet in the data-plane + */ + mld->mld_hdr.label_exp_s_ttl = + clib_host_to_net_u32(mld->mld_hdr.label_exp_s_ttl); + + dpo_stack(DPO_MPLS_LABEL, DPO_PROTO_MPLS, &mld->mld_dpo, dpo); + + return (mpls_label_dpo_get_index(mld)); +} + +u8* +format_mpls_label_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + mpls_unicast_header_t hdr; + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(index); + + hdr.label_exp_s_ttl = + clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl); + + return (format(s, "mpls-label:[%d]:%U\n%U%U", + index, + format_mpls_header, hdr, + format_white_space, indent, + format_dpo_id, &mld->mld_dpo, indent+2)); +} + +static void +mpls_label_dpo_lock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks++; +} + +static void +mpls_label_dpo_unlock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks--; + + if (0 == mld->mld_locks) + { + dpo_reset(&mld->mld_dpo); + pool_put(mpls_label_dpo_pool, mld); + } +} + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_label_imposition_trace_t_ +{ + /** + * The MPLS header imposed + */ + mpls_unicast_header_t hdr; +} mpls_label_imposition_trace_t; + +always_inline uword +mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_unicast_header_t *hdr0; + mpls_label_dpo_t *mld0; + vlib_buffer_t * b0; + u32 bi0, mldi0; + u32 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by ip4 lookup */ + mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mld0 = mpls_label_dpo_get(mldi0); + + /* Paint the MPLS header */ + vlib_buffer_advance(b0, -sizeof(*hdr0)); + hdr0 = vlib_buffer_get_current(b0); + + // FIXME. + // need to copy the TTL from the correct place. + // for IPvX imposition from the IP header + // so we need a deidcated ipx-to-mpls-label-imp-node + // for mpls switch and stack another solution is required. + *hdr0 = mld0->mld_hdr; + + next0 = mld0->mld_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->hdr = *hdr0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_mpls_label_imposition_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_label_imposition_trace_t * t; + mpls_unicast_header_t hdr; + uword indent; + + t = va_arg (*args, mpls_label_imposition_trace_t *); + indent = format_get_indent (s); + hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); + + s = format (s, "%Umpls-header:%U", + format_white_space, indent, + format_mpls_header, hdr); + return (s); +} + +VLIB_REGISTER_NODE (mpls_label_imposition_node) = { + .function = mpls_label_imposition, + .name = "mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, mpls_label_imposition) + +const static dpo_vft_t mld_vft = { + .dv_lock = mpls_label_dpo_lock, + .dv_unlock = mpls_label_dpo_unlock, + .dv_format = format_mpls_label_dpo, +}; + +const static char* const mpls_label_imp_ip4_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_ip6_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_mpls_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes, + [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, + [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, +}; + + +void +mpls_label_dpo_module_init (void) +{ + dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes); +} diff --git a/vnet/vnet/dpo/mpls_label_dpo.h b/vnet/vnet/dpo/mpls_label_dpo.h new file mode 100644 index 00000000000..47ee344933f --- /dev/null +++ b/vnet/vnet/dpo/mpls_label_dpo.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MPLS_LABEL_DPO_H__ +#define __MPLS_LABEL_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of an MPLS label for imposition in the data-path + */ +typedef struct mpls_label_dpo_t +{ + /** + * The MPLS label header to impose + */ + mpls_unicast_header_t mld_hdr; + + /** + * Next DPO in the graph + */ + dpo_id_t mld_dpo; + + /** + * Number of locks/users of the label + */ + u16 mld_locks; +} mpls_label_dpo_t; + +extern index_t mpls_label_dpo_create(mpls_label_t label, + mpls_eos_bit_t eos, + u8 ttl, + u8 exp, + const dpo_id_t *dpo); + +extern u8* format_mpls_label_dpo(u8 *s, va_list *args); + + +/* + * Encapsulation violation for fast data-path access + */ +extern mpls_label_dpo_t *mpls_label_dpo_pool; + +static inline mpls_label_dpo_t * +mpls_label_dpo_get (index_t index) +{ + return (pool_elt_at_index(mpls_label_dpo_pool, index)); +} + +extern void mpls_label_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/punt_dpo.c b/vnet/vnet/dpo/punt_dpo.c new file mode 100644 index 00000000000..e27a8ff3018 --- /dev/null +++ b/vnet/vnet/dpo/punt_dpo.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing puntping the packet + */ + +#include <vnet/dpo/dpo.h> + +static dpo_id_t punt_dpos[DPO_PROTO_NUM]; + +const dpo_id_t * +punt_dpo_get (dpo_proto_t proto) +{ + dpo_set(&punt_dpos[proto], DPO_PUNT, proto, 1); + + return (&punt_dpos[proto]); +} + +int +dpo_is_punt (const dpo_id_t *dpo) +{ + return (dpo->dpoi_type == DPO_PUNT); +} + +static void +punt_dpo_lock (dpo_id_t *dpo) +{ + /* + * not maintaining a lock count on the punt + * more trouble than it's worth. + * There always needs to be one around. no point it managaing its lifetime + */ +} +static void +punt_dpo_unlock (dpo_id_t *dpo) +{ +} + +static u8* +format_punt_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + + return (format(s, "dpo-punt")); +} + +const static dpo_vft_t punt_vft = { + .dv_lock = punt_dpo_lock, + .dv_unlock = punt_dpo_unlock, + .dv_format = format_punt_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a punt + * object. + * + * this means that these graph nodes are ones from which a punt is the + * parent object in the DPO-graph. + */ +const static char* const punt_ip4_nodes[] = +{ + "ip4-punt", + NULL, +}; +const static char* const punt_ip6_nodes[] = +{ + "ip6-punt", + NULL, +}; +const static char* const punt_mpls_nodes[] = +{ + "mpls-punt", + NULL, +}; +const static char* const * const punt_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = punt_ip4_nodes, + [DPO_PROTO_IP6] = punt_ip6_nodes, + [DPO_PROTO_MPLS] = punt_mpls_nodes, +}; + +void +punt_dpo_module_init (void) +{ + dpo_register(DPO_PUNT, &punt_vft, punt_nodes); +} diff --git a/vnet/vnet/dpo/punt_dpo.h b/vnet/vnet/dpo/punt_dpo.h new file mode 100644 index 00000000000..370547c1596 --- /dev/null +++ b/vnet/vnet/dpo/punt_dpo.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief A DPO to punt packets to the Control-plane + */ + +#ifndef __PUNT_DPO_H__ +#define __PUNT_DPO_H__ + +#include <vnet/dpo/dpo.h> + +extern int dpo_is_punt(const dpo_id_t *dpo); + +extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto); + +extern void punt_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/receive_dpo.c b/vnet/vnet/dpo/receive_dpo.c new file mode 100644 index 00000000000..ee7d82b0980 --- /dev/null +++ b/vnet/vnet/dpo/receive_dpo.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing receiveing the packet, i.e. it's for-us + */ +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/receive_dpo.h> + +/** + * @brief pool of all receive DPOs + */ +receive_dpo_t *receive_dpo_pool; + +static receive_dpo_t * +receive_dpo_alloc (void) +{ + receive_dpo_t *rd; + + pool_get_aligned(receive_dpo_pool, rd, CLIB_CACHE_LINE_BYTES); + memset(rd, 0, sizeof(*rd)); + + return (rd); +} + +static receive_dpo_t * +receive_dpo_get_from_dpo (const dpo_id_t *dpo) +{ + ASSERT(DPO_RECEIVE == dpo->dpoi_type); + + return (receive_dpo_get(dpo->dpoi_index)); +} + + +/* + * receive_dpo_add_or_lock + * + * The next_hop address here is used for source address selection in the DP. + * The local adj is added to an interface's receive prefix, the next-hop + * passed here is the local prefix on the same interface. + */ +void +receive_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr, + dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_alloc(); + + rd->rd_sw_if_index = sw_if_index; + if (NULL != nh_addr) + { + rd->rd_addr = *nh_addr; + } + + dpo_set(dpo, DPO_RECEIVE, proto, (rd - receive_dpo_pool)); +} + +static void +receive_dpo_lock (dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_get_from_dpo(dpo); + rd->rd_locks++; +} + +static void +receive_dpo_unlock (dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_get_from_dpo(dpo); + rd->rd_locks--; + + if (0 == rd->rd_locks) + { + pool_put(receive_dpo_pool, rd); + } +} + +static u8* +format_receive_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + receive_dpo_t *rd; + + rd = receive_dpo_get(index); + + if (~0 != rd->rd_sw_if_index) + { + return (format(s, "dpo-receive: %U on %U", + format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY, + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface(vnm, rd->rd_sw_if_index))); + } + else + { + return (format(s, "dpo-receive")); + } +} + +const static dpo_vft_t receive_vft = { + .dv_lock = receive_dpo_lock, + .dv_unlock = receive_dpo_unlock, + .dv_format = format_receive_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a receive + * object. + * + * this means that these graph nodes are ones from which a receive is the + * parent object in the DPO-graph. + */ +const static char* const receive_ip4_nodes[] = +{ + "ip4-local", + NULL, +}; +const static char* const receive_ip6_nodes[] = +{ + "ip6-local", + NULL, +}; + +const static char* const * const receive_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = receive_ip4_nodes, + [DPO_PROTO_IP6] = receive_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +receive_dpo_module_init (void) +{ + dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes); +} diff --git a/vnet/vnet/dpo/receive_dpo.h b/vnet/vnet/dpo/receive_dpo.h new file mode 100644 index 00000000000..2420fd7843c --- /dev/null +++ b/vnet/vnet/dpo/receive_dpo.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing receiveing the packet, i.e. it's for-us + */ + +#ifndef __RECEIVE_DPO_H__ +#define __RECEIVE_DPO_H__ + +#include <vnet/dpo/dpo.h> +#include <vnet/ip/ip6.h> + +typedef struct receive_dpo_t_ +{ + /** + * The Software interface index on which traffic is received + */ + u32 rd_sw_if_index; + + /** + * The address on the receive interface. packet are destined to this address + */ + ip46_address_t rd_addr; + + /** + * number oflocks. + */ + u16 rd_locks; +} receive_dpo_t; + +extern void receive_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr, + dpo_id_t *dpo); + +extern void receive_dpo_module_init(void); + +/** + * @brief pool of all receive DPOs + */ +receive_dpo_t *receive_dpo_pool; + +static inline receive_dpo_t * +receive_dpo_get (index_t index) +{ + return (pool_elt_at_index(receive_dpo_pool, index)); +} + +#endif |