/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @brief
 * A Data-Path Object is an object that represents actions that are
 * applied to packets as they are switched through VPP.
 *
 * The DPO is a base class that is specialised by other objects to provide
 * concrete actions
 *
 * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances.
 */

#include <vnet/dpo/dpo.h>
#include <vnet/ip/lookup.h>
#include <vnet/ip/format.h>
#include <vnet/adj/adj.h>

#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/mpls_label_dpo.h>
#include <vnet/dpo/lookup_dpo.h>
#include <vnet/dpo/drop_dpo.h>
#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/punt_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/dpo/ip_null_dpo.h>
#include <vnet/dpo/replicate_dpo.h>
#include <vnet/dpo/interface_rx_dpo.h>
#include <vnet/dpo/interface_tx_dpo.h>
#include <vnet/dpo/mpls_disposition.h>
#include <vnet/dpo/dvr_dpo.h>
#include <vnet/dpo/l3_proxy_dpo.h>
#include <vnet/dpo/ip6_ll_dpo.h>

/**
 * Array of char* names for the DPO types and protos
 */
static const char* dpo_type_names[] = DPO_TYPES;
static const char* dpo_proto_names[] = DPO_PROTOS;

/**
 * @brief Vector of virtual function tables for the DPO types
 *
 * This is a vector so we can dynamically register new DPO types in plugins.
 */
static dpo_vft_t *dpo_vfts;

/**
 * @brief vector of graph node names associated with each DPO type and protocol.
 *
 *   dpo_nodes[child_type][child_proto][node_X] = node_name;
 * i.e.
 *   dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup"
 *   dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance"
 *
 * This is a vector so we can dynamically register new DPO types in plugins.
 */
static const char* const * const ** dpo_nodes;

/**
 * @brief Vector of edge indices from parent DPO nodes to child
 *
 * dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge_index
 *
 * This array is derived at init time from the dpo_nodes above. Note that
 * the third dimension in dpo_nodes is lost, hence, the edge index from each
 * node MUST be the same.
 * Including both the child and parent protocol is required to support the
 * case where it changes as the graph is traversed, most notably when an
 * MPLS label is popped.
 *
 * Note that this array is child type specific, not child instance specific.
 */
static u32 ****dpo_edges;

/**
 * @brief The DPO type value that can be assigned to the next dynamic
 *        type registration.
 */
static dpo_type_t dpo_dynamic = DPO_LAST;

/**
 * @brief Convert a VNET link type into the corresponding DPO protocol.
 *
 * VNET_LINK_ARP has no DPO protocol equivalent; passing it (or any value
 * not handled by the switch) trips the ASSERT and, when ASSERTs are
 * compiled out, returns 0.
 *
 * @param linkt The link type to convert.
 * @return The matching DPO protocol.
 */
dpo_proto_t
vnet_link_to_dpo_proto (vnet_link_t linkt)
{
    switch (linkt)
    {
    case VNET_LINK_IP6:
        return (DPO_PROTO_IP6);
    case VNET_LINK_IP4:
        return (DPO_PROTO_IP4);
    case VNET_LINK_MPLS:
        return (DPO_PROTO_MPLS);
    case VNET_LINK_ETHERNET:
        return (DPO_PROTO_ETHERNET);
    case VNET_LINK_NSH:
        return (DPO_PROTO_NSH);
    case VNET_LINK_ARP:
        break;
    }
    ASSERT(0);
    return (0);
}

/**
 * @brief Convert a DPO protocol into the corresponding VNET link type.
 *
 * Both DPO_PROTO_MPLS and DPO_PROTO_BIER map to VNET_LINK_MPLS.
 *
 * @param dp The DPO protocol to convert.
 * @return The matching link type, or ~0 if the protocol is not one of the
 *         values handled by the switch.
 */
vnet_link_t
dpo_proto_to_link (dpo_proto_t dp)
{
    switch (dp)
    {
    case DPO_PROTO_IP6:
        return (VNET_LINK_IP6);
    case DPO_PROTO_IP4:
        return (VNET_LINK_IP4);
    case DPO_PROTO_MPLS:
    case DPO_PROTO_BIER:
        return (VNET_LINK_MPLS);
    case DPO_PROTO_ETHERNET:
        return (VNET_LINK_ETHERNET);
    case DPO_PROTO_NSH:
        return (VNET_LINK_NSH);
    }
    return (~0);
}

/**
 * @brief Format function for a DPO type.
 *
 * Consumes one va_arg: the dpo_type_t (promoted to int), which is used
 * as an index into dpo_type_names without a bounds check.
 */
u8 *
format_dpo_type (u8 * s, va_list * args)
{
    dpo_type_t type = va_arg (*args, int);

    s = format(s, "%s", dpo_type_names[type]);

    return (s);
}

/**
 * @brief Format function for a DPO identifier.
 *
 * Consumes two va_args: a dpo_id_t* and a u32 indent. Prints the next-node
 * index followed by the output of the type's registered dv_format function.
 * If the type has no dv_format, prints "unset" for DPO_FIRST and
 * "unknown" for anything else.
 */
u8 *
format_dpo_id (u8 * s, va_list * args)
{
    dpo_id_t *dpo = va_arg (*args, dpo_id_t*);
    u32 indent = va_arg (*args, u32);

    s = format(s, "[@%d]: ", dpo->dpoi_next_node);

    if (NULL != dpo_vfts[dpo->dpoi_type].dv_format)
    {
        s = format(s, "%U",
                   dpo_vfts[dpo->dpoi_type].dv_format,
                   dpo->dpoi_index,
                   indent);
    }
    else
    {
        switch (dpo->dpoi_type)
        {
        case DPO_FIRST:
            s = format(s, "unset");
            break;
        default:
            s = format(s, "unknown");
            break;
        }
    }
    return (s);
}

/**
 * @brief Format function for a DPO protocol.
 *
 * Consumes one va_arg: the dpo_proto_t (promoted to int), which is used
 * as an index into dpo_proto_names without a bounds check.
 */
u8 *
format_dpo_proto (u8 * s, va_list * args)
{
    dpo_proto_t proto = va_arg (*args, int);

    return (format(s, "%s", dpo_proto_names[proto]));
}

/**
 * @brief Set the type, protocol and index of a DPO.
 *
 * The new object is locked and the object the DPO previously referred to
 * is unlocked. When the requested type is DPO_ADJACENCY the stored type is
 * refined to the adjacency sub-type derived from the adjacency's
 * lookup_next_index.
 *
 * @param dpo   The DPO to update.
 * @param type  The new type.
 * @param proto The new protocol.
 * @param index The new object index.
 */
void
dpo_set (dpo_id_t *dpo,
         dpo_type_t type,
         dpo_proto_t proto,
         index_t index)
{
    /* remember the old value so it can be unlocked after the new is locked */
    dpo_id_t tmp = *dpo;

    dpo->dpoi_type = type;
    dpo->dpoi_proto = proto;
    dpo->dpoi_index = index;

    if (DPO_ADJACENCY == type)
    {
        /*
         * set the adj subtype
         */
        ip_adjacency_t *adj;

        adj = adj_get(index);

        switch (adj->lookup_next_index)
        {
        case IP_LOOKUP_NEXT_ARP:
            dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE;
            break;
        case IP_LOOKUP_NEXT_MIDCHAIN:
            dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN;
            break;
        case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
            dpo->dpoi_type = DPO_ADJACENCY_MCAST_MIDCHAIN;
            break;
        case IP_LOOKUP_NEXT_MCAST:
            dpo->dpoi_type = DPO_ADJACENCY_MCAST;
            break;
        case IP_LOOKUP_NEXT_GLEAN:
            dpo->dpoi_type = DPO_ADJACENCY_GLEAN;
            break;
        default:
            break;
        }
    }
    dpo_lock(dpo);
    dpo_unlock(&tmp);
}

/**
 * @brief Reset a DPO to DPO_INVALID.
 *
 * Implemented as a dpo_copy from an invalid DPO, so the object previously
 * referenced is unlocked.
 *
 * @param dpo The DPO to reset.
 */
void
dpo_reset (dpo_id_t *dpo)
{
    dpo_id_t tmp = DPO_INVALID;

    /*
     * use the atomic copy operation.
     */
    dpo_copy(dpo, &tmp);
}

/**
 * \brief
 * Compare two Data-path objects
 *
 * like memcmp, return 0 if matching, !0 otherwise.
 * Only the type and the index are compared.
 */
int
dpo_cmp (const dpo_id_t *dpo1,
         const dpo_id_t *dpo2)
{
    int res;

    res = dpo1->dpoi_type - dpo2->dpoi_type;

    if (0 != res) return (res);

    return (dpo1->dpoi_index - dpo2->dpoi_index);
}

/**
 * @brief Copy one DPO over another.
 *
 * The source's object is locked (via the destination after the write) and
 * the object the destination previously referred to is unlocked.
 *
 * @param dst The DPO to overwrite.
 * @param src The DPO to copy from.
 */
void
dpo_copy (dpo_id_t *dst,
          const dpo_id_t *src)
{
    /* remember the old value so it can be unlocked after the new is locked */
    dpo_id_t tmp = *dst;

    /*
     * the destination is written in a single u64 write - hence atomically w.r.t
     * any packets inflight.
     */
    *((u64*)dst) = *(u64*)src;

    dpo_lock(dst);
    dpo_unlock(&tmp);
}

/**
 * @brief Test whether a DPO is one of the adjacency types.
 *
 * NOTE(review): DPO_ADJACENCY_MCAST and DPO_ADJACENCY_MCAST_MIDCHAIN, which
 * dpo_set can assign, are not tested for here - confirm whether that is
 * intentional before relying on this for multicast adjacencies.
 *
 * @param dpo The DPO to test.
 * @return non-zero if the DPO's type is DPO_ADJACENCY,
 *         DPO_ADJACENCY_INCOMPLETE, DPO_ADJACENCY_MIDCHAIN or
 *         DPO_ADJACENCY_GLEAN; 0 otherwise.
 */
int
dpo_is_adj (const dpo_id_t *dpo)
{
    return ((dpo->dpoi_type == DPO_ADJACENCY) ||
            (dpo->dpoi_type == DPO_ADJACENCY_INCOMPLETE) ||
            (dpo->dpoi_type == DPO_ADJACENCY_MIDCHAIN) ||
            (dpo->dpoi_type == DPO_ADJACENCY_GLEAN));
}

/**
 * @brief Default implementation of the dv_get_next_node virtual function.
 *
 * Walks the NULL terminated list of node names registered for the DPO's
 * type and protocol and resolves each to its VLIB node index.
 *
 * @param dpo The DPO whose nodes are required.
 * @return A vector of node indices (NULL if no names are registered).
 *         The callers in this file free it with vec_free.
 */
static u32 *
dpo_default_get_next_node (const dpo_id_t *dpo)
{
    u32 *node_indices = NULL;
    const char *node_name;
    u32 ii = 0;

    node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
    while (NULL != node_name)
    {
        vlib_node_t *node;

        node = vlib_get_node_by_name(vlib_get_main(), (u8*) node_name);
        ASSERT(NULL != node);
        vec_add1(node_indices, node->index);

        ++ii;
        node_name = dpo_nodes[dpo->dpoi_type][dpo->dpoi_proto][ii];
    }

    return (node_indices);
}

/**
 * A default variant of the make interpose function that just returns
 * the original
 */
static void
dpo_default_mk_interpose (const dpo_id_t *original,
                          const dpo_id_t *parent,
                          dpo_id_t *clone)
{
    dpo_copy(clone, original);
}

/**
 * @brief Register the virtual function table and graph node names for a
 *        DPO type.
 *
 * The VFT is copied by value. If it does not provide dv_get_next_node or
 * dv_mk_interpose, the default implementations in this file are installed.
 *
 * @param type  The DPO type being registered.
 * @param vft   The type's virtual function table.
 * @param nodes The per-protocol, NULL terminated lists of graph node names;
 *              the pointer itself is stored, not a copy.
 */
void
dpo_register (dpo_type_t type,
              const dpo_vft_t *vft,
              const char * const * const * nodes)
{
    vec_validate(dpo_vfts, type);
    dpo_vfts[type] = *vft;
    if (NULL == dpo_vfts[type].dv_get_next_node)
    {
        dpo_vfts[type].dv_get_next_node = dpo_default_get_next_node;
    }
    if (NULL == dpo_vfts[type].dv_mk_interpose)
    {
        dpo_vfts[type].dv_mk_interpose = dpo_default_mk_interpose;
    }

    vec_validate(dpo_nodes, type);
    dpo_nodes[type] = nodes;
}

/**
 * @brief Allocate the next dynamic DPO type value and register it.
 *
 * @param vft   The new type's virtual function table.
 * @param nodes The new type's graph node names.
 * @return The newly allocated DPO type.
 */
dpo_type_t
dpo_register_new_type (const dpo_vft_t *vft,
                       const char * const * const * nodes)
{
    dpo_type_t type = dpo_dynamic++;

    dpo_register(type, vft, nodes);

    return (type);
}

/**
 * @brief Invoke the original DPO type's make-interpose function.
 *
 * Does nothing if the original DPO is not valid.
 *
 * @param original The DPO to be interposed.
 * @param parent   The parent passed through to the type's function.
 * @param clone    The DPO to be filled in by the type's function.
 */
void
dpo_mk_interpose (const dpo_id_t *original,
                  const dpo_id_t *parent,
                  dpo_id_t *clone)
{
    if (!dpo_id_is_valid(original))
        return;

    dpo_vfts[original->dpoi_type].dv_mk_interpose(original, parent, clone);
}

/**
 * @brief Take a lock on the object a DPO refers to, via the type's dv_lock.
 *
 * Does nothing if the DPO is not valid.
 */
void
dpo_lock (dpo_id_t *dpo)
{
    if (!dpo_id_is_valid(dpo))
        return;

    dpo_vfts[dpo->dpoi_type].dv_lock(dpo);
}

/**
 * @brief Release a lock on the object a DPO refers to, via the type's
 *        dv_unlock.
 *
 * Does nothing if the DPO is not valid.
 */
void
dpo_unlock (dpo_id_t *dpo)
{
    if (!dpo_id_is_valid(dpo))
        return;

    dpo_vfts[dpo->dpoi_type].dv_unlock(dpo);
}

/**
 * @brief Get the uRPF value for a DPO from the type's dv_get_urpf.
 *
 * @param dpo The DPO to query.
 * @return The value returned by the type's dv_get_urpf, or ~0 if the DPO
 *         is not valid or its type does not provide that function.
 */
u32
dpo_get_urpf(const dpo_id_t *dpo)
{
    if (dpo_id_is_valid(dpo) &&
        (NULL != dpo_vfts[dpo->dpoi_type].dv_get_urpf))
    {
        return (dpo_vfts[dpo->dpoi_type].dv_get_urpf(dpo));
    }

    return (~0);
}

/**
 * @brief Get, creating on first use, the VLIB edge index from the child
 *        type/proto's nodes to the parent DPO's nodes.
 *
 * The result is cached in dpo_edges. On a cache miss a graph arc is added,
 * with the worker-thread barrier held, from each of the child's registered
 * nodes to each of the parent's nodes; all arcs are expected to yield the
 * same edge index.
 *
 * @param child_type  The type of the child DPO.
 * @param child_proto The protocol of the child DPO.
 * @param parent_dpo  The parent DPO.
 * @return The edge index.
 */
static u32
dpo_get_next_node (dpo_type_t child_type,
                   dpo_proto_t child_proto,
                   const dpo_id_t *parent_dpo)
{
    dpo_proto_t parent_proto;
    dpo_type_t parent_type;

    parent_type = parent_dpo->dpoi_type;
    parent_proto = parent_dpo->dpoi_proto;

    vec_validate(dpo_edges, child_type);
    vec_validate(dpo_edges[child_type], child_proto);
    vec_validate(dpo_edges[child_type][child_proto], parent_type);
    vec_validate_init_empty(
        dpo_edges[child_type][child_proto][parent_type],
        parent_proto, ~0);

    /*
     * if the edge index has not yet been created for this node to node transition
     */
    if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
    {
        vlib_node_t *child_node;
        u32 *parent_indices;
        vlib_main_t *vm;
        u32 edge, *pi, cc;

        vm = vlib_get_main();

        ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
        ASSERT(NULL != dpo_nodes[child_type]);
        ASSERT(NULL != dpo_nodes[child_type][child_proto]);

        cc = 0;
        parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent_dpo);

        vlib_worker_thread_barrier_sync(vm);

        /*
         * create a graph arc from each of the child's registered node types,
         * to each of the parent's.
         */
        while (NULL != dpo_nodes[child_type][child_proto][cc])
        {
            child_node =
                vlib_get_node_by_name(vm,
                                      (u8*) dpo_nodes[child_type][child_proto][cc]);
            /* a registered node name must resolve to a real graph node */
            ASSERT(NULL != child_node);

            vec_foreach(pi, parent_indices)
            {
                edge = vlib_node_add_next(vm, child_node->index, *pi);

                if (~0 == dpo_edges[child_type][child_proto][parent_type][parent_proto])
                {
                    dpo_edges[child_type][child_proto][parent_type][parent_proto] = edge;
                }
                else
                {
                    ASSERT(dpo_edges[child_type][child_proto][parent_type][parent_proto] == edge);
                }
            }
            cc++;
        }

        vlib_worker_thread_barrier_release(vm);
        vec_free(parent_indices);
    }

    return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
}

/**
 * @brief return already stacked up next node index for a given
 * child_type/child_proto and parent_type/parent_proto.
 * The VLIB graph arc used is taken from the parent and child types
 * passed.
 *
 * No bounds checks are made on dpo_edges, so the edge must already have
 * been created.
 */
u32
dpo_get_next_node_by_type_and_proto (dpo_type_t child_type,
                                     dpo_proto_t child_proto,
                                     dpo_type_t parent_type,
                                     dpo_proto_t parent_proto)
{
    return (dpo_edges[child_type][child_proto][parent_type][parent_proto]);
}

/**
 * @brief Stack one DPO object on another, and thus establish a child parent
 * relationship. The VLIB graph arc to use is passed in by the caller.
 *
 * @param edge   The edge index to store as the child's next node.
 * @param dpo    The child DPO to update.
 * @param parent The parent DPO to stack on.
 */
static void
dpo_stack_i (u32 edge,
             dpo_id_t *dpo,
             const dpo_id_t *parent)
{
    /*
     * in order to get an atomic update of the child we create a temporary,
     * from a copy of the parent, and add the next_node. then we copy the
     * temporary to the child
     */
    dpo_id_t tmp = DPO_INVALID;
    dpo_copy(&tmp, parent);

    /*
     * set the edge index for the child to parent VLIB graph transition
     */
    tmp.dpoi_next_node = edge;

    /*
     * this update is atomic.
     */
    dpo_copy(dpo, &tmp);

    /* drop the lock the temporary took on the parent's object */
    dpo_reset(&tmp);
}

/**
 * @brief Stack one DPO object on another, and thus establish a child-parent
 * relationship. The VLIB graph arc used is taken from the parent and child types
 * passed.
 */
void
dpo_stack (dpo_type_t child_type,
           dpo_proto_t child_proto,
           dpo_id_t *dpo,
           const dpo_id_t *parent)
{
    dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent);
}

/**
 * @brief Stack one DPO object on another, and thus establish a child parent
 * relationship. A new VLIB graph arc is created from the child node passed
 * to the nodes registered by the parent. The VLIB infra will ensure this arc
 * is added only once.
 */
void
dpo_stack_from_node (u32 child_node_index,
                     dpo_id_t *dpo,
                     const dpo_id_t *parent)
{
    dpo_type_t parent_type;
    u32 *parent_indices;
    vlib_main_t *vm;
    u32 edge, *pi;

    edge = 0;
    parent_type = parent->dpoi_type;
    vm = vlib_get_main();

    ASSERT(NULL != dpo_vfts[parent_type].dv_get_next_node);
    parent_indices = dpo_vfts[parent_type].dv_get_next_node(parent);
    ASSERT(parent_indices);

    /*
     * This loop is purposefully written with the worker thread lock in the
     * inner loop because;
     *  1) the likelihood that the edge does not exist is smaller
     *  2) the likelihood there is more than one node is even smaller
     * so we are optimising for not needing to take the lock
     */
    vec_foreach(pi, parent_indices)
    {
        edge = vlib_node_get_next(vm, child_node_index, *pi);

        if (~0 == edge)
        {
            vlib_worker_thread_barrier_sync(vm);

            edge = vlib_node_add_next(vm, child_node_index, *pi);

            vlib_worker_thread_barrier_release(vm);
        }
    }
    dpo_stack_i(edge, dpo, parent);

    /* free the local vector to avoid a memory leak */
    vec_free(parent_indices);
}

/**
 * @brief Initialise each of the built-in DPO type modules.
 *
 * Registered below to run before "ip_main_init".
 */
static clib_error_t *
dpo_module_init (vlib_main_t * vm)
{
    drop_dpo_module_init();
    punt_dpo_module_init();
    receive_dpo_module_init();
    load_balance_module_init();
    mpls_label_dpo_module_init();
    classify_dpo_module_init();
    lookup_dpo_module_init();
    ip_null_dpo_module_init();
    ip6_ll_dpo_module_init();
    replicate_module_init();
    interface_rx_dpo_module_init();
    interface_tx_dpo_module_init();
    mpls_disp_dpo_module_init();
    dvr_dpo_module_init();
    l3_proxy_dpo_module_init();

    return (NULL);
}

/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(dpo_module_init) =
{
    .runs_before = VLIB_INITS ("ip_main_init"),
};
/* *INDENT-ON* */

/**
 * @brief CLI handler for "show dpo memory".
 *
 * Prints a header and then calls dv_mem_show for every registered DPO type
 * that provides one.
 */
static clib_error_t *
dpo_memory_show (vlib_main_t * vm,
                 unformat_input_t * input,
                 vlib_cli_command_t * cmd)
{
    dpo_vft_t *vft;

    vlib_cli_output (vm, "DPO memory");
    vlib_cli_output (vm, "%=30s %=5s %=8s/%=9s totals",
                     "Name","Size", "in-use", "allocated");

    vec_foreach(vft, dpo_vfts)
    {
        if (NULL != vft->dv_mem_show)
            vft->dv_mem_show();
    }

    return (NULL);
}

/* *INDENT-OFF* */
/*?
 * The '<em>sh dpo memory </em>' command displays the memory usage for each
 * data-plane object type.
 *
 * @cliexpar
 * @cliexstart{show dpo memory}
 * DPO memory
 *             Name               Size  in-use /allocated   totals
 *         load-balance            64     12   /    12      768/768
 *           Adjacency            256      1   /    1       256/256
 *            Receive              24      5   /    5       120/120
 *            Lookup               12      0   /    0       0/0
 *           Classify              12      0   /    0       0/0
 *          MPLS label             24      0   /    0       0/0
 * @cliexend
?*/
VLIB_CLI_COMMAND (show_fib_memory, static) = {
    .path = "show dpo memory",
    .function = dpo_memory_show,
    .short_help = "show dpo memory",
};
/* *INDENT-ON* */