From 5f3fcb96296a4769f55f60270e10c6294c604db9 Mon Sep 17 00:00:00 2001 From: Andrew Yourtchenko Date: Wed, 25 Oct 2017 05:50:37 -0700 Subject: L3 proxy FIB source for container networking Change-Id: I4164c4c19c8dbfd73e6ddf94a12056325cc093b9 Signed-off-by: Neale Ranns Signed-off-by: Andrew Yourtchenko --- src/vnet.am | 1 + src/vnet/adj/adj.c | 10 ++ src/vnet/adj/adj_glean.c | 1 + src/vnet/adj/adj_internal.h | 2 + src/vnet/adj/adj_mcast.c | 2 + src/vnet/adj/adj_midchain.c | 1 + src/vnet/adj/adj_nbr.c | 2 + src/vnet/dpo/dpo.c | 13 +++ src/vnet/dpo/dpo.h | 28 +++++- src/vnet/dpo/l3_proxy_dpo.c | 175 +++++++++++++++++++++++++++++++++++ src/vnet/dpo/l3_proxy_dpo.h | 57 ++++++++++++ src/vnet/fib/fib_entry.h | 5 + src/vnet/fib/fib_entry_src_special.c | 1 + src/vnet/fib/fib_path.c | 15 +-- src/vnet/ip/lookup.c | 79 ++++++++++++++++ 15 files changed, 382 insertions(+), 10 deletions(-) create mode 100644 src/vnet/dpo/l3_proxy_dpo.c create mode 100644 src/vnet/dpo/l3_proxy_dpo.h (limited to 'src') diff --git a/src/vnet.am b/src/vnet.am index d76441fcc79..5186eaa048c 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -1057,6 +1057,7 @@ libvnet_la_SOURCES += \ vnet/dpo/interface_tx_dpo.c \ vnet/dpo/mpls_disposition.c \ vnet/dpo/mpls_label_dpo.c \ + vnet/dpo/l3_proxy_dpo.c \ vnet/dpo/l2_bridge_dpo.c nobase_include_HEADERS += \ diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index f84969136c9..5f7fe74cd43 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -208,6 +208,16 @@ adj_last_lock_gone (ip_adjacency_t *adj) pool_put(adj_pool, adj); } +u32 +adj_dpo_get_urpf (const dpo_id_t *dpo) +{ + ip_adjacency_t *adj; + + adj = adj_get(dpo->dpoi_index); + + return (adj->rewrite_header.sw_if_index); +} + void adj_lock (adj_index_t adj_index) { diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c index 8d86e2a9f00..82023f12dd2 100644 --- a/src/vnet/adj/adj_glean.c +++ b/src/vnet/adj/adj_glean.c @@ -251,6 +251,7 @@ const static dpo_vft_t adj_glean_dpo_vft = { .dv_lock = 
adj_dpo_lock, .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_glean, + .dv_get_urpf = adj_dpo_get_urpf, }; /** diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index 2c123c542a1..ca41cb21d9e 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -109,4 +109,6 @@ extern void adj_glean_remove(fib_protocol_t proto, extern void adj_mcast_remove(fib_protocol_t proto, u32 sw_if_index); +extern u32 adj_dpo_get_urpf(const dpo_id_t *dpo); + #endif diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c index da06cd00b4e..00a12ad330b 100644 --- a/src/vnet/adj/adj_mcast.c +++ b/src/vnet/adj/adj_mcast.c @@ -387,11 +387,13 @@ const static dpo_vft_t adj_mcast_dpo_vft = { .dv_lock = adj_dpo_lock, .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_mcast, + .dv_get_urpf = adj_dpo_get_urpf, }; const static dpo_vft_t adj_mcast_midchain_dpo_vft = { .dv_lock = adj_dpo_lock, .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_mcast_midchain, + .dv_get_urpf = adj_dpo_get_urpf, }; /** diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index e9a510b004c..370fa4652f8 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -615,6 +615,7 @@ const static dpo_vft_t adj_midchain_dpo_vft = { .dv_lock = adj_dpo_lock, .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_midchain, + .dv_get_urpf = adj_dpo_get_urpf, }; /** diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index 3d450d1fba3..fc7a7fcd93c 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -1047,11 +1047,13 @@ const static dpo_vft_t adj_nbr_dpo_vft = { .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_nbr, .dv_mem_show = adj_mem_show, + .dv_get_urpf = adj_dpo_get_urpf, }; const static dpo_vft_t adj_nbr_incompl_dpo_vft = { .dv_lock = adj_dpo_lock, .dv_unlock = adj_dpo_unlock, .dv_format = format_adj_nbr_incomplete, + .dv_get_urpf = adj_dpo_get_urpf, }; /** diff --git a/src/vnet/dpo/dpo.c 
b/src/vnet/dpo/dpo.c index e94f347466e..7658132d47a 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -41,6 +41,7 @@ #include #include #include +#include /** * Array of char* names for the DPO types and protos @@ -345,6 +346,17 @@ dpo_unlock (dpo_id_t *dpo) dpo_vfts[dpo->dpoi_type].dv_unlock(dpo); } +u32 +dpo_get_urpf(const dpo_id_t *dpo) +{ + if (dpo_id_is_valid(dpo) && + (NULL != dpo_vfts[dpo->dpoi_type].dv_get_urpf)) + { + return (dpo_vfts[dpo->dpoi_type].dv_get_urpf(dpo)); + } + + return (~0); +} static u32 dpo_get_next_node (dpo_type_t child_type, @@ -525,6 +537,7 @@ dpo_module_init (vlib_main_t * vm) interface_tx_dpo_module_init(); mpls_disp_dpo_module_init(); l2_bridge_dpo_module_init(); + l3_proxy_dpo_module_init(); return (NULL); } diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index d1309c19031..304b4331495 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -115,6 +115,7 @@ typedef enum dpo_type_t_ { DPO_INTERFACE_RX, DPO_INTERFACE_TX, DPO_L2_BRIDGE, + DPO_L3_PROXY, DPO_LAST, } __attribute__((packed)) dpo_type_t; @@ -142,7 +143,8 @@ typedef enum dpo_type_t_ { [DPO_MFIB_ENTRY] = "dpo-mfib_entry", \ [DPO_INTERFACE_RX] = "dpo-interface-rx", \ [DPO_INTERFACE_TX] = "dpo-interface-tx", \ - [DPO_L2_BRIDGE] = "dpo-l2-bridge" \ + [DPO_L2_BRIDGE] = "dpo-l2-bridge", \ + [DPO_L3_PROXY] = "dpo-l3-proxy", \ } /** @@ -310,16 +312,26 @@ extern void dpo_stack(dpo_type_t child_type, * @param child_node * The VLIB grpah node index to create an arc from to the parent * - * @parem dpo + * @param dpo * This is the DPO to stack and set. * - * @paren parent_dpo + * @param parent_dpo * The parent DPO to stack onto. 
*/ extern void dpo_stack_from_node(u32 child_node, dpo_id_t *dpo, const dpo_id_t *parent); +/** + * Get a uRPF interface for the DPO + * + * @param dpo + * The DPO from which to get the uRPF interface + * + * @return valid SW interface index or ~0 + */ +extern u32 dpo_get_urpf(const dpo_id_t *dpo); + /** * @brief A lock function registered for a DPO type */ @@ -341,6 +353,12 @@ typedef void (*dpo_mem_show_t)(void); */ typedef u32* (*dpo_get_next_node_t)(const dpo_id_t *dpo); +/** + * @brief Given a DPO instance return an interface that can + * be used in an uRPF check + */ +typedef u32 (*dpo_get_urpf_t)(const dpo_id_t *dpo); + /** * @brief A virtual function table regisitered for a DPO type */ @@ -369,6 +387,10 @@ typedef struct dpo_vft_t_ * function */ dpo_get_next_node_t dv_get_next_node; + /** + * Get uRPF interface + */ + dpo_get_urpf_t dv_get_urpf; } dpo_vft_t; diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c new file mode 100644 index 00000000000..ea3db7151d0 --- /dev/null +++ b/src/vnet/dpo/l3_proxy_dpo.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing l3_proxying the packet, i.e. 
it's for-us + */ +#include +#include +#include + +/** + * @brief pool of all l3_proxy DPOs + */ +l3_proxy_dpo_t *l3_proxy_dpo_pool; + +static l3_proxy_dpo_t * +l3_proxy_dpo_alloc (void) +{ + l3_proxy_dpo_t *l3p; + + pool_get_aligned(l3_proxy_dpo_pool, l3p, CLIB_CACHE_LINE_BYTES); + memset(l3p, 0, sizeof(*l3p)); + + return (l3p); +} + +static l3_proxy_dpo_t * +l3_proxy_dpo_get_from_dpo (const dpo_id_t *dpo) +{ + ASSERT(DPO_L3_PROXY == dpo->dpoi_type); + + return (l3_proxy_dpo_get(dpo->dpoi_index)); +} + + +/* + * l3_proxy_dpo_add_or_lock + * + * Allocate a new l3_proxy DPO, record sw_if_index in it, and set 'dpo' to + * reference the new object. Despite the name, every call allocates a fresh + * pool element; no existing l3_proxy DPO is looked up or shared. + */ +void +l3_proxy_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo) +{ + l3_proxy_dpo_t *l3p; + + l3p = l3_proxy_dpo_alloc(); + + l3p->l3p_sw_if_index = sw_if_index; + + dpo_set(dpo, DPO_L3_PROXY, proto, (l3p - l3_proxy_dpo_pool)); +} + +static void +l3_proxy_dpo_lock (dpo_id_t *dpo) +{ + l3_proxy_dpo_t *l3p; + + l3p = l3_proxy_dpo_get_from_dpo(dpo); + l3p->l3p_locks++; +} + +static void +l3_proxy_dpo_unlock (dpo_id_t *dpo) +{ + l3_proxy_dpo_t *l3p; + + l3p = l3_proxy_dpo_get_from_dpo(dpo); + l3p->l3p_locks--; + + if (0 == l3p->l3p_locks) + { + pool_put(l3_proxy_dpo_pool, l3p); + } +} + +static u32 +l3_proxy_dpo_get_urpf (const dpo_id_t *dpo) +{ + l3_proxy_dpo_t *l3p; + + l3p = l3_proxy_dpo_get_from_dpo(dpo); + + return (l3p->l3p_sw_if_index); +} + +static u8* +format_l3_proxy_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + l3_proxy_dpo_t *l3p; + + if (pool_is_free_index(l3_proxy_dpo_pool, index)) + { + return (format(s, "dpo-l3_proxy DELETED")); + } + + l3p = l3_proxy_dpo_get(index); + + if (~0 != l3p->l3p_sw_if_index) + { + return (format(s, "dpo-l3_proxy: %U", 
format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index))); + } + else + { + return (format(s, "dpo-l3-proxy")); + } +} + +static void +l3_proxy_dpo_mem_show (void) +{ + fib_show_memory_usage("L3 Proxy", + pool_elts(l3_proxy_dpo_pool), + pool_len(l3_proxy_dpo_pool), + sizeof(l3_proxy_dpo_t)); +} + +const static dpo_vft_t l3_proxy_vft = { + .dv_lock = l3_proxy_dpo_lock, + .dv_unlock = l3_proxy_dpo_unlock, + .dv_format = format_l3_proxy_dpo, + .dv_get_urpf = l3_proxy_dpo_get_urpf, + .dv_mem_show = l3_proxy_dpo_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a l3_proxy + * object. + * + * this means that these graph nodes are ones from which a l3_proxy is the + * parent object in the DPO-graph. + */ +const static char* const l3_proxy_ip4_nodes[] = +{ + "ip4-local", + NULL, +}; +const static char* const l3_proxy_ip6_nodes[] = +{ + "ip6-local", + NULL, +}; + +const static char* const * const l3_proxy_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = l3_proxy_ip4_nodes, + [DPO_PROTO_IP6] = l3_proxy_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +l3_proxy_dpo_module_init (void) +{ + dpo_register(DPO_L3_PROXY, &l3_proxy_vft, l3_proxy_nodes); +} diff --git a/src/vnet/dpo/l3_proxy_dpo.h b/src/vnet/dpo/l3_proxy_dpo.h new file mode 100644 index 00000000000..f17ace50876 --- /dev/null +++ b/src/vnet/dpo/l3_proxy_dpo.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing L3 proxy. An L3 proxy is when VPP has + * an address in the FIB that is also assigned to an attached host. + */ + +#ifndef __L3_PROXY_DPO_H__ +#define __L3_PROXY_DPO_H__ + +#include +#include + +typedef struct l3_proxy_dpo_t_ +{ + /** + * The Software interface index on which traffic is l3_proxied + */ + u32 l3p_sw_if_index; + + /** + * number of locks. + */ + u16 l3p_locks; +} l3_proxy_dpo_t; + +extern void l3_proxy_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + dpo_id_t *dpo); + +extern void l3_proxy_dpo_module_init(void); + +/** + * @brief pool of all l3_proxy DPOs + */ +l3_proxy_dpo_t *l3_proxy_dpo_pool; + +static inline l3_proxy_dpo_t * +l3_proxy_dpo_get (index_t index) +{ + return (pool_elt_at_index(l3_proxy_dpo_pool, index)); +} + +#endif diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 2f6e37fe69a..7e4b52acffd 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -42,6 +42,10 @@ typedef enum fib_source_t_ { * Classify. A route that links directly to a classify adj */ FIB_SOURCE_CLASSIFY, + /** + * A route that is being 'proxied' on behalf of another device + */ + FIB_SOURCE_PROXY, /** * Route added as a result of interface configuration. 
* this will also come from the API/CLI, but the distinction is @@ -136,6 +140,7 @@ STATIC_ASSERT (sizeof(fib_source_t) == 1, #define FIB_SOURCES { \ [FIB_SOURCE_SPECIAL] = "special", \ [FIB_SOURCE_INTERFACE] = "interface", \ + [FIB_SOURCE_PROXY] = "proxy", \ [FIB_SOURCE_API] = "API", \ [FIB_SOURCE_CLI] = "CLI", \ [FIB_SOURCE_ADJ] = "adjacency", \ diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c index e979e18f680..c3e4fe5eba7 100644 --- a/src/vnet/fib/fib_entry_src_special.c +++ b/src/vnet/fib/fib_entry_src_special.c @@ -67,4 +67,5 @@ fib_entry_src_special_register (void) fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft); fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft); fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_PROXY, &special_src_vft); } diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 7b713a4b5c5..926b2f3dfc6 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -1854,20 +1854,21 @@ fib_path_contribute_urpf (fib_node_index_t path_index, case FIB_PATH_TYPE_EXCLUSIVE: case FIB_PATH_TYPE_SPECIAL: - /* + { + /* * these path types may link to an adj, if that's what * the clinet gave */ - if (dpo_is_adj(&path->fp_dpo)) - { - ip_adjacency_t *adj; + u32 rpf_sw_if_index; - adj = adj_get(path->fp_dpo.dpoi_index); + rpf_sw_if_index = dpo_get_urpf(&path->fp_dpo); - fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index); + if (~0 != rpf_sw_if_index) + { + fib_urpf_list_append(urpf, rpf_sw_if_index); } break; - + } case FIB_PATH_TYPE_DEAG: case FIB_PATH_TYPE_RECEIVE: case FIB_PATH_TYPE_INTF_RX: diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 856c4942ea4..61350b4f0d8 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include /** @@ -1433,6 +1434,84 @@ VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { }; /* *INDENT-ON* */ +clib_error_t * 
+ip_container_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + fib_prefix_t pfx; + + u32 is_del; + vnet_main_t *vnm; + u32 fib_index; + u32 sw_if_index; + + vnm = vnet_get_main (); + is_del = 0; + sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = 32; + } + else if (unformat (line_input, "%U", + unformat_ip6_address, &pfx.fp_addr.ip6)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = 128; + } + else if (unformat (line_input, "%U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input)); + } + + if (~0 == sw_if_index) + { + return (clib_error_return (0, "no interface")); + } + + fib_index = fib_table_get_table_id_for_sw_if_index (pfx.fp_proto, + sw_if_index); + + if (is_del) + fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_PROXY); + else + { + dpo_id_t proxy_dpo = DPO_INVALID; + + l3_proxy_dpo_add_or_lock (fib_proto_to_dpo (pfx.fp_proto), + sw_if_index, &proxy_dpo); + + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_PROXY, + FIB_ENTRY_FLAG_EXCLUSIVE, &proxy_dpo); + } + + return (NULL); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_container_command_node, static) = { + .path = "ip container", + .function = ip_container_cmd, + .short_help = "ip container
", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg