diff options
-rw-r--r-- | src/plugins/linux-cp/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/plugins/linux-cp/FEATURE.yaml | 3 | ||||
-rw-r--r-- | src/plugins/linux-cp/lcp_mpls_sync.c | 158 | ||||
-rw-r--r-- | src/plugins/linux-cp/lcp_node.c | 100 | ||||
-rw-r--r-- | src/plugins/linux-cp/lcp_router.c | 168 |
5 files changed, 419 insertions, 11 deletions
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt index 3a61bbb0afd..c891689b4b4 100644 --- a/src/plugins/linux-cp/CMakeLists.txt +++ b/src/plugins/linux-cp/CMakeLists.txt @@ -34,6 +34,7 @@ add_vpp_library(lcp SOURCES lcp_interface.c lcp_interface_sync.c + lcp_mpls_sync.c lcp_adj.c lcp.c diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml index cf99b7aa5be..425858591f2 100644 --- a/src/plugins/linux-cp/FEATURE.yaml +++ b/src/plugins/linux-cp/FEATURE.yaml @@ -17,6 +17,9 @@ description: |- In the TX direction, packets received by VPP an the mirror Tap/Tun are cross-connected to the VPP interfaces. For IP packets, IP output features are applied. + If MPLS is enabled on a VPP interface, state is synced to Linux and + in TX direction a special feature is enabled to pass MPLS packets through + untouched. The "linux_nl" plugin listens to netlink messages and synchronizes the IP configuration of the paired interfaces. diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c new file mode 100644 index 00000000000..d4b0d13b907 --- /dev/null +++ b/src/plugins/linux-cp/lcp_mpls_sync.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2023 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE + +#include <linux-cp/lcp_interface.h> + +#include <vnet/plugin/plugin.h> +#include <vnet/mpls/mpls.h> +#include <vppinfra/linux/netns.h> + +#include <fcntl.h> + +vlib_log_class_t lcp_mpls_sync_logger; + +#define LCP_MPLS_SYNC_DBG(...) \ + vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__); + +void +lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip) +{ + u8 phy_is_enabled = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index); + LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_is_enabled, + format_lcp_itf_pair, lip); + if (phy_is_enabled) + mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index, + 1); +} + +void +lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index, + u32 is_enable) +{ + lcp_itf_pair_t *lip; + index_t lipi; + int curr_ns_fd = -1; + int vif_ns_fd = -1; + int ctl_fd = -1; + u8 *ctl_path = NULL; + + LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index); + + // If device is LCP PHY, sync state to host tap. + lipi = lcp_itf_pair_find_by_phy (sw_if_index); + if (INDEX_INVALID != lipi) + { + lip = lcp_itf_pair_get (lipi); + LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable, + format_lcp_itf_pair, lip); + mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index, + is_enable); + return; + } + + // If device is LCP host, toggle MPLS XC feature. + lipi = lcp_itf_pair_find_by_host (sw_if_index); + if (INDEX_INVALID == lipi) + return; + lip = lcp_itf_pair_get (lipi); + + vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index, + is_enable, NULL, 0); + + LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable, + format_lcp_itf_pair, lip); + + // If syncing is enabled, sync Linux state as well. + if (!lcp_sync ()) + return; + + if (lip->lip_namespace) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open (lip->lip_namespace); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + + ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c", + lip->lip_host_name, NULL); + if (NULL == ctl_path) + { + LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl"); + goto SYNC_CLEANUP; + } + + ctl_fd = open ((char *) ctl_path, O_WRONLY); + if (ctl_fd < 0) + { + LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing", + ctl_path); + goto SYNC_CLEANUP; + } + + if (fdformat (ctl_fd, "%u", is_enable) < 1) + { + LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path); + goto SYNC_CLEANUP; + } + + LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s", + lip->lip_host_name); + +SYNC_CLEANUP: + if (ctl_fd > -1) + close (ctl_fd); + + if (NULL != ctl_path) + vec_free (ctl_path); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } +} + +static clib_error_t * +lcp_mpls_sync_init (vlib_main_t *vm) +{ + lcp_itf_pair_vft_t mpls_sync_itf_pair_vft = { + .pair_add_fn = lcp_mpls_sync_pair_add_cb, + }; + lcp_itf_pair_register_vft (&mpls_sync_itf_pair_vft); + + mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0); + + lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync"); + + return NULL; +} + +VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = { + .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c index b00049884ea..241cc5e4bff 100644 --- a/src/plugins/linux-cp/lcp_node.c +++ b/src/plugins/linux-cp/lcp_node.c @@ -31,6 +31,7 @@ #include <vnet/ip/ip4.h> #include <vnet/ip/ip6.h> #include <vnet/l2/l2_input.h> +#include <vnet/mpls/mpls.h> #define foreach_lip_punt \ _ (IO, "punt to host") \ @@ -438,6 +439,103 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = { typedef enum { + LCP_XC_MPLS_NEXT_DROP, + LCP_XC_MPLS_NEXT_IO, + LCP_XC_MPLS_N_NEXT, +} lcp_xc_mpls_next_t; + +static_always_inline uword +lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, *to_next, n_left_to_next; + lcp_xc_next_t next_index; + + next_index = 0; + n_left_from = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + const ethernet_header_t *eth; + const lcp_itf_pair_t *lip; + u32 next0, bi0, lipi, ai; + vlib_buffer_t *b0; + // const ip_adjacency_t *adj; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + lipi = + lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]); + lip = lcp_itf_pair_get (lipi); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index; + vlib_buffer_advance (b0, -lip->lip_rewrite_len); + eth = vlib_buffer_get_current (b0); + + ai = ADJ_INDEX_INVALID; + next0 = LCP_XC_MPLS_NEXT_DROP; + if (!ethernet_address_cast (eth->dst_address)) + ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len, + vnet_buffer (b0)->sw_if_index[VLIB_TX]); + if (ai != ADJ_INDEX_INVALID) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai; + next0 = LCP_XC_MPLS_NEXT_IO; + } + + if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) + { + lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->phy_sw_if_index = lip->lip_phy_sw_if_index; + t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_NODE_FN (lcp_xc_mpls) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lcp_xc_mpls_inline (vm, node, frame)); +} + +VLIB_REGISTER_NODE ( + lcp_xc_mpls) = { .name = "linux-cp-xc-mpls", + .vector_size = sizeof (u32), + .format_trace = format_lcp_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_next_nodes = LCP_XC_MPLS_N_NEXT, + .next_nodes = { + [LCP_XC_MPLS_NEXT_DROP] = "error-drop", + [LCP_XC_MPLS_NEXT_IO] = "interface-output", + } }; + +VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = { + .arc_name = "mpls-input", + .node_name = "linux-cp-xc-mpls", +}; + +typedef enum +{ LCP_XC_L3_NEXT_XC, LCP_XC_L3_NEXT_LOOKUP, LCP_XC_L3_N_NEXT, @@ -446,7 +544,7 @@ typedef enum /** * X-connect all packets from the HOST to the PHY on L3 interfaces * - * There's only one adjacency that can be used on thises links. + * There's only one adjacency that can be used on these links. */ static_always_inline u32 lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node, diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c index 88b7c537afc..f53ec4fd8c6 100644 --- a/src/plugins/linux-cp/lcp_router.c +++ b/src/plugins/linux-cp/lcp_router.c @@ -15,6 +15,7 @@ #include <sys/socket.h> #include <linux/if.h> +#include <linux/mpls.h> //#include <vlib/vlib.h> #include <vlib/unix/plugin.h> @@ -27,6 +28,7 @@ #include <netlink/route/link.h> #include <netlink/route/route.h> #include <netlink/route/neighbour.h> +#include <netlink/route/nexthop.h> #include <netlink/route/addr.h> #include <netlink/route/link/vlan.h> @@ -596,9 +598,18 @@ VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down); static fib_protocol_t lcp_router_proto_k2f (uint32_t k) { - if (AF_INET6 == k) - return (FIB_PROTOCOL_IP6); - return (FIB_PROTOCOL_IP4); + switch (k) + { + case AF_INET6: + return FIB_PROTOCOL_IP6; + case AF_INET: + return FIB_PROTOCOL_IP4; + case AF_MPLS: + return FIB_PROTOCOL_MPLS; + default: + ASSERT (0); + return FIB_PROTOCOL_NONE; + } } static void @@ -608,6 +619,7 @@ lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia) ip_address_reset (ia); fproto = lcp_router_proto_k2f (nl_addr_get_family (rna)); + ASSERT (FIB_PROTOCOL_MPLS != fproto); ip_address_set (ia, nl_addr_get_binary_addr (rna), FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6); @@ -619,6 +631,8 @@ lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia) fib_protocol_t fproto; fproto = lcp_router_proto_k2f (nl_addr_get_family (rna)); + ASSERT (FIB_PROTOCOL_MPLS != fproto); + ip46_address_reset (ia); if (FIB_PROTOCOL_IP4 == fproto) memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna)); @@ -988,9 +1002,31 @@ static void lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p) { const struct nl_addr *addr = rtnl_route_get_dst (r); + u32 *baddr = nl_addr_get_binary_addr (addr); + u32 blen = nl_addr_get_len (addr); + ip46_address_t *paddr = &p->fp_addr; + u32 entry; + + p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (addr)); + + switch (p->fp_proto) + { + case FIB_PROTOCOL_MPLS: + entry = ntohl (*baddr); + p->fp_label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + p->fp_len = 21; + p->fp_eos = MPLS_NON_EOS; + return; + case FIB_PROTOCOL_IP4: + ip46_address_reset (paddr); + memcpy (&paddr->ip4, baddr, blen); + break; + case FIB_PROTOCOL_IP6: + memcpy (&paddr->ip6, baddr, blen); + break; + } p->fp_len = nl_addr_get_prefixlen (addr); - p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_addr); } static void @@ -1008,6 +1044,37 @@ lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p) p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr); } +static int +lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr) +{ + if (!addr) + return 0; + + struct mpls_label *stack = nl_addr_get_binary_addr (addr); + u32 entry, label; + u8 exp, ttl; + int label_count = 0; + + while (1) + { + entry = ntohl (stack[label_count++].entry); + label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + + fib_mpls_label_t fml = { + .fml_value = label, + .fml_exp = exp, + .fml_ttl = ttl, + }; + vec_add1 (path->frp_label_stack, fml); + + if (entry & MPLS_LS_S_MASK) + break; + } + return label_count; +} + typedef struct lcp_router_route_path_parse_t_ { fib_route_path_t *paths; @@ -1023,6 +1090,7 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg) lcp_router_route_path_parse_t *ctx = arg; fib_route_path_t *path; u32 sw_if_index; + int label_count = 0; sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh)); @@ -1035,9 +1103,16 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg) path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags; path->frp_sw_if_index = sw_if_index; - path->frp_weight = rtnl_route_nh_get_weight (rnh); path->frp_preference = ctx->preference; + /* + * FIB Path Weight of 0 is meaningless and replaced with 1 further along. + * See fib_path_create. fib_path_cmp_w_route_path would fail to match + * such a fib_route_path_t with any fib_path_t, because a fib_path_t's + * fp_weight can never be 0. + */ + path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh)); + addr = rtnl_route_nh_get_gateway (rnh); if (!addr) addr = rtnl_route_nh_get_via (rnh); @@ -1049,6 +1124,32 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg) path->frp_proto = fib_proto_to_dpo (fproto); + if (ctx->route_proto == FIB_PROTOCOL_MPLS) + { + addr = rtnl_route_nh_get_newdst (rnh); + label_count = lcp_router_mpls_nladdr_to_path (path, addr); + if (label_count) + { + LCP_ROUTER_DBG (" is label swap to %u", + path->frp_label_stack[0].fml_value); + } + else + { + fib_mpls_label_t fml = { + .fml_value = MPLS_LABEL_POP, + }; + vec_add1 (path->frp_label_stack, fml); + LCP_ROUTER_DBG (" is label pop"); + } + } + +#ifdef NL_CAPABILITY_VERSION_3_6_0 + addr = rtnl_route_nh_get_encap_mpls_dst (rnh); + label_count = lcp_router_mpls_nladdr_to_path (path, addr); + if (label_count) + LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count); +#endif + if (ctx->is_mcast) path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD; @@ -1171,11 +1272,20 @@ lcp_router_route_del (struct rtnl_route *rr) fib_src = lcp_router_proto_fib_source (rproto); - if (pfx.fp_proto == FIB_PROTOCOL_IP6) - fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src); - else - fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src, - np.paths); + switch (pfx.fp_proto) + { + case FIB_PROTOCOL_IP6: + fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src); + break; + case FIB_PROTOCOL_MPLS: + fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src, + np.paths); + /* delete the EOS route in addition to NEOS - fallthrough */ + pfx.fp_eos = MPLS_EOS; + default: + fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src, + np.paths); + } } vec_free (np.paths); @@ -1183,6 +1293,26 @@ lcp_router_route_del (struct rtnl_route *rr) lcp_router_table_unlock (nlt); } +static fib_route_path_t * +lcp_router_fib_route_path_dup (fib_route_path_t *old) +{ + int idx; + fib_route_path_t *p; + + fib_route_path_t *new = vec_dup (old); + if (!new) + return NULL; + + for (idx = 0; idx < vec_len (new); idx++) + { + p = &new[idx]; + if (p->frp_label_stack) + p->frp_label_stack = vec_dup (p->frp_label_stack); + } + + return new; +} + static void lcp_router_route_add (struct rtnl_route *rr, int is_replace) { @@ -1263,6 +1393,24 @@ lcp_router_route_add (struct rtnl_route *rr, int is_replace) fib_src = lcp_router_proto_fib_source (rproto); + if (pfx.fp_proto == FIB_PROTOCOL_MPLS) + { + /* in order to avoid double-frees, we duplicate the paths. */ + fib_route_path_t *pathdup = + lcp_router_fib_route_path_dup (np.paths); + if (is_replace) + fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src, + entry_flags, pathdup); + else + fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src, + entry_flags, pathdup); + vec_free (pathdup); + + /* install EOS route in addition to NEOS */ + pfx.fp_eos = MPLS_EOS; + pfx.fp_payload_proto = np.paths[0].frp_proto; + } + if (is_replace) fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src, entry_flags, np.paths); |