aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt1
-rw-r--r--src/plugins/linux-cp/FEATURE.yaml3
-rw-r--r--src/plugins/linux-cp/lcp_mpls_sync.c158
-rw-r--r--src/plugins/linux-cp/lcp_node.c100
-rw-r--r--src/plugins/linux-cp/lcp_router.c168
5 files changed, 419 insertions, 11 deletions
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 3a61bbb0afd..c891689b4b4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -34,6 +34,7 @@ add_vpp_library(lcp
SOURCES
lcp_interface.c
lcp_interface_sync.c
+ lcp_mpls_sync.c
lcp_adj.c
lcp.c
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
index cf99b7aa5be..425858591f2 100644
--- a/src/plugins/linux-cp/FEATURE.yaml
+++ b/src/plugins/linux-cp/FEATURE.yaml
@@ -17,6 +17,9 @@ description: |-
In the TX direction, packets received by VPP an the mirror Tap/Tun
are cross-connected to the VPP interfaces. For IP packets, IP output
features are applied.
+ If MPLS is enabled on a VPP interface, state is synced to Linux and
+ in TX direction a special feature is enabled to pass MPLS packets through
+ untouched.
The "linux_nl" plugin listens to netlink messages and synchronizes the IP
configuration of the paired interfaces.
diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c
new file mode 100644
index 00000000000..d4b0d13b907
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_mpls_sync.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux-cp/lcp_interface.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/linux/netns.h>
+
+#include <fcntl.h>
+
+vlib_log_class_t lcp_mpls_sync_logger;
+
+#define LCP_MPLS_SYNC_DBG(...) \
+ vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__);
+
+void
+lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip)
+{
+ u8 phy_is_enabled = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index);
+ LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_is_enabled,
+ format_lcp_itf_pair, lip);
+ if (phy_is_enabled)
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ 1);
+}
+
+void
+lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index,
+ u32 is_enable)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ int ctl_fd = -1;
+ u8 *ctl_path = NULL;
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index);
+
+ // If device is LCP PHY, sync state to host tap.
+ lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+ if (INDEX_INVALID != lipi)
+ {
+ lip = lcp_itf_pair_get (lipi);
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ is_enable);
+ return;
+ }
+
+ // If device is LCP host, toggle MPLS XC feature.
+ lipi = lcp_itf_pair_find_by_host (sw_if_index);
+ if (INDEX_INVALID == lipi)
+ return;
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index,
+ is_enable, NULL, 0);
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+
+ // If syncing is enabled, sync Linux state as well.
+ if (!lcp_sync ())
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c",
+ lip->lip_host_name, NULL);
+ if (NULL == ctl_path)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl");
+ goto SYNC_CLEANUP;
+ }
+
+ ctl_fd = open ((char *) ctl_path, O_WRONLY);
+ if (ctl_fd < 0)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing",
+ ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ if (fdformat (ctl_fd, "%u", is_enable) < 1)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s",
+ lip->lip_host_name);
+
+SYNC_CLEANUP:
+ if (ctl_fd > -1)
+ close (ctl_fd);
+
+ if (NULL != ctl_path)
+ vec_free (ctl_path);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_mpls_sync_init (vlib_main_t *vm)
+{
+ lcp_itf_pair_vft_t mpls_sync_itf_pair_vft = {
+ .pair_add_fn = lcp_mpls_sync_pair_add_cb,
+ };
+ lcp_itf_pair_register_vft (&mpls_sync_itf_pair_vft);
+
+ mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0);
+
+ lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync");
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index b00049884ea..241cc5e4bff 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -31,6 +31,7 @@
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/mpls/mpls.h>
#define foreach_lip_punt \
_ (IO, "punt to host") \
@@ -438,6 +439,103 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
typedef enum
{
+ LCP_XC_MPLS_NEXT_DROP,
+ LCP_XC_MPLS_NEXT_IO,
+ LCP_XC_MPLS_N_NEXT,
+} lcp_xc_mpls_next_t;
+
+static_always_inline uword
+lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, *to_next, n_left_to_next;
+ lcp_xc_next_t next_index;
+
+ next_index = 0;
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const ethernet_header_t *eth;
+ const lcp_itf_pair_t *lip;
+ u32 next0, bi0, lipi, ai;
+ vlib_buffer_t *b0;
+ // const ip_adjacency_t *adj;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ lipi =
+ lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+ vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+ eth = vlib_buffer_get_current (b0);
+
+ ai = ADJ_INDEX_INVALID;
+ next0 = LCP_XC_MPLS_NEXT_DROP;
+ if (!ethernet_address_cast (eth->dst_address))
+ ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+ if (ai != ADJ_INDEX_INVALID)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+ next0 = LCP_XC_MPLS_NEXT_IO;
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+ t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (lcp_xc_mpls)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lcp_xc_mpls_inline (vm, node, frame));
+}
+
+VLIB_REGISTER_NODE (
+ lcp_xc_mpls) = { .name = "linux-cp-xc-mpls",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lcp_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = LCP_XC_MPLS_N_NEXT,
+ .next_nodes = {
+ [LCP_XC_MPLS_NEXT_DROP] = "error-drop",
+ [LCP_XC_MPLS_NEXT_IO] = "interface-output",
+ } };
+
+VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "linux-cp-xc-mpls",
+};
+
+typedef enum
+{
LCP_XC_L3_NEXT_XC,
LCP_XC_L3_NEXT_LOOKUP,
LCP_XC_L3_N_NEXT,
@@ -446,7 +544,7 @@ typedef enum
/**
* X-connect all packets from the HOST to the PHY on L3 interfaces
*
- * There's only one adjacency that can be used on thises links.
+ * There's only one adjacency that can be used on these links.
*/
static_always_inline u32
lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
index 88b7c537afc..f53ec4fd8c6 100644
--- a/src/plugins/linux-cp/lcp_router.c
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -15,6 +15,7 @@
#include <sys/socket.h>
#include <linux/if.h>
+#include <linux/mpls.h>
//#include <vlib/vlib.h>
#include <vlib/unix/plugin.h>
@@ -27,6 +28,7 @@
#include <netlink/route/link.h>
#include <netlink/route/route.h>
#include <netlink/route/neighbour.h>
+#include <netlink/route/nexthop.h>
#include <netlink/route/addr.h>
#include <netlink/route/link/vlan.h>
@@ -596,9 +598,18 @@ VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
static fib_protocol_t
lcp_router_proto_k2f (uint32_t k)
{
- if (AF_INET6 == k)
- return (FIB_PROTOCOL_IP6);
- return (FIB_PROTOCOL_IP4);
+ switch (k)
+ {
+ case AF_INET6:
+ return FIB_PROTOCOL_IP6;
+ case AF_INET:
+ return FIB_PROTOCOL_IP4;
+ case AF_MPLS:
+ return FIB_PROTOCOL_MPLS;
+ default:
+ ASSERT (0);
+ return FIB_PROTOCOL_NONE;
+ }
}
static void
@@ -608,6 +619,7 @@ lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
ip_address_reset (ia);
fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
ip_address_set (ia, nl_addr_get_binary_addr (rna),
FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6);
@@ -619,6 +631,8 @@ lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
fib_protocol_t fproto;
fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
ip46_address_reset (ia);
if (FIB_PROTOCOL_IP4 == fproto)
memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
@@ -988,9 +1002,31 @@ static void
lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
{
const struct nl_addr *addr = rtnl_route_get_dst (r);
+ u32 *baddr = nl_addr_get_binary_addr (addr);
+ u32 blen = nl_addr_get_len (addr);
+ ip46_address_t *paddr = &p->fp_addr;
+ u32 entry;
+
+ p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (addr));
+
+ switch (p->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ entry = ntohl (*baddr);
+ p->fp_label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ p->fp_len = 21;
+ p->fp_eos = MPLS_NON_EOS;
+ return;
+ case FIB_PROTOCOL_IP4:
+ ip46_address_reset (paddr);
+ memcpy (&paddr->ip4, baddr, blen);
+ break;
+ case FIB_PROTOCOL_IP6:
+ memcpy (&paddr->ip6, baddr, blen);
+ break;
+ }
p->fp_len = nl_addr_get_prefixlen (addr);
- p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_addr);
}
static void
@@ -1008,6 +1044,37 @@ lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr);
}
+static int
+lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr)
+{
+ if (!addr)
+ return 0;
+
+ struct mpls_label *stack = nl_addr_get_binary_addr (addr);
+ u32 entry, label;
+ u8 exp, ttl;
+ int label_count = 0;
+
+ while (1)
+ {
+ entry = ntohl (stack[label_count++].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+
+ fib_mpls_label_t fml = {
+ .fml_value = label,
+ .fml_exp = exp,
+ .fml_ttl = ttl,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+
+ if (entry & MPLS_LS_S_MASK)
+ break;
+ }
+ return label_count;
+}
+
typedef struct lcp_router_route_path_parse_t_
{
fib_route_path_t *paths;
@@ -1023,6 +1090,7 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
lcp_router_route_path_parse_t *ctx = arg;
fib_route_path_t *path;
u32 sw_if_index;
+ int label_count = 0;
sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
@@ -1035,9 +1103,16 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
path->frp_sw_if_index = sw_if_index;
- path->frp_weight = rtnl_route_nh_get_weight (rnh);
path->frp_preference = ctx->preference;
+ /*
+ * FIB Path Weight of 0 is meaningless and replaced with 1 further along.
+ * See fib_path_create. fib_path_cmp_w_route_path would fail to match
+ * such a fib_route_path_t with any fib_path_t, because a fib_path_t's
+ * fp_weight can never be 0.
+ */
+ path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh));
+
addr = rtnl_route_nh_get_gateway (rnh);
if (!addr)
addr = rtnl_route_nh_get_via (rnh);
@@ -1049,6 +1124,32 @@ lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
path->frp_proto = fib_proto_to_dpo (fproto);
+ if (ctx->route_proto == FIB_PROTOCOL_MPLS)
+ {
+ addr = rtnl_route_nh_get_newdst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ {
+ LCP_ROUTER_DBG (" is label swap to %u",
+ path->frp_label_stack[0].fml_value);
+ }
+ else
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_LABEL_POP,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+ LCP_ROUTER_DBG (" is label pop");
+ }
+ }
+
+#ifdef NL_CAPABILITY_VERSION_3_6_0
+ addr = rtnl_route_nh_get_encap_mpls_dst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count);
+#endif
+
if (ctx->is_mcast)
path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
@@ -1171,11 +1272,20 @@ lcp_router_route_del (struct rtnl_route *rr)
fib_src = lcp_router_proto_fib_source (rproto);
- if (pfx.fp_proto == FIB_PROTOCOL_IP6)
- fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
- else
- fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
- np.paths);
+ switch (pfx.fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ /* delete the EOS route in addition to NEOS - fallthrough */
+ pfx.fp_eos = MPLS_EOS;
+ default:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ }
}
vec_free (np.paths);
@@ -1183,6 +1293,26 @@ lcp_router_route_del (struct rtnl_route *rr)
lcp_router_table_unlock (nlt);
}
+static fib_route_path_t *
+lcp_router_fib_route_path_dup (fib_route_path_t *old)
+{
+ int idx;
+ fib_route_path_t *p;
+
+ fib_route_path_t *new = vec_dup (old);
+ if (!new)
+ return NULL;
+
+ for (idx = 0; idx < vec_len (new); idx++)
+ {
+ p = &new[idx];
+ if (p->frp_label_stack)
+ p->frp_label_stack = vec_dup (p->frp_label_stack);
+ }
+
+ return new;
+}
+
static void
lcp_router_route_add (struct rtnl_route *rr, int is_replace)
{
@@ -1263,6 +1393,24 @@ lcp_router_route_add (struct rtnl_route *rr, int is_replace)
fib_src = lcp_router_proto_fib_source (rproto);
+ if (pfx.fp_proto == FIB_PROTOCOL_MPLS)
+ {
+ /* in order to avoid double-frees, we duplicate the paths. */
+ fib_route_path_t *pathdup =
+ lcp_router_fib_route_path_dup (np.paths);
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ vec_free (pathdup);
+
+ /* install EOS route in addition to NEOS */
+ pfx.fp_eos = MPLS_EOS;
+ pfx.fp_payload_proto = np.paths[0].frp_proto;
+ }
+
if (is_replace)
fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
entry_flags, np.paths);