aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPim van Pelt <pim@ipng.nl>2021-09-09 17:53:09 +0000
committerMatthew Smith <mgsmith@netgate.com>2022-01-11 19:31:25 +0000
commit1705a6baefe205bb6792b547c7376eee3f328a71 (patch)
treee524c5abb4cc39b735fa55a643475482206b49b9
parent0cef5f5d7171e05389beee0e6b4250b366b2b28e (diff)
linux-cp: Add VPP->Linux synchronization
Part 1 -- notes in https://ipng.ch/s/articles/2021/08/13/vpp-2.html Add the ability for VPP to copy out (sync) its state from the dataplane to Linux Interface Pairs, when they exist. Gated by a configuration flag (linux-cp { lcp-sync }), and by a CLI option to toggle on/off, synchronize the following events: - Interface state changes - Interface MTU changes - Interface IPv4/IPv6 address add/deletion In VPP, subints can have any link state and MTU, orthogonal to their phy. In Linux, setting admin-down on a phy forces its children to be down as well. Also, in Linux, MTU of children must not exceed that of the phy. Add a state synchronizer which walks over phy+subints to ensure Linux and VPP end up in the same consistent state. Part 2 -- notes in https://ipng.ch/s/articles/2021/08/15/vpp-3.html Add the ability for VPP to autocreate sub-interfaces of existing Linux Interface pairs. Gated by a configuration flag (linux-cp { lcp-auto-subint }), and by a CLI option to toggle on/off, synchronize the following event: - Sub-interface creation (dot1q, dot1ad, QinQ and QinAD) A few other changes: - Add two functions into netlink.[ch] to delete ip4 and ip6 addresses. - Remove a spurious logline (printing MTU) in netlink.c. - Resolve a TODO around vnet_sw_interface_supports_addressing() Type: improvement Signed-off-by: Pim van Pelt <pim@ipng.nl> Change-Id: I34fc070e80af4013be58d7a8cbf64296cc760e4e Signed-off-by: Pim van Pelt <pim@ipng.nl>
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt1
-rw-r--r--src/plugins/linux-cp/lcp.c37
-rw-r--r--src/plugins/linux-cp/lcp.h5
-rw-r--r--src/plugins/linux-cp/lcp_cli.c62
-rw-r--r--src/plugins/linux-cp/lcp_interface.c262
-rw-r--r--src/plugins/linux-cp/lcp_interface.h54
-rw-r--r--src/plugins/linux-cp/lcp_interface_sync.c442
-rw-r--r--src/vnet/devices/netlink.c45
-rw-r--r--src/vnet/devices/netlink.h2
9 files changed, 743 insertions, 167 deletions
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 080f73ecce4..5053207fff4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -27,6 +27,7 @@ include_directories(${LIBMNL_INCLUDE_DIR})
add_vpp_library(lcp
SOURCES
lcp_interface.c
+ lcp_interface_sync.c
lcp_adj.c
lcp.c
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
index a4d3faf081a..69a7a6e42ba 100644
--- a/src/plugins/linux-cp/lcp.c
+++ b/src/plugins/linux-cp/lcp.c
@@ -20,6 +20,7 @@
#include <net/if.h>
#include <plugins/linux-cp/lcp.h>
+#include <plugins/linux-cp/lcp_interface.h>
lcp_main_t lcp_main;
@@ -76,6 +77,42 @@ lcp_set_default_ns (u8 *ns)
return 0;
}
+/**
+ * Turn VPP->Linux state synchronization on or off.
+ *
+ * Any non-zero is_auto is stored as 1. Switching the feature on also runs
+ * one immediate synchronization pass over the LCP interface pairs.
+ */
+void
+lcp_set_sync (u8 is_auto)
+{
+  u8 enable = (is_auto != 0);
+
+  lcp_main.lcp_sync = enable;
+  if (!enable)
+    return;
+
+  /* Freshly enabled: bring the existing pairs in line right away. */
+  lcp_itf_pair_sync_state_all ();
+}
+
+/** Report whether VPP->Linux state synchronization is enabled. */
+int
+lcp_sync (void)
+{
+  return lcp_main.lcp_sync;
+}
+
+/**
+ * Turn automatic LCP sub-interface creation/deletion on or off.
+ *
+ * Any non-zero is_auto is stored as 1.
+ */
+void
+lcp_set_auto_subint (u8 is_auto)
+{
+  lcp_main.lcp_auto_subint = (is_auto != 0);
+}
+
+/** Report whether automatic LCP sub-interface handling is enabled. */
+int
+lcp_auto_subint (void)
+{
+  return lcp_main.lcp_auto_subint;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
index 19636c546a8..14c1a6e2a75 100644
--- a/src/plugins/linux-cp/lcp.h
+++ b/src/plugins/linux-cp/lcp.h
@@ -24,8 +24,9 @@ typedef struct lcp_main_s
u16 msg_id_base; /* API message ID base */
u8 *default_namespace; /* default namespace if set */
int default_ns_fd;
- /* Set when Unit testing */
- u8 test_mode;
+ u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */
+ u8 lcp_sync; /* Automatically sync VPP changes to LCP */
+ u8 test_mode; /* Set when Unit testing */
} lcp_main_t;
extern lcp_main_t lcp_main;
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index cb874b1c023..8f2d17ab209 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -112,6 +112,68 @@ VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
};
+/**
+ * CLI handler for "lcp lcp-sync".
+ *
+ * Accepts "on"/"enable" or "off"/"disable" and forwards the choice to
+ * lcp_set_sync(). Any other token stops parsing and is reported as an
+ * "unknown input" error.
+ *
+ * @return NULL on success (or when no line input is present), otherwise
+ *         the parse error.
+ */
static clib_error_t *
+lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                     vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return NULL;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "on") || unformat (line_input, "enable"))
+        lcp_set_sync (1);
+      else if (unformat (line_input, "off") ||
+               unformat (line_input, "disable"))
+        lcp_set_sync (0);
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  /* Release the line buffer on every exit path, including parse errors. */
+  unformat_free (line_input);
+  return error;
+}
+
+/* Register the "lcp lcp-sync" CLI path; handled by lcp_sync_command_fn. */
+VLIB_CLI_COMMAND (lcp_sync_command, static) = {
+ .path = "lcp lcp-sync",
+ .short_help = "lcp lcp-sync [on|enable|off|disable]",
+ .function = lcp_sync_command_fn,
+};
+
+/**
+ * CLI handler for "lcp lcp-auto-subint".
+ *
+ * Accepts "on"/"enable" or "off"/"disable" and forwards the choice to
+ * lcp_set_auto_subint(). Any other token stops parsing and is reported as
+ * an "unknown input" error.
+ *
+ * @return NULL on success (or when no line input is present), otherwise
+ *         the parse error.
+ */
+static clib_error_t *
+lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return NULL;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "on") || unformat (line_input, "enable"))
+        lcp_set_auto_subint (1);
+      else if (unformat (line_input, "off") ||
+               unformat (line_input, "disable"))
+        lcp_set_auto_subint (0);
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  /* Release the line buffer on every exit path, including parse errors. */
+  unformat_free (line_input);
+  return error;
+}
+
+/* Register the "lcp lcp-auto-subint" CLI path; handled by
+ * lcp_auto_subint_command_fn. */
+VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = {
+ .path = "lcp lcp-auto-subint",
+ .short_help = "lcp lcp-auto-subint [on|enable|off|disable]",
+ .function = lcp_auto_subint_command_fn,
+};
+
+static clib_error_t *
lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index d044635334f..3dbcb5987a1 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -39,7 +39,7 @@
#include <vlibapi/api_helper_macros.h>
#include <vnet/ipsec/ipsec_punt.h>
-static vlib_log_class_t lcp_itf_pair_logger;
+vlib_log_class_t lcp_itf_pair_logger;
/**
* Pool of LIP objects
@@ -73,14 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft)
vec_add1 (lcp_itf_vfts, *lcp_itf_vft);
}
-#define LCP_ITF_PAIR_DBG(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_INFO(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
-
u8 *
format_lcp_itf_pair (u8 *s, va_list *args)
{
@@ -139,6 +131,9 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
ns = lcp_get_default_ns ();
vlib_cli_output (vm, "lcp default netns '%s'\n",
ns ? (char *) ns : "<unset>");
+ vlib_cli_output (vm, "lcp lcp-auto-subint %s\n",
+ lcp_auto_subint () ? "on" : "off");
+ vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off");
if (phy_sw_if_index == ~0)
{
@@ -157,6 +152,8 @@ lcp_itf_pair_get (u32 index)
{
if (!lcp_itf_pair_pool)
return NULL;
+ if (index == INDEX_INVALID)
+ return NULL;
return pool_elt_at_index (lcp_itf_pair_pool, index);
}
@@ -174,25 +171,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index)
return INDEX_INVALID;
}
-int
-lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns)
-{
- lcp_itf_pair_t *lip;
-
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
- if (!lip)
- {
- LCP_ITF_PAIR_DBG ("lcp_itf_pair_add_sub: can't find LCP of parent %U",
- format_vnet_sw_if_index_name, vnet_get_main (),
- phy_sw_if_index);
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- }
-
- return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
- host_if_name, vif, lip->lip_host_type, ns);
-}
-
const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
[LCP_ITF_HOST_TAP] = {
[AF_IP4] = "linux-cp-xc-ip4",
@@ -248,17 +226,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
index_t lipi;
lcp_itf_pair_t *lip;
+ if (host_sw_if_index == ~0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+ if (lipi != INDEX_INVALID)
+ return VNET_API_ERROR_VALUE_EXIST;
+
LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index, host_name, host_index,
ns);
- if (lipi != INDEX_INVALID)
- return VNET_API_ERROR_VALUE_EXIST;
-
/*
* Create a new pair.
*/
@@ -279,9 +263,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
lip->lip_vif_index = host_index;
lip->lip_namespace = vec_dup (ns);
- if (lip->lip_host_sw_if_index == ~0)
- return 0;
-
/*
* First use of this host interface.
* Enable the x-connect feature on the host to send
@@ -421,10 +402,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
lip = lcp_itf_pair_get (lipi);
- LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %v}", format_vnet_sw_if_index_name,
- vnet_get_main (), lip->lip_phy_sw_if_index,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index, lip->lip_host_name);
+ LCP_ITF_PAIR_NOTICE (
+ "pair_del: host:%U phy:%U host_if:%s vif:%d ns:%s",
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
+ lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace);
/* invoke registered callbacks for pair deletion */
vec_foreach (vft, lcp_itf_vfts)
@@ -475,24 +457,45 @@ lcp_itf_pair_delete_by_index (index_t lipi)
{
u32 host_sw_if_index;
lcp_itf_pair_t *lip;
- u8 *host_name;
+ u8 *host_name, *ns;
lip = lcp_itf_pair_get (lipi);
host_name = vec_dup (lip->lip_host_name);
host_sw_if_index = lip->lip_host_sw_if_index;
+ ns = vec_dup (lip->lip_namespace);
lcp_itf_pair_del (lip->lip_phy_sw_if_index);
if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
{
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ if (ns)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open ((u8 *) ns);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
lcp_netlink_del_link ((const char *) host_name);
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
vnet_delete_sub_interface (host_sw_if_index);
}
else
tap_delete_if (vlib_get_main (), host_sw_if_index);
vec_free (host_name);
+ vec_free (ns);
}
int
@@ -539,58 +542,16 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
};
}
-typedef struct lcp_itf_pair_names_t_
-{
- u8 *lipn_host_name;
- u8 *lipn_phy_name;
- u8 *lipn_namespace;
- u32 lipn_phy_sw_if_index;
-} lcp_itf_pair_names_t;
-
-static lcp_itf_pair_names_t *lipn_names;
-
static clib_error_t *
lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
{
- u8 *host, *phy;
- u8 *ns;
u8 *default_ns;
- host = phy = ns = default_ns = NULL;
+ default_ns = NULL;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vec_reset_length (host);
-
- if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
- {
- lcp_itf_pair_names_t *lipn;
-
- if (vec_len (ns) > LCP_NS_LEN)
- {
- return clib_error_return (0,
- "linux-cp namespace must"
- " be less than %d characters",
- LCP_NS_LEN);
- }
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = vec_dup (ns);
- }
- else if (unformat (input, "pair %v %v", &phy, &host))
- {
- lcp_itf_pair_names_t *lipn;
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = 0;
- }
- else if (unformat (input, "default netns %v", &default_ns))
+ if (unformat (input, "default netns %v", &default_ns))
{
vec_add1 (default_ns, 0);
if (lcp_set_default_ns (default_ns) < 0)
@@ -601,14 +562,14 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
LCP_NS_LEN);
}
}
- else if (unformat (input, "interface-auto-create"))
- lcp_set_auto_intf (1 /* is_auto */);
+ else if (unformat (input, "lcp-auto-subint"))
+ lcp_set_auto_subint (1 /* is_auto */);
+ else if (unformat (input, "lcp-sync"))
+ lcp_set_sync (1 /* is_auto */);
else
return clib_error_return (0, "interfaces not found");
}
- vec_free (host);
- vec_free (phy);
vec_free (default_ns);
return NULL;
@@ -653,7 +614,7 @@ lcp_validate_if_name (u8 *name)
return 1;
}
-static void
+void
lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
{
int curr_ns_fd, vif_ns_fd;
@@ -671,6 +632,8 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
clib_setns (vif_ns_fd);
}
+ /* Set the same link state on the netlink interface
+ */
vnet_netlink_set_link_state (lip->lip_vif_index, state);
if (vif_ns_fd != -1)
@@ -685,6 +648,58 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
return;
}
+/**
+ * Push every IPv4 and IPv6 address configured on the pair's phy interface
+ * to the pair's Linux device via netlink.
+ *
+ * When the pair has a namespace, the netlink calls are issued after
+ * switching into it, and the previous namespace is restored afterwards.
+ * A NULL pair is ignored.
+ */
+void
+lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
+{
+  ip_lookup_main_t *lookup4 = &ip4_main.lookup_main;
+  ip_lookup_main_t *lookup6 = &ip6_main.lookup_main;
+  ip_interface_address_t *ifa = 0;
+  int saved_ns_fd = -1;
+  int target_ns_fd = -1;
+
+  if (!lip)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      saved_ns_fd = clib_netns_open (NULL /* self */);
+      target_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (target_ns_fd != -1)
+        clib_setns (target_ns_fd);
+    }
+
+  /* IPv4 addresses first ... */
+  foreach_ip_interface_address (
+    lookup4, ifa, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+      ip4_address_t *addr4 = ip_interface_address_get_address (lookup4, ifa);
+      LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
+                           format_lcp_itf_pair, lip, format_ip4_address,
+                           addr4, ifa->address_length);
+      vnet_netlink_add_ip4_addr (lip->lip_vif_index, addr4,
+                                 ifa->address_length);
+    }));
+
+  /* ... then IPv6 addresses. */
+  foreach_ip_interface_address (
+    lookup6, ifa, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+      ip6_address_t *addr6 = ip_interface_address_get_address (lookup6, ifa);
+      LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
+                           format_lcp_itf_pair, lip, format_ip6_address,
+                           addr6, ifa->address_length);
+      vnet_netlink_add_ip6_addr (lip->lip_vif_index, addr6,
+                                 ifa->address_length);
+    }));
+
+  if (target_ns_fd != -1)
+    close (target_ns_fd);
+
+  /* Return to the namespace we started in. */
+  if (saved_ns_fd != -1)
+    {
+      clib_setns (saved_ns_fd);
+      close (saved_ns_fd);
+    }
+}
+
typedef struct
{
u32 vlan;
@@ -792,9 +807,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
u16 vlan, proto;
u32 parent_vif_index;
- // TODO(pim) replace with vnet_sw_interface_supports_addressing()
- if (sw->type == VNET_SW_INTERFACE_TYPE_SUB &&
- sw->sub.eth.flags.exact_match == 0)
+ err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index);
+ if (err)
{
LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
"sub-interface without exact-match set");
@@ -921,7 +935,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm,
lip->lip_host_sw_if_index);
err = clib_error_return (
- 0, "failed to create tap subinti: %d.%d. on %U", outer_vlan,
+ 0, "failed to create tap subint: %d.%d. on %U", outer_vlan,
inner_vlan, format_vnet_sw_if_index_name, vnm,
lip->lip_host_sw_if_index);
}
@@ -1107,70 +1121,6 @@ lcp_itf_pair_replace_end (void)
return (0);
}
-static uword
-lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
- vlib_frame_t *f)
-{
- uword *event_data = 0;
- uword *lipn_index;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- vlib_process_get_events (vm, &event_data);
-
- vec_foreach (lipn_index, event_data)
- {
- lcp_itf_pair_names_t *lipn;
-
- lipn = &lipn_names[*lipn_index];
- lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
- lipn->lipn_host_name, LCP_ITF_HOST_TAP,
- lipn->lipn_namespace, NULL);
- }
-
- vec_reset_length (event_data);
- }
-
- return 0;
-}
-
-VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
- .function = lcp_itf_pair_process,
- .name = "linux-cp-itf-process",
- .type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
-{
- lcp_itf_pair_names_t *lipn;
- vlib_main_t *vm = vlib_get_main ();
- vnet_hw_interface_t *hw;
-
- if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
- return NULL;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- vec_foreach (lipn, lipn_names)
- {
- if (!vec_cmp (hw->name, lipn->lipn_phy_name))
- {
- lipn->lipn_phy_sw_if_index = sw_if_index;
-
- vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
- lipn - lipn_names);
- break;
- }
- }
-
- return NULL;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
-
static clib_error_t *
lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index bed30248845..cfcd3925a15 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -21,6 +21,22 @@
#include <plugins/linux-cp/lcp.h>
+extern vlib_log_class_t lcp_itf_pair_logger;
+
+/* Logging helpers for interface-pair code. Each one forwards its arguments
+ * to the vlib_log_* call of the matching level on lcp_itf_pair_logger.
+ *
+ * NOTE(review): every expansion already ends in ';', so a call site written
+ * as "LCP_ITF_PAIR_DBG (...);" yields an extra empty statement. Take care
+ * when using these as the unbraced body of an if/else.
+ */
+#define LCP_ITF_PAIR_DBG(...) \
+ vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_INFO(...) \
+ vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_NOTICE(...) \
+ vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_WARN(...) \
+ vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
+
#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
typedef enum lip_flag_t_
@@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
u8 *host_name, u32 host_index,
lip_host_type_t host_type, u8 *ns);
-extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns);
extern int lcp_itf_pair_del (u32 phy_sw_if_index);
/**
@@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index)
return (lip_db_by_host[host_sw_if_index]);
}
-/**
- * manage interface auto creation
- */
-void lcp_set_auto_intf (u8 is_auto);
-int lcp_auto_intf (void);
-
typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *);
typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *);
@@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft
} lcp_itf_pair_vft_t;
void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft);
+
+/**
+ * sub-interface auto creation/deletion for LCP
+ */
+void lcp_set_auto_subint (u8 is_auto);
+int lcp_auto_subint (void);
+
+/**
+ * sync state changes from VPP into LCP
+ */
+void lcp_set_sync (u8 is_auto);
+int lcp_sync (void);
+
+/* Set TAP and Linux host link state */
+void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state);
+
+/* Set any VPP L3 addresses on Linux host device */
+void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip);
+
+/* Sync all state from VPP to a specific Linux device, all sub-interfaces
+ * of a hardware interface, or all interfaces in the system.
+ *
+ * Note: in some circumstances, this syncer will (have to) make changes to
+ * the VPP interface, for example if its MTU is greater than its parent.
+ * See the function for rationale.
+ */
+void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
+void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
+void lcp_itf_pair_sync_state_all ();
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c
new file mode 100644
index 00000000000..a4e343f6b9a
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface_sync.c
@@ -0,0 +1,442 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2021 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/linux/netns.h>
+#include <plugins/linux-cp/lcp_interface.h>
+
+/**
+ * Copy the VPP-side state of one interface pair into Linux.
+ *
+ * Pushes the admin link state, the L3 MTU and all IPv4/IPv6 addresses of
+ * the pair's phy interface to its Linux counterpart. This is called upon
+ * MTU changes and admin state changes.
+ *
+ * Nothing is done when lcp-sync is disabled, when lip is NULL, or when the
+ * phy software interface or its parent (sup) interface cannot be resolved.
+ *
+ * Note: the MTU that is finally chosen is also written back to both VPP
+ * interfaces of the pair, so VPP state may change as a side effect.
+ *
+ * @param lip  the interface pair to synchronize
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *sw;
+  vnet_sw_interface_t *sup_sw;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+  int state;
+  u32 mtu;
+  u32 netlink_mtu;
+
+  if (!lip || !lcp_sync ())
+    return;
+
+  sw = vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index);
+  if (!sw)
+    return;
+
+  /* The parent is looked up with the _or_null variant and is dereferenced
+   * throughout the rest of this function, so bail out before touching any
+   * namespace if it cannot be found.
+   */
+  sup_sw = vnet_get_sw_interface_or_null (vnm, sw->sup_sw_if_index);
+  if (!sup_sw)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+                     format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+                     sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+  /* Linux will not allow children to be admin-up if their parent is
+   * admin-down. If child is up but parent is not, force it down.
+   */
+  state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      LCP_ITF_PAIR_WARN (
+        "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+        "forcing state to sup-flags to satisfy netlink",
+        format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+        sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+      state = 0;
+    }
+  lcp_itf_set_link_state (lip, state);
+
+  /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+   * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+   * inherits from its parent. Linux likes to be more explicit, so we
+   * reconcile any differences.
+   */
+  mtu = sw->mtu[VNET_MTU_L3];
+  if (mtu == 0)
+    mtu = sup_sw->mtu[VNET_MTU_L3];
+
+  if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
+    {
+      LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: "
+                         "clamping to sup-mtu to satisfy netlink",
+                         format_lcp_itf_pair, lip, sw->flags,
+                         sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+      mtu = sup_sw->mtu[VNET_MTU_L3];
+    }
+
+  /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we
+   * really do want to change the MTU.
+   */
+  vnet_sw_interface_set_mtu (vnm, lip->lip_phy_sw_if_index, mtu);
+  vnet_sw_interface_set_mtu (vnm, lip->lip_host_sw_if_index, mtu);
+  if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu))
+    {
+      if (netlink_mtu != mtu)
+        vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu);
+    }
+
+  /* Linux will remove IPv6 addresses on children when the parent state
+   * goes down, so we ensure all IPv4/IPv6 addresses are synced.
+   */
+  lcp_itf_set_interface_addr (lip);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+}
+
+/**
+ * Pair-walk callback: synchronize the pair at index lipi, if it resolves.
+ * Always asks the walk to continue.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx)
+{
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (lipi);
+
+  if (pair)
+    lcp_itf_pair_sync_state (pair);
+
+  return WALK_CONTINUE;
+}
+
+/**
+ * Software-interface walk callback: if sw_if_index is the phy side of an
+ * interface pair, synchronize that pair. Always asks the walk to continue.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index,
+                                    void *arg)
+{
+  index_t pair_index = lcp_itf_pair_find_by_phy (sw_if_index);
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (pair_index);
+
+  if (pair)
+    lcp_itf_pair_sync_state (pair);
+
+  return WALK_CONTINUE;
+}
+
+/* Walk the interface pairs with lcp_itf_pair_walk() and run
+ * lcp_itf_pair_sync_state() on each pair that resolves. */
+void
+lcp_itf_pair_sync_state_all ()
+{
+ lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0);
+}
+
+/**
+ * Synchronize every interface pair whose phy is a software interface of
+ * the given hardware interface.
+ *
+ * @param hi  hardware interface to walk; NULL is ignored
+ */
+void
+lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  if (!hi)
+    return;
+
+  /* format_vnet_sw_if_index_name expects a software interface index, so
+   * log the hardware interface's sw_if_index rather than its hw_if_index.
+   */
+  LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name,
+                    vnm, hi->sw_if_index);
+
+  vnet_hw_interface_walk_sw (vnm, hi->hw_if_index,
+                             lcp_itf_pair_walk_sync_state_hw_cb, NULL);
+}
+
+/**
+ * Admin up/down callback: propagate a VPP admin state change into Linux.
+ *
+ * Does nothing when lcp-sync is off or when sw_if_index has no interface
+ * pair. For a sub-interface only its own pair is synchronized. For any
+ * other interface, every pair on the owning hardware interface is
+ * synchronized.
+ *
+ * @return always NULL
+ */
+static clib_error_t *
+lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+  lcp_itf_pair_t *pair;
+  vnet_sw_interface_t *swif;
+  vnet_hw_interface_t *hwif;
+
+  if (!lcp_sync ())
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
+                    format_vnet_sw_if_index_name, vnm, sw_if_index, flags);
+
+  /* Only interfaces that have a Linux counterpart are of interest. */
+  pair = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!pair)
+    return NULL;
+
+  LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair,
+                     pair, flags);
+
+  if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+    {
+      lcp_itf_pair_sync_state (pair);
+      return NULL;
+    }
+
+  /* When Linux changes link on a parent interface, all of its children
+   * also change, and a parent MTU change clamps the children's MTU.
+   * Neither holds true in VPP, so walk the sub-interfaces of the phy and
+   * sync their state back into Linux.
+   */
+  swif = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  if (!swif)
+    return NULL;
+
+  hwif = vnet_get_hw_interface_or_null (vnm, swif->hw_if_index);
+  if (!hwif)
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children",
+                    format_vnet_sw_if_index_name, vnm, swif->sw_if_index,
+                    format_vnet_sw_if_index_name, vnm, hwif->sw_if_index);
+
+  lcp_itf_pair_sync_state_hw (hwif);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change);
+
+/**
+ * MTU change callback: propagate a VPP MTU change into Linux.
+ *
+ * Does nothing when lcp-sync is off. For a sub-interface its own pair (if
+ * any) is synchronized. For any other interface, every pair on the owning
+ * hardware interface is synchronized.
+ *
+ * @return always NULL
+ */
+static clib_error_t *
+lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+  vnet_sw_interface_t *swif;
+  vnet_hw_interface_t *hwif;
+
+  if (!lcp_sync ())
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
+                    sw_if_index, flags);
+
+  if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+    {
+      lcp_itf_pair_t *pair =
+        lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+
+      if (pair)
+        lcp_itf_pair_sync_state (pair);
+      return NULL;
+    }
+
+  /* When Linux changes link on a parent interface, all of its children
+   * also change, and a parent MTU change clamps the children's MTU.
+   * Neither holds true in VPP, so walk the sub-interfaces of the phy and
+   * sync their state back into Linux.
+   */
+  swif = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  if (!swif)
+    return NULL;
+
+  hwif = vnet_get_hw_interface_or_null (vnm, swif->hw_if_index);
+  if (!hwif)
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+                    format_vnet_sw_if_index_name, vnm, swif->sw_if_index,
+                    format_vnet_sw_if_index_name, vnm, hwif->sw_if_index);
+
+  lcp_itf_pair_sync_state_hw (hwif);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+/**
+ * IPv4 interface-address callback: mirror an address add or delete on a
+ * VPP interface to the Linux device of its interface pair.
+ *
+ * Does nothing when lcp-sync is off or when sw_if_index has no pair. When
+ * the pair has a namespace, the netlink call is issued after switching
+ * into it and the previous namespace is restored afterwards.
+ */
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip4_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const lcp_itf_pair_t *pair;
+  int saved_ns_fd = -1;
+  int target_ns_fd = -1;
+
+  if (!lcp_sync ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip4_address, address, address_length);
+
+  pair = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!pair)
+    return;
+
+  if (pair->lip_namespace)
+    {
+      saved_ns_fd = clib_netns_open (NULL /* self */);
+      target_ns_fd = clib_netns_open (pair->lip_namespace);
+      if (target_ns_fd != -1)
+        clib_setns (target_ns_fd);
+    }
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+                    format_lcp_itf_pair, pair, format_ip4_address, address,
+                    address_length);
+
+  if (!is_del)
+    vnet_netlink_add_ip4_addr (pair->lip_vif_index, address, address_length);
+  else
+    vnet_netlink_del_ip4_addr (pair->lip_vif_index, address, address_length);
+
+  if (target_ns_fd != -1)
+    close (target_ns_fd);
+
+  /* Return to the namespace we started in. */
+  if (saved_ns_fd != -1)
+    {
+      clib_setns (saved_ns_fd);
+      close (saved_ns_fd);
+    }
+}
+
+/**
+ * IPv6 interface-address callback: mirror an address add or delete on a
+ * VPP interface to the Linux device of its interface pair.
+ *
+ * Does nothing when lcp-sync is off or when sw_if_index has no pair. When
+ * the pair has a namespace, the netlink call is issued after switching
+ * into it and the previous namespace is restored afterwards.
+ */
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip6_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const lcp_itf_pair_t *lip;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+
+  if (!lcp_sync ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip6_address, address, address_length);
+
+  lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!lip)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  /* The address family label is "ip6" here; this is the IPv6 handler. */
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip6 %U/%u", is_del ? "del" : "add",
+                    format_lcp_itf_pair, lip, format_ip6_address, address,
+                    address_length);
+
+  if (is_del)
+    vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
+  else
+    vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  /* Return to the namespace we started in. */
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+}
+
+/**
+ * Interface add/del callback: automatically create or delete the LCP pair
+ * of a VPP sub-interface.
+ *
+ * Does nothing when lcp-auto-subint is off, when sw_if_index cannot be
+ * resolved, or when it is not a sub-interface. On creation a pair is only
+ * made if the parent interface already has one; the Linux device is named
+ * "<parent host name>.<sub id>" and placed in the parent's namespace.
+ *
+ * @return always NULL
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+  const vnet_sw_interface_t *sw;
+  uword is_sub;
+
+  if (!lcp_auto_subint ())
+    return NULL;
+
+  sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  if (!sw)
+    return NULL;
+
+  is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index);
+  if (!is_sub)
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw->sw_if_index, format_vnet_sw_if_index_name,
+                    vnet_get_main (), sw->sup_sw_if_index);
+
+  if (is_create)
+    {
+      const lcp_itf_pair_t *sup_lip;
+      u8 *name = 0;
+
+      // If the parent has a LIP auto-create a LIP for this interface
+      sup_lip =
+        lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
+      if (!sup_lip)
+        return NULL;
+
+      name = format (name, "%s.%d", sup_lip->lip_host_name, sw->sub.id);
+
+      /* 'name' is a vector built by format() and carries no trailing NUL,
+       * so it is printed with %v (length-bounded) rather than %s.
+       */
+      LCP_ITF_PAIR_INFO (
+        "interface_%s: %U has parent %U, auto-creating LCP with host-if %v",
+        is_create ? "add" : "del", format_vnet_sw_if_index_name,
+        vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name);
+
+      lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP,
+                           sup_lip->lip_namespace, NULL);
+
+      vec_free (name);
+    }
+  else
+    {
+      lcp_itf_pair_delete (sw_if_index);
+    }
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
+/**
+ * Init hook: append the IPv4 and IPv6 address add/del handlers of this
+ * file to the respective callback vectors of ip4_main and ip6_main.
+ *
+ * @return always NULL
+ */
+static clib_error_t *
+lcp_itf_sync_init (vlib_main_t *vm)
+{
+  ip4_add_del_interface_address_callback_t on_ip4_addr = {
+    .function = lcp_itf_ip4_add_del_interface_addr,
+    .function_opaque = 0,
+  };
+  ip6_add_del_interface_address_callback_t on_ip6_addr = {
+    .function = lcp_itf_ip6_add_del_interface_addr,
+    .function_opaque = 0,
+  };
+
+  vec_add1 (ip4_main.add_del_interface_address_callbacks, on_ip4_addr);
+  vec_add1 (ip6_main.add_del_interface_address_callbacks, on_ip6_addr);
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_sync_init) = {
+  .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c
index 9aae205c54f..da21e9adea1 100644
--- a/src/vnet/devices/netlink.c
+++ b/src/vnet/devices/netlink.c
@@ -273,7 +273,6 @@ vnet_netlink_get_link_mtu (int ifindex, u32 *mtu)
*mtu = clib_net_to_host_u32 (msg_mtu);
else
*mtu = msg_mtu;
- clib_warning ("mtu: %d", *mtu);
goto done;
}
offset = NLA_ALIGN (attr->nla_len);
@@ -409,6 +408,50 @@ vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw)
return err;
}
+/**
+ * Send an RTM_DELADDR netlink request removing an IPv4 address from a
+ * Linux interface.
+ *
+ * @param ifindex  Linux interface index
+ * @param addr     pointer to the 4-byte address
+ * @param pfx_len  prefix length
+ * @return 0 on success, otherwise an error prefixed with "del ip4 addr"
+ */
+clib_error_t *
+vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len)
+{
+  struct ifaddrmsg ifa = {
+    .ifa_family = AF_INET,
+    .ifa_prefixlen = pfx_len,
+    .ifa_index = ifindex,
+  };
+  vnet_netlink_msg_t msg;
+  clib_error_t *send_err;
+
+  vnet_netlink_msg_init (&msg, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+                         sizeof (struct ifaddrmsg));
+  vnet_netlink_msg_add_rtattr (&msg, IFA_LOCAL, addr, 4);
+  vnet_netlink_msg_add_rtattr (&msg, IFA_ADDRESS, addr, 4);
+
+  send_err = vnet_netlink_msg_send (&msg, NULL);
+  if (!send_err)
+    return 0;
+
+  return clib_error_return (0, "del ip4 addr %U", format_clib_error,
+                            send_err);
+}
+
+/**
+ * Send an RTM_DELADDR netlink request removing an IPv6 address from a
+ * Linux interface.
+ *
+ * @param ifindex  Linux interface index
+ * @param addr     pointer to the 16-byte address
+ * @param pfx_len  prefix length
+ * @return 0 on success, otherwise an error prefixed with "del ip6 addr"
+ */
+clib_error_t *
+vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len)
+{
+  struct ifaddrmsg ifa = {
+    .ifa_family = AF_INET6,
+    .ifa_prefixlen = pfx_len,
+    .ifa_index = ifindex,
+  };
+  vnet_netlink_msg_t msg;
+  clib_error_t *send_err;
+
+  vnet_netlink_msg_init (&msg, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+                         sizeof (struct ifaddrmsg));
+  vnet_netlink_msg_add_rtattr (&msg, IFA_LOCAL, addr, 16);
+  vnet_netlink_msg_add_rtattr (&msg, IFA_ADDRESS, addr, 16);
+
+  send_err = vnet_netlink_msg_send (&msg, NULL);
+  if (!send_err)
+    return 0;
+
+  return clib_error_return (0, "del ip6 addr %U", format_clib_error,
+                            send_err);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/netlink.h b/src/vnet/devices/netlink.h
index f1c42609cbf..086781fdbff 100644
--- a/src/vnet/devices/netlink.h
+++ b/src/vnet/devices/netlink.h
@@ -26,8 +26,10 @@ clib_error_t *vnet_netlink_get_link_mtu (int ifindex, u32 *mtu);
clib_error_t *vnet_netlink_set_link_mtu (int ifindex, int mtu);
clib_error_t *vnet_netlink_add_ip4_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip6_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip4_route (void *dst, u8 dst_len, void *gw);
clib_error_t *vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw);