From 1705a6baefe205bb6792b547c7376eee3f328a71 Mon Sep 17 00:00:00 2001
From: Pim van Pelt
Date: Thu, 9 Sep 2021 17:53:09 +0000
Subject: linux-cp: Add VPP->Linux synchronization

Part 1 -- notes in https://ipng.ch/s/articles/2021/08/13/vpp-2.html

Add the ability for VPP to copy out (sync) its state from the dataplane
to Linux Interface Pairs, when they exist. When enabled by the
configuration flag (linux-cp { lcp-sync }) or toggled on via the CLI,
the following events are synchronized:
- Interface state changes
- Interface MTU changes
- Interface IPv4/IPv6 address addition/deletion

In VPP, subints can have any link state and MTU, orthogonal to their
phy. In Linux, setting admin-down on a phy forces its children to be
down as well. Also, in Linux, the MTU of children must not exceed that
of the phy.

Add a state synchronizer which walks over phy+subints to ensure Linux
and VPP end up in the same consistent state.

Part 2 -- notes in https://ipng.ch/s/articles/2021/08/15/vpp-3.html

Add the ability for VPP to auto-create sub-interfaces of existing Linux
Interface Pairs. When enabled by the configuration flag
(linux-cp { lcp-auto-subint }) or toggled on via the CLI, the following
event is synchronized:
- Sub-interface creation and deletion (dot1q, dot1ad, QinQ and QinAD)

A few other changes:
- Add two functions into netlink.[ch] to delete ip4 and ip6 addresses.
- Remove a spurious log line (printing MTU) in netlink.c.
- Resolve a TODO around vnet_sw_interface_supports_addressing() Type: improvement Signed-off-by: Pim van Pelt Change-Id: I34fc070e80af4013be58d7a8cbf64296cc760e4e Signed-off-by: Pim van Pelt --- src/plugins/linux-cp/CMakeLists.txt | 1 + src/plugins/linux-cp/lcp.c | 37 +++ src/plugins/linux-cp/lcp.h | 5 +- src/plugins/linux-cp/lcp_cli.c | 62 +++++ src/plugins/linux-cp/lcp_interface.c | 262 +++++++----------- src/plugins/linux-cp/lcp_interface.h | 54 +++- src/plugins/linux-cp/lcp_interface_sync.c | 442 ++++++++++++++++++++++++++++++ src/vnet/devices/netlink.c | 45 ++- src/vnet/devices/netlink.h | 2 + 9 files changed, 743 insertions(+), 167 deletions(-) create mode 100644 src/plugins/linux-cp/lcp_interface_sync.c (limited to 'src') diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt index 080f73ecce4..5053207fff4 100644 --- a/src/plugins/linux-cp/CMakeLists.txt +++ b/src/plugins/linux-cp/CMakeLists.txt @@ -27,6 +27,7 @@ include_directories(${LIBMNL_INCLUDE_DIR}) add_vpp_library(lcp SOURCES lcp_interface.c + lcp_interface_sync.c lcp_adj.c lcp.c diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c index a4d3faf081a..69a7a6e42ba 100644 --- a/src/plugins/linux-cp/lcp.c +++ b/src/plugins/linux-cp/lcp.c @@ -20,6 +20,7 @@ #include #include +#include lcp_main_t lcp_main; @@ -76,6 +77,42 @@ lcp_set_default_ns (u8 *ns) return 0; } +void +lcp_set_sync (u8 is_auto) +{ + lcp_main_t *lcpm = &lcp_main; + + lcpm->lcp_sync = (is_auto != 0); + + // If we set to 'on', do a one-off sync of LCP interfaces + if (is_auto) + lcp_itf_pair_sync_state_all (); +} + +int +lcp_sync (void) +{ + lcp_main_t *lcpm = &lcp_main; + + return lcpm->lcp_sync; +} + +void +lcp_set_auto_subint (u8 is_auto) +{ + lcp_main_t *lcpm = &lcp_main; + + lcpm->lcp_auto_subint = (is_auto != 0); +} + +int +lcp_auto_subint (void) +{ + lcp_main_t *lcpm = &lcp_main; + + return lcpm->lcp_auto_subint; +} + /* * fd.io coding-style-patch-verification: ON * diff --git 
a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h index 19636c546a8..14c1a6e2a75 100644 --- a/src/plugins/linux-cp/lcp.h +++ b/src/plugins/linux-cp/lcp.h @@ -24,8 +24,9 @@ typedef struct lcp_main_s u16 msg_id_base; /* API message ID base */ u8 *default_namespace; /* default namespace if set */ int default_ns_fd; - /* Set when Unit testing */ - u8 test_mode; + u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */ + u8 lcp_sync; /* Automatically sync VPP changes to LCP */ + u8 test_mode; /* Set when Unit testing */ } lcp_main_t; extern lcp_main_t lcp_main; diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c index cb874b1c023..8f2d17ab209 100644 --- a/src/plugins/linux-cp/lcp_cli.c +++ b/src/plugins/linux-cp/lcp_cli.c @@ -111,6 +111,68 @@ VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = { .function = lcp_itf_pair_create_command_fn, }; +static clib_error_t * +lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on") || unformat (line_input, "enable")) + lcp_set_sync (1); + else if (unformat (line_input, "off") || + unformat (line_input, "disable")) + lcp_set_sync (0); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + return 0; +} + +VLIB_CLI_COMMAND (lcp_sync_command, static) = { + .path = "lcp lcp-sync", + .short_help = "lcp lcp-sync [on|enable|off|disable]", + .function = lcp_sync_command_fn, +}; + +static clib_error_t * +lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + + if (!unformat_user (input, unformat_line_input, line_input)) + 
return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "on") || unformat (line_input, "enable")) + lcp_set_auto_subint (1); + else if (unformat (line_input, "off") || + unformat (line_input, "disable")) + lcp_set_auto_subint (0); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + return 0; +} + +VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = { + .path = "lcp lcp-auto-subint", + .short_help = "lcp lcp-auto-subint [on|enable|off|disable]", + .function = lcp_auto_subint_command_fn, +}; + static clib_error_t * lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c index d044635334f..3dbcb5987a1 100644 --- a/src/plugins/linux-cp/lcp_interface.c +++ b/src/plugins/linux-cp/lcp_interface.c @@ -39,7 +39,7 @@ #include #include -static vlib_log_class_t lcp_itf_pair_logger; +vlib_log_class_t lcp_itf_pair_logger; /** * Pool of LIP objects @@ -73,14 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft) vec_add1 (lcp_itf_vfts, *lcp_itf_vft); } -#define LCP_ITF_PAIR_DBG(...) \ - vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__); - -#define LCP_ITF_PAIR_INFO(...) \ - vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__); - -#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__); - u8 * format_lcp_itf_pair (u8 *s, va_list *args) { @@ -139,6 +131,9 @@ lcp_itf_pair_show (u32 phy_sw_if_index) ns = lcp_get_default_ns (); vlib_cli_output (vm, "lcp default netns '%s'\n", ns ? (char *) ns : ""); + vlib_cli_output (vm, "lcp lcp-auto-subint %s\n", + lcp_auto_subint () ? "on" : "off"); + vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? 
"on" : "off"); if (phy_sw_if_index == ~0) { @@ -157,6 +152,8 @@ lcp_itf_pair_get (u32 index) { if (!lcp_itf_pair_pool) return NULL; + if (index == INDEX_INVALID) + return NULL; return pool_elt_at_index (lcp_itf_pair_pool, index); } @@ -174,25 +171,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index) return INDEX_INVALID; } -int -lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index, - u32 phy_sw_if_index, u8 *ns) -{ - lcp_itf_pair_t *lip; - - lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index)); - if (!lip) - { - LCP_ITF_PAIR_DBG ("lcp_itf_pair_add_sub: can't find LCP of parent %U", - format_vnet_sw_if_index_name, vnet_get_main (), - phy_sw_if_index); - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - } - - return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index, - host_if_name, vif, lip->lip_host_type, ns); -} - const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = { [LCP_ITF_HOST_TAP] = { [AF_IP4] = "linux-cp-xc-ip4", @@ -248,17 +226,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name, index_t lipi; lcp_itf_pair_t *lip; + if (host_sw_if_index == ~0) + { + LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host"); + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + } + lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index); + if (lipi != INDEX_INVALID) + return VNET_API_ERROR_VALUE_EXIST; + LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s", format_vnet_sw_if_index_name, vnet_get_main (), host_sw_if_index, format_vnet_sw_if_index_name, vnet_get_main (), phy_sw_if_index, host_name, host_index, ns); - if (lipi != INDEX_INVALID) - return VNET_API_ERROR_VALUE_EXIST; - /* * Create a new pair. */ @@ -279,9 +263,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name, lip->lip_vif_index = host_index; lip->lip_namespace = vec_dup (ns); - if (lip->lip_host_sw_if_index == ~0) - return 0; - /* * First use of this host interface. 
* Enable the x-connect feature on the host to send @@ -421,10 +402,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index) lip = lcp_itf_pair_get (lipi); - LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %v}", format_vnet_sw_if_index_name, - vnet_get_main (), lip->lip_phy_sw_if_index, - format_vnet_sw_if_index_name, vnet_get_main (), - lip->lip_host_sw_if_index, lip->lip_host_name); + LCP_ITF_PAIR_NOTICE ( + "pair_del: host:%U phy:%U host_if:%s vif:%d ns:%s", + format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index, + format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index, + lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace); /* invoke registered callbacks for pair deletion */ vec_foreach (vft, lcp_itf_vfts) @@ -475,24 +457,45 @@ lcp_itf_pair_delete_by_index (index_t lipi) { u32 host_sw_if_index; lcp_itf_pair_t *lip; - u8 *host_name; + u8 *host_name, *ns; lip = lcp_itf_pair_get (lipi); host_name = vec_dup (lip->lip_host_name); host_sw_if_index = lip->lip_host_sw_if_index; + ns = vec_dup (lip->lip_namespace); lcp_itf_pair_del (lip->lip_phy_sw_if_index); if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index)) { + int curr_ns_fd = -1; + int vif_ns_fd = -1; + if (ns) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open ((u8 *) ns); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + lcp_netlink_del_link ((const char *) host_name); + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } + vnet_delete_sub_interface (host_sw_if_index); } else tap_delete_if (vlib_get_main (), host_sw_if_index); vec_free (host_name); + vec_free (ns); } int @@ -539,58 +542,16 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx) }; } -typedef struct lcp_itf_pair_names_t_ -{ - u8 *lipn_host_name; - u8 *lipn_phy_name; - u8 *lipn_namespace; - u32 lipn_phy_sw_if_index; -} lcp_itf_pair_names_t; - -static lcp_itf_pair_names_t *lipn_names; - static 
clib_error_t * lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input) { - u8 *host, *phy; - u8 *ns; u8 *default_ns; - host = phy = ns = default_ns = NULL; + default_ns = NULL; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - vec_reset_length (host); - - if (unformat (input, "pair %s %s %s", &phy, &host, &ns)) - { - lcp_itf_pair_names_t *lipn; - - if (vec_len (ns) > LCP_NS_LEN) - { - return clib_error_return (0, - "linux-cp namespace must" - " be less than %d characters", - LCP_NS_LEN); - } - - vec_add2 (lipn_names, lipn, 1); - - lipn->lipn_host_name = vec_dup (host); - lipn->lipn_phy_name = vec_dup (phy); - lipn->lipn_namespace = vec_dup (ns); - } - else if (unformat (input, "pair %v %v", &phy, &host)) - { - lcp_itf_pair_names_t *lipn; - - vec_add2 (lipn_names, lipn, 1); - - lipn->lipn_host_name = vec_dup (host); - lipn->lipn_phy_name = vec_dup (phy); - lipn->lipn_namespace = 0; - } - else if (unformat (input, "default netns %v", &default_ns)) + if (unformat (input, "default netns %v", &default_ns)) { vec_add1 (default_ns, 0); if (lcp_set_default_ns (default_ns) < 0) @@ -601,14 +562,14 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input) LCP_NS_LEN); } } - else if (unformat (input, "interface-auto-create")) - lcp_set_auto_intf (1 /* is_auto */); + else if (unformat (input, "lcp-auto-subint")) + lcp_set_auto_subint (1 /* is_auto */); + else if (unformat (input, "lcp-sync")) + lcp_set_sync (1 /* is_auto */); else return clib_error_return (0, "interfaces not found"); } - vec_free (host); - vec_free (phy); vec_free (default_ns); return NULL; @@ -653,7 +614,7 @@ lcp_validate_if_name (u8 *name) return 1; } -static void +void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state) { int curr_ns_fd, vif_ns_fd; @@ -671,6 +632,8 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state) clib_setns (vif_ns_fd); } + /* Set the same link state on the netlink interface + */ vnet_netlink_set_link_state (lip->lip_vif_index, state); 
if (vif_ns_fd != -1) @@ -685,6 +648,58 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state) return; } +void +lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip) +{ + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm4 = &im4->lookup_main; + ip_lookup_main_t *lm6 = &im6->lookup_main; + ip_interface_address_t *ia = 0; + int vif_ns_fd = -1; + int curr_ns_fd = -1; + + if (!lip) + return; + + if (lip->lip_namespace) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open (lip->lip_namespace); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + + /* Sync any IP4 addressing info into LCP */ + foreach_ip_interface_address ( + lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({ + ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia); + LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d", + format_lcp_itf_pair, lip, format_ip4_address, r4, + ia->address_length); + vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length); + })); + + /* Sync any IP6 addressing info into LCP */ + foreach_ip_interface_address ( + lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({ + ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia); + LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d", + format_lcp_itf_pair, lip, format_ip6_address, r6, + ia->address_length); + vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length); + })); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } +} + typedef struct { u32 vlan; @@ -792,9 +807,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name, u16 vlan, proto; u32 parent_vif_index; - // TODO(pim) replace with vnet_sw_interface_supports_addressing() - if (sw->type == VNET_SW_INTERFACE_TYPE_SUB && - sw->sub.eth.flags.exact_match == 0) + err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index); + if (err) 
{ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a " "sub-interface without exact-match set"); @@ -921,7 +935,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name, outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm, lip->lip_host_sw_if_index); err = clib_error_return ( - 0, "failed to create tap subinti: %d.%d. on %U", outer_vlan, + 0, "failed to create tap subint: %d.%d. on %U", outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm, lip->lip_host_sw_if_index); } @@ -1107,70 +1121,6 @@ lcp_itf_pair_replace_end (void) return (0); } -static uword -lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt, - vlib_frame_t *f) -{ - uword *event_data = 0; - uword *lipn_index; - - while (1) - { - vlib_process_wait_for_event (vm); - - vlib_process_get_events (vm, &event_data); - - vec_foreach (lipn_index, event_data) - { - lcp_itf_pair_names_t *lipn; - - lipn = &lipn_names[*lipn_index]; - lcp_itf_pair_create (lipn->lipn_phy_sw_if_index, - lipn->lipn_host_name, LCP_ITF_HOST_TAP, - lipn->lipn_namespace, NULL); - } - - vec_reset_length (event_data); - } - - return 0; -} - -VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = { - .function = lcp_itf_pair_process, - .name = "linux-cp-itf-process", - .type = VLIB_NODE_TYPE_PROCESS, -}; - -static clib_error_t * -lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create) -{ - lcp_itf_pair_names_t *lipn; - vlib_main_t *vm = vlib_get_main (); - vnet_hw_interface_t *hw; - - if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index)) - return NULL; - - hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - - vec_foreach (lipn, lipn_names) - { - if (!vec_cmp (hw->name, lipn->lipn_phy_name)) - { - lipn->lipn_phy_sw_if_index = sw_if_index; - - vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0, - lipn - lipn_names); - break; - } - } - - return NULL; -} - -VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add); - static clib_error_t * lcp_itf_pair_link_up_down (vnet_main_t *vnm, 
u32 hw_if_index, u32 flags) { diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h index bed30248845..cfcd3925a15 100644 --- a/src/plugins/linux-cp/lcp_interface.h +++ b/src/plugins/linux-cp/lcp_interface.h @@ -21,6 +21,22 @@ #include +extern vlib_log_class_t lcp_itf_pair_logger; + +#define LCP_ITF_PAIR_DBG(...) \ + vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__); + +#define LCP_ITF_PAIR_INFO(...) \ + vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__); + +#define LCP_ITF_PAIR_NOTICE(...) \ + vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__); + +#define LCP_ITF_PAIR_WARN(...) \ + vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__); + +#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__); + #define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale") typedef enum lip_flag_t_ @@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index); extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name, u32 host_index, lip_host_type_t host_type, u8 *ns); -extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index, - u32 phy_sw_if_index, u8 *ns); extern int lcp_itf_pair_del (u32 phy_sw_if_index); /** @@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index) return (lip_db_by_host[host_sw_if_index]); } -/** - * manage interface auto creation - */ -void lcp_set_auto_intf (u8 is_auto); -int lcp_auto_intf (void); - typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *); typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *); @@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft } lcp_itf_pair_vft_t; void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft); + +/** + * sub-interface auto creation/deletion for LCP + */ +void lcp_set_auto_subint (u8 is_auto); +int lcp_auto_subint (void); + +/** + * sync state changes from VPP into LCP + */ +void lcp_set_sync (u8 is_auto); +int lcp_sync (void); + +/* Set TAP and Linux host link state */ +void 
lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state); + +/* Set any VPP L3 addresses on Linux host device */ +void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip); + +/* Sync all state from VPP to a specific Linux device, all sub-interfaces + * of a hardware interface, or all interfaces in the system. + * + * Note: in some circumstances, this syncer will (have to) make changes to + * the VPP interface, for example if its MTU is greater than its parent. + * See the function for rationale. + */ +void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip); +void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi); +void lcp_itf_pair_sync_state_all (); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c new file mode 100644 index 00000000000..a4e343f6b9a --- /dev/null +++ b/src/plugins/linux-cp/lcp_interface_sync.c @@ -0,0 +1,442 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright 2021 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +/* helper function to copy forward all sw interface link state flags + * MTU, and IP addresses into their counterpart LIP interface. + * + * This is called upon MTU changes and state changes. 
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+  vnet_sw_interface_t *sw;
+  vnet_sw_interface_t *sup_sw;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+  u32 mtu;
+  u32 netlink_mtu;
+
+  if (!lcp_sync ())
+    return;
+
+  sw =
+    vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index);
+  if (!sw)
+    return;
+  sup_sw =
+    vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
+  /* The parent lookup can fail just like the lookup above; everything
+   * below dereferences sup_sw, so bail out before any namespace fd is
+   * opened (nothing to clean up yet).
+   */
+  if (!sup_sw)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+                     format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+                     sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+  /* Linux will not allow children to be admin-up if their parent is
+   * admin-down. If child is up but parent is not, force it down.
+   */
+  int state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      LCP_ITF_PAIR_WARN (
+        "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+        "forcing state to sup-flags to satisfy netlink",
+        format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+        sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+      state = 0;
+    }
+  lcp_itf_set_link_state (lip, state);
+
+  /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+   * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+   * inherits from its parent. Linux likes to be more explicit, so we
+   * reconcile any differences.
+ */ + mtu = sw->mtu[VNET_MTU_L3]; + if (mtu == 0) + mtu = sup_sw->mtu[VNET_MTU_L3]; + + if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3]) + { + LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: " + "clamping to sup-mtu to satisfy netlink", + format_lcp_itf_pair, lip, sw->flags, + sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]); + mtu = sup_sw->mtu[VNET_MTU_L3]; + } + + /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we + * really do want to change the MTU. + */ + vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_phy_sw_if_index, mtu); + vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_host_sw_if_index, mtu); + if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu)) + { + if (netlink_mtu != mtu) + vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu); + } + + /* Linux will remove IPv6 addresses on children when the parent state + * goes down, so we ensure all IPv4/IPv6 addresses are synced. + */ + lcp_itf_set_interface_addr (lip); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } + + return; +} + +static walk_rc_t +lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx) +{ + lcp_itf_pair_t *lip; + lip = lcp_itf_pair_get (lipi); + if (!lip) + return WALK_CONTINUE; + + lcp_itf_pair_sync_state (lip); + return WALK_CONTINUE; +} + +static walk_rc_t +lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index, + void *arg) +{ + lcp_itf_pair_t *lip; + + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + { + return WALK_CONTINUE; + } + + lcp_itf_pair_sync_state (lip); + return WALK_CONTINUE; +} + +void +lcp_itf_pair_sync_state_all () +{ + lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0); +} + +void +lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi) +{ + if (!hi) + return; + LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name, + vnet_get_main (), 
hi->hw_if_index); + + vnet_hw_interface_walk_sw (vnet_get_main (), hi->hw_if_index, + lcp_itf_pair_walk_sync_state_hw_cb, NULL); +} + +static clib_error_t * +lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags) +{ + lcp_itf_pair_t *lip; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + + if (!lcp_sync ()) + return 0; + + LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u", + format_vnet_sw_if_index_name, vnm, sw_if_index, flags); + + // Sync interface state changes into host + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + return NULL; + LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair, + lip, flags); + + if (vnet_sw_interface_is_sub (vnm, sw_if_index)) + { + lcp_itf_pair_sync_state (lip); + return NULL; + } + + // When Linux changes link on a parent interface, all of its children also + // change. If a parent interface changes MTU, all of its children are clamped + // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo + // change by walking the sub-interfaces of a phy and syncing their state back + // into Linux. 
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return NULL; + + hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (!hi) + return NULL; + LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children", + format_vnet_sw_if_index_name, vnm, si->sw_if_index, + format_vnet_sw_if_index_name, vnm, hi->sw_if_index); + + lcp_itf_pair_sync_state_hw (hi); + + return NULL; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change); + +static clib_error_t * +lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags) +{ + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + if (!lcp_sync ()) + return NULL; + + LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm, + sw_if_index, flags); + + if (vnet_sw_interface_is_sub (vnm, sw_if_index)) + { + lcp_itf_pair_t *lip; + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (lip) + lcp_itf_pair_sync_state (lip); + return NULL; + } + + // When Linux changes link on a parent interface, all of its children also + // change. If a parent interface changes MTU, all of its children are clamped + // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo + // change by walking the sub-interfaces of a phy and syncing their state back + // into Linux. 
+  si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  if (!si)
+    return NULL;
+
+  hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+  if (!hi)
+    return NULL;
+  LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+                    format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+                    format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+  lcp_itf_pair_sync_state_hw (hi);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip4_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const lcp_itf_pair_t *lip;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+
+  if (!lcp_sync ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip4_address, address, address_length);
+
+  lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!lip)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+                    format_lcp_itf_pair, lip, format_ip4_address, address,
+                    address_length);
+
+  if (is_del)
+    vnet_netlink_del_ip4_addr (lip->lip_vif_index, address, address_length);
+  else
+    vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+  return;
+}
+
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip6_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const lcp_itf_pair_t *lip;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+
+  if (!lcp_sync ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip6_address, address, address_length);
+
+  lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!lip)
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+  /* Label the address family correctly: this is the IPv6 callback. */
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip6 %U/%u", is_del ? 
"del" : "add", + format_lcp_itf_pair, lip, format_ip6_address, address, + address_length); + if (is_del) + vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length); + else + vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } +} + +static clib_error_t * +lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create) +{ + const vnet_sw_interface_t *sw; + uword is_sub; + + if (!lcp_auto_subint ()) + return NULL; + + sw = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!sw) + return NULL; + + is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index); + if (!is_sub) + return NULL; + + LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del", + format_vnet_sw_if_index_name, vnet_get_main (), + sw->sw_if_index, format_vnet_sw_if_index_name, + vnet_get_main (), sw->sup_sw_if_index); + + if (is_create) + { + const lcp_itf_pair_t *sup_lip; + u8 *name = 0; + + // If the parent has a LIP auto-create a LIP for this interface + sup_lip = + lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index)); + if (!sup_lip) + return NULL; + + name = format (name, "%s.%d", sup_lip->lip_host_name, sw->sub.id); + + LCP_ITF_PAIR_INFO ( + "interface_%s: %U has parent %U, auto-creating LCP with host-if %s", + is_create ? 
"add" : "del", format_vnet_sw_if_index_name, + vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name); + + lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP, + sup_lip->lip_namespace, NULL); + + vec_free (name); + } + else + { + lcp_itf_pair_delete (sw_if_index); + } + + return NULL; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del); + +static clib_error_t * +lcp_itf_sync_init (vlib_main_t *vm) +{ + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + + ip4_add_del_interface_address_callback_t cb4; + ip6_add_del_interface_address_callback_t cb6; + + cb4.function = lcp_itf_ip4_add_del_interface_addr; + cb4.function_opaque = 0; + vec_add1 (im4->add_del_interface_address_callbacks, cb4); + + cb6.function = lcp_itf_ip6_add_del_interface_addr; + cb6.function_opaque = 0; + vec_add1 (im6->add_del_interface_address_callbacks, cb6); + + return NULL; +} + +VLIB_INIT_FUNCTION (lcp_itf_sync_init) = { + .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c index 9aae205c54f..da21e9adea1 100644 --- a/src/vnet/devices/netlink.c +++ b/src/vnet/devices/netlink.c @@ -273,7 +273,6 @@ vnet_netlink_get_link_mtu (int ifindex, u32 *mtu) *mtu = clib_net_to_host_u32 (msg_mtu); else *mtu = msg_mtu; - clib_warning ("mtu: %d", *mtu); goto done; } offset = NLA_ALIGN (attr->nla_len); @@ -409,6 +408,50 @@ vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw) return err; } +clib_error_t * +vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len) +{ + vnet_netlink_msg_t m; + struct ifaddrmsg ifa = { 0 }; + clib_error_t *err = 0; + + ifa.ifa_family = AF_INET; + ifa.ifa_prefixlen = pfx_len; + ifa.ifa_index = ifindex; + + vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa, + sizeof (struct ifaddrmsg)); + + 
vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 4); + vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 4); + err = vnet_netlink_msg_send (&m, NULL); + if (err) + err = clib_error_return (0, "del ip4 addr %U", format_clib_error, err); + return err; +} + +clib_error_t * +vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len) +{ + vnet_netlink_msg_t m; + struct ifaddrmsg ifa = { 0 }; + clib_error_t *err = 0; + + ifa.ifa_family = AF_INET6; + ifa.ifa_prefixlen = pfx_len; + ifa.ifa_index = ifindex; + + vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa, + sizeof (struct ifaddrmsg)); + + vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 16); + vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 16); + err = vnet_netlink_msg_send (&m, NULL); + if (err) + err = clib_error_return (0, "del ip6 addr %U", format_clib_error, err); + return err; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/netlink.h b/src/vnet/devices/netlink.h index f1c42609cbf..086781fdbff 100644 --- a/src/vnet/devices/netlink.h +++ b/src/vnet/devices/netlink.h @@ -26,8 +26,10 @@ clib_error_t *vnet_netlink_get_link_mtu (int ifindex, u32 *mtu); clib_error_t *vnet_netlink_set_link_mtu (int ifindex, int mtu); clib_error_t *vnet_netlink_add_ip4_addr (int ifindex, void *addr, int pfx_len); +clib_error_t *vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len); clib_error_t *vnet_netlink_add_ip6_addr (int ifindex, void *addr, int pfx_len); +clib_error_t *vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len); clib_error_t *vnet_netlink_add_ip4_route (void *dst, u8 dst_len, void *gw); clib_error_t *vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw); -- cgit 1.2.3-korg