diff options
author | Pim van Pelt <pim@ipng.nl> | 2021-09-09 17:53:09 +0000 |
---|---|---|
committer | Matthew Smith <mgsmith@netgate.com> | 2022-01-11 19:31:25 +0000 |
commit | 1705a6baefe205bb6792b547c7376eee3f328a71 (patch) | |
tree | e524c5abb4cc39b735fa55a643475482206b49b9 /src/plugins/linux-cp/lcp_interface_sync.c | |
parent | 0cef5f5d7171e05389beee0e6b4250b366b2b28e (diff) |
linux-cp: Add VPP->Linux synchronization
Part 1 -- notes in https://ipng.ch/s/articles/2021/08/13/vpp-2.html
Add the ability for VPP to copy out (sync) its state from the dataplane
to Linux Interface Pairs, when they exist. Gated by a configuration
flag (linux-cp { lcp-sync }), and by a CLI option to toggle on/off,
synchronize the following events:
- Interface state changes
- Interface MTU changes
- Interface IPv4/IPv6 address add/deletion
In VPP, subints can have any link state and MTU, orthogonal to their
phy. In Linux, setting admin-down on a phy forces its children to be
down as well. Also, in Linux, MTU of children must not exceed that of
the phy. Add a state synchronizer which walks over phy+subints to
ensure Linux and VPP end up in the same consistent state.
Part 2 -- notes in https://ipng.ch/s/articles/2021/08/15/vpp-3.html
Add the ability for VPP to autocreate sub-interfaces of existing Linux
Interface pairs. Gated by a configuration flag
(linux-cp { lcp-auto-subint }), and by a CLI option to toggle on/off,
synchronize the following event:
- Sub-interface creation (dot1q, dot1ad, QinQ and QinAD)
A few other changes:
- Add two functions into netlink.[ch] to delete ip4 and ip6 addresses.
- Remove a spurious logline (printing MTU) in netlink.c.
- Resolve a TODO around vnet_sw_interface_supports_addressing()
Type: improvement
Signed-off-by: Pim van Pelt <pim@ipng.nl>
Change-Id: I34fc070e80af4013be58d7a8cbf64296cc760e4e
Signed-off-by: Pim van Pelt <pim@ipng.nl>
Diffstat (limited to 'src/plugins/linux-cp/lcp_interface_sync.c')
-rw-r--r-- | src/plugins/linux-cp/lcp_interface_sync.c | 442 |
1 files changed, 442 insertions, 0 deletions
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c new file mode 100644 index 00000000000..a4e343f6b9a --- /dev/null +++ b/src/plugins/linux-cp/lcp_interface_sync.c @@ -0,0 +1,442 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright 2021 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vnet/devices/netlink.h> +#include <vnet/ip/ip.h> +#include <vppinfra/linux/netns.h> +#include <plugins/linux-cp/lcp_interface.h> + +/* helper function to copy forward all sw interface link state flags + * MTU, and IP addresses into their counterpart LIP interface. + * + * This is called upon MTU changes and state changes. + */ +void +lcp_itf_pair_sync_state (lcp_itf_pair_t *lip) +{ + vnet_sw_interface_t *sw; + vnet_sw_interface_t *sup_sw; + int curr_ns_fd = -1; + int vif_ns_fd = -1; + u32 mtu; + u32 netlink_mtu; + + if (!lcp_sync ()) + return; + + sw = + vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index); + if (!sw) + return; + sup_sw = + vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index); + + if (lip->lip_namespace) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open (lip->lip_namespace); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + + LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u", + format_lcp_itf_pair, lip, sw->flags, sup_sw->flags, + sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]); + + /* Linux will not allow children to be admin-up if their parent is + * admin-down. If child is up but parent is not, force it down. + */ + int state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP; + + if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + LCP_ITF_PAIR_WARN ( + "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: " + "forcing state to sup-flags to satisfy netlink", + format_lcp_itf_pair, lip, sw->flags, sup_sw->flags, + sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]); + state = 0; + } + lcp_itf_set_link_state (lip, state); + + /* Linux will clamp MTU of children when the parent is lower. VPP is fine + * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it + * inherits from its parent. Linux likes to be more explicit, so we + * reconcile any differences. + */ + mtu = sw->mtu[VNET_MTU_L3]; + if (mtu == 0) + mtu = sup_sw->mtu[VNET_MTU_L3]; + + if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3]) + { + LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: " + "clamping to sup-mtu to satisfy netlink", + format_lcp_itf_pair, lip, sw->flags, + sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]); + mtu = sup_sw->mtu[VNET_MTU_L3]; + } + + /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we + * really do want to change the MTU. + */ + vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_phy_sw_if_index, mtu); + vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_host_sw_if_index, mtu); + if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu)) + { + if (netlink_mtu != mtu) + vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu); + } + + /* Linux will remove IPv6 addresses on children when the parent state + * goes down, so we ensure all IPv4/IPv6 addresses are synced. + */ + lcp_itf_set_interface_addr (lip); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } + + return; +} + +static walk_rc_t +lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx) +{ + lcp_itf_pair_t *lip; + lip = lcp_itf_pair_get (lipi); + if (!lip) + return WALK_CONTINUE; + + lcp_itf_pair_sync_state (lip); + return WALK_CONTINUE; +} + +static walk_rc_t +lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index, + void *arg) +{ + lcp_itf_pair_t *lip; + + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + { + return WALK_CONTINUE; + } + + lcp_itf_pair_sync_state (lip); + return WALK_CONTINUE; +} + +void +lcp_itf_pair_sync_state_all () +{ + lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0); +} + +void +lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi) +{ + if (!hi) + return; + LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name, + vnet_get_main (), hi->hw_if_index); + + vnet_hw_interface_walk_sw (vnet_get_main (), hi->hw_if_index, + lcp_itf_pair_walk_sync_state_hw_cb, NULL); +} + +static clib_error_t * +lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags) +{ + lcp_itf_pair_t *lip; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + + if (!lcp_sync ()) + return 0; + + LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u", + format_vnet_sw_if_index_name, vnm, sw_if_index, flags); + + // Sync interface state changes into host + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + return NULL; + LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair, + lip, flags); + + if (vnet_sw_interface_is_sub (vnm, sw_if_index)) + { + lcp_itf_pair_sync_state (lip); + return NULL; + } + + // When Linux changes link on a parent interface, all of its children also + // change. If a parent interface changes MTU, all of its children are clamped + // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo + // change by walking the sub-interfaces of a phy and syncing their state back + // into Linux. + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return NULL; + + hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (!hi) + return NULL; + LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children", + format_vnet_sw_if_index_name, vnm, si->sw_if_index, + format_vnet_sw_if_index_name, vnm, hi->sw_if_index); + + lcp_itf_pair_sync_state_hw (hi); + + return NULL; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change); + +static clib_error_t * +lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags) +{ + vnet_sw_interface_t *si; + vnet_hw_interface_t *hi; + if (!lcp_sync ()) + return NULL; + + LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm, + sw_if_index, flags); + + if (vnet_sw_interface_is_sub (vnm, sw_if_index)) + { + lcp_itf_pair_t *lip; + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (lip) + lcp_itf_pair_sync_state (lip); + return NULL; + } + + // When Linux changes link on a parent interface, all of its children also + // change. If a parent interface changes MTU, all of its children are clamped + // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo + // change by walking the sub-interfaces of a phy and syncing their state back + // into Linux. + si = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!si) + return NULL; + + hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index); + if (!hi) + return NULL; + LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children", + format_vnet_sw_if_index_name, vnm, si->sw_if_index, + format_vnet_sw_if_index_name, vnm, hi->sw_if_index); + + lcp_itf_pair_sync_state_hw (hi); + + return NULL; +} + +VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change); + +static void +lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque, + u32 sw_if_index, ip4_address_t *address, + u32 address_length, u32 if_address_index, + u32 is_del) +{ + const lcp_itf_pair_t *lip; + int curr_ns_fd = -1; + int vif_ns_fd = -1; + + if (!lcp_sync ()) + return; + + LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add", + format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index, format_ip4_address, address, address_length); + + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + return; + + if (lip->lip_namespace) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open (lip->lip_namespace); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + + LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add", + format_lcp_itf_pair, lip, format_ip4_address, address, + address_length); + + if (is_del) + vnet_netlink_del_ip4_addr (lip->lip_vif_index, address, address_length); + else + vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } + return; +} + +static void +lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque, + u32 sw_if_index, ip6_address_t *address, + u32 address_length, u32 if_address_index, + u32 is_del) +{ + const lcp_itf_pair_t *lip; + int curr_ns_fd = -1; + int vif_ns_fd = -1; + + if (!lcp_sync ()) + return; + + LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add", + format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index, format_ip6_address, address, address_length); + + lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); + if (!lip) + return; + + if (lip->lip_namespace) + { + curr_ns_fd = clib_netns_open (NULL /* self */); + vif_ns_fd = clib_netns_open (lip->lip_namespace); + if (vif_ns_fd != -1) + clib_setns (vif_ns_fd); + } + LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add", + format_lcp_itf_pair, lip, format_ip6_address, address, + address_length); + if (is_del) + vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length); + else + vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length); + + if (vif_ns_fd != -1) + close (vif_ns_fd); + + if (curr_ns_fd != -1) + { + clib_setns (curr_ns_fd); + close (curr_ns_fd); + } +} + +static clib_error_t * +lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create) +{ + const vnet_sw_interface_t *sw; + uword is_sub; + + if (!lcp_auto_subint ()) + return NULL; + + sw = vnet_get_sw_interface_or_null (vnm, sw_if_index); + if (!sw) + return NULL; + + is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index); + if (!is_sub) + return NULL; + + LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del", + format_vnet_sw_if_index_name, vnet_get_main (), + sw->sw_if_index, format_vnet_sw_if_index_name, + vnet_get_main (), sw->sup_sw_if_index); + + if (is_create) + { + const lcp_itf_pair_t *sup_lip; + u8 *name = 0; + + // If the parent has a LIP auto-create a LIP for this interface + sup_lip = + lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index)); + if (!sup_lip) + return NULL; + + name = format (name, "%s.%d", sup_lip->lip_host_name, sw->sub.id); + + LCP_ITF_PAIR_INFO ( + "interface_%s: %U has parent %U, auto-creating LCP with host-if %s", + is_create ? "add" : "del", format_vnet_sw_if_index_name, + vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name); + + lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP, + sup_lip->lip_namespace, NULL); + + vec_free (name); + } + else + { + lcp_itf_pair_delete (sw_if_index); + } + + return NULL; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del); + +static clib_error_t * +lcp_itf_sync_init (vlib_main_t *vm) +{ + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + + ip4_add_del_interface_address_callback_t cb4; + ip6_add_del_interface_address_callback_t cb6; + + cb4.function = lcp_itf_ip4_add_del_interface_addr; + cb4.function_opaque = 0; + vec_add1 (im4->add_del_interface_address_callbacks, cb4); + + cb6.function = lcp_itf_ip6_add_del_interface_addr; + cb6.function_opaque = 0; + vec_add1 (im6->add_del_interface_address_callbacks, cb6); + + return NULL; +} + +VLIB_INIT_FUNCTION (lcp_itf_sync_init) = { + .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |