aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/linux-cp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/linux-cp')
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt17
-rw-r--r--src/plugins/linux-cp/FEATURE.yaml14
-rw-r--r--src/plugins/linux-cp/lcp.api76
-rw-r--r--src/plugins/linux-cp/lcp.c107
-rw-r--r--src/plugins/linux-cp/lcp.h36
-rw-r--r--src/plugins/linux-cp/lcp.rst5
-rw-r--r--src/plugins/linux-cp/lcp_api.c142
-rw-r--r--src/plugins/linux-cp/lcp_cli.c272
-rw-r--r--src/plugins/linux-cp/lcp_interface.c399
-rw-r--r--src/plugins/linux-cp/lcp_interface.h54
-rw-r--r--src/plugins/linux-cp/lcp_interface_sync.c445
-rw-r--r--src/plugins/linux-cp/lcp_mpls_sync.c160
-rw-r--r--src/plugins/linux-cp/lcp_nl.c1043
-rw-r--r--src/plugins/linux-cp/lcp_nl.h161
-rw-r--r--src/plugins/linux-cp/lcp_node.c126
-rw-r--r--src/plugins/linux-cp/lcp_router.c1578
16 files changed, 4283 insertions, 352 deletions
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 080f73ecce4..c891689b4b4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -12,12 +12,18 @@
# limitations under the License.
vpp_find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h)
+vpp_find_path(LIBMNL_INCLUDE_DIR NAMES libmnl/libmnl.h)
if (NOT LIBNL3_INCLUDE_DIR)
message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled")
return()
endif()
+if (NOT LIBMNL_INCLUDE_DIR)
+ message(WARNING "-- libmnl headers not found - linux-cp plugin disabled")
+ return()
+endif()
+
vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so)
vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200)
@@ -27,6 +33,8 @@ include_directories(${LIBMNL_INCLUDE_DIR})
add_vpp_library(lcp
SOURCES
lcp_interface.c
+ lcp_interface_sync.c
+ lcp_mpls_sync.c
lcp_adj.c
lcp.c
@@ -59,3 +67,12 @@ add_vpp_plugin(linux_cp_unittest
LINK_LIBRARIES
lcp
)
+
+add_vpp_plugin(linux_nl
+ SOURCES
+ lcp_router.c
+ lcp_nl.c
+
+ LINK_LIBRARIES
+ lcp
+)
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
index 088b0606f58..425858591f2 100644
--- a/src/plugins/linux-cp/FEATURE.yaml
+++ b/src/plugins/linux-cp/FEATURE.yaml
@@ -3,10 +3,10 @@ name: Linux Control Plane (integration)
maintainer: Neale Ranns <neale@grahpiant.com>
description: |-
- This plugin provides the beginnings of an integration with the
- Linux network stack.
- The plugin provides the capability to 'mirror' VPP interfaces in
- the Linux kernel. This means that for any interface in VPP the user
+ These plugins provide an integration with the Linux network stack.
+
+ The "linux_cp" plugin provides the capability to 'mirror' VPP interfaces
+ in the Linux kernel. This means that for any interface in VPP the user
can create a corresponding TAP or TUN device in the Linux kernel
and have VPP plumb them together.
The plumbing mechanics is different in each direction.
@@ -17,8 +17,10 @@ description: |-
In the TX direction, packets received by VPP an the mirror Tap/Tun
are cross-connected to the VPP interfaces. For IP packets, IP output
features are applied.
- This is the beginnings of integration, because there needs to be
- an external agent that will configure (and synchronize) the IP
+ If MPLS is enabled on a VPP interface, state is synced to Linux and
+ in TX direction a special feature is enabled to pass MPLS packets through
+ untouched.
+ The "linux_nl" plugin listens to netlink messages and synchronizes the IP
configuration of the paired interfaces.
state: experimental
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
index 319dd3e6483..e7eaa5a3669 100644
--- a/src/plugins/linux-cp/lcp.api
+++ b/src/plugins/linux-cp/lcp.api
@@ -21,19 +21,20 @@ option version = "1.0.0";
import "vnet/interface_types.api";
-/** \brief Set the default Linux Control Plane namespace
+/** \brief Set the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the new default namespace; namespace[0] == 0 iff none
+ @param netns - the new default netns; netns[0] == 0 if none
*/
autoreply define lcp_default_ns_set
{
u32 client_index;
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
@@ -43,15 +44,16 @@ define lcp_default_ns_get
u32 context;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the default namespace; namespace[0] == 0 iff none
+ @param netns - the default netns; netns[0] == 0 if none
*/
define lcp_default_ns_get_reply
{
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
enum lcp_itf_host_type : u8
@@ -67,32 +69,59 @@ enum lcp_itf_host_type : u8
@param sw_if_index - index of VPP PHY SW interface
@param host_if_name - host tap interface name
@param host_if_type - the type of host interface to create (tun, tap)
- @param namespace - optional tap namespace; namespace[0] == 0 iff none
+ @param netns - optional tap netns; netns[0] == 0 if none
*/
autoreply autoendian define lcp_itf_pair_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
autoendian define lcp_itf_pair_add_del_v2
{
+ option in_progress;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
define lcp_itf_pair_add_del_v2_reply
{
+ option in_progress;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t host_sw_if_index;
+};
+autoendian define lcp_itf_pair_add_del_v3
+{
+ option in_progress;
+
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[16]; /* IFNAMSIZ */
+ vl_api_lcp_itf_host_type_t host_if_type;
+ string netns[32]; /* LCP_NS_LEN */
+};
+define lcp_itf_pair_add_del_v3_reply
+{
+ option in_progress;
+
u32 context;
i32 retval;
+ u32 vif_index;
vl_api_interface_index_t host_sw_if_index;
};
@@ -101,13 +130,26 @@ define lcp_itf_pair_add_del_v2_reply
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to use as filter (~0 == "all")
*/
-define lcp_itf_pair_get
+autoendian define lcp_itf_pair_get
{
u32 client_index;
u32 context;
u32 cursor;
};
-define lcp_itf_pair_get_reply
+autoendian define lcp_itf_pair_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+autoendian define lcp_itf_pair_get_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+};
+autoendian define lcp_itf_pair_get_v2_reply
{
u32 context;
i32 retval;
@@ -121,7 +163,7 @@ define lcp_itf_pair_get_reply
@param vif_index - tap linux index
@param host_if_name - host interface name
@param host_if_type - host interface type (tun, tap)
- @param namespace - host interface namespace
+ @param netns - host interface netns
*/
autoendian define lcp_itf_pair_details
{
@@ -131,7 +173,8 @@ autoendian define lcp_itf_pair_details
u32 vif_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
service {
@@ -139,6 +182,11 @@ service {
stream lcp_itf_pair_details;
};
+service {
+ rpc lcp_itf_pair_get_v2 returns lcp_itf_pair_get_v2_reply
+ stream lcp_itf_pair_details;
+};
+
/** \brief Replace end/begin
*/
autoreply define lcp_itf_pair_replace_begin
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
index a4d3faf081a..34e8550a13f 100644
--- a/src/plugins/linux-cp/lcp.c
+++ b/src/plugins/linux-cp/lcp.c
@@ -20,6 +20,7 @@
#include <net/if.h>
#include <plugins/linux-cp/lcp.h>
+#include <plugins/linux-cp/lcp_interface.h>
lcp_main_t lcp_main;
@@ -76,6 +77,112 @@ lcp_set_default_ns (u8 *ns)
return 0;
}
+void
+lcp_set_sync (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_sync = (is_auto != 0);
+
+ // If we set to 'on', do a one-off sync of LCP interfaces
+ if (is_auto)
+ lcp_itf_pair_sync_state_all ();
+}
+
+int
+lcp_sync (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_sync;
+}
+
+void
+lcp_set_auto_subint (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_auto_subint = (is_auto != 0);
+}
+
+int
+lcp_auto_subint (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_auto_subint;
+}
+
+void
+lcp_set_del_static_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_static_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_static_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_static_on_link_down;
+}
+
+void
+lcp_set_del_dynamic_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_dynamic_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_dynamic_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_dynamic_on_link_down;
+}
+
+void
+lcp_set_netlink_processing_active (u8 is_processing)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->netlink_processing_active = (is_processing != 0);
+}
+
+u8
+lcp_get_netlink_processing_active (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->netlink_processing_active;
+}
+
+void
+lcp_set_default_num_queues (u16 num_queues, u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ lcpm->num_tx_queues = num_queues;
+ else
+ lcpm->num_rx_queues = num_queues;
+}
+
+u16
+lcp_get_default_num_queues (u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ return lcpm->num_tx_queues;
+
+ return lcpm->num_rx_queues ?: vlib_num_workers ();
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
index 19636c546a8..e89b149f67d 100644
--- a/src/plugins/linux-cp/lcp.h
+++ b/src/plugins/linux-cp/lcp.h
@@ -24,8 +24,15 @@ typedef struct lcp_main_s
u16 msg_id_base; /* API message ID base */
u8 *default_namespace; /* default namespace if set */
int default_ns_fd;
- /* Set when Unit testing */
- u8 test_mode;
+ u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */
+ u8 lcp_sync; /* Automatically sync VPP changes to LCP */
+ u8 del_static_on_link_down; /* Delete static routes when link goes down */
+ u8 del_dynamic_on_link_down; /* Delete dynamic routes when link goes down */
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u8 test_mode; /* Set when Unit testing */
+ u8 netlink_processing_active; /* Set while a batch of Netlink messages are
+ being processed */
} lcp_main_t;
extern lcp_main_t lcp_main;
@@ -37,6 +44,31 @@ int lcp_set_default_ns (u8 *ns);
u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */
int lcp_get_default_ns_fd (void);
+/**
+ * Get/Set whether to delete static routes when the link goes down.
+ */
+void lcp_set_del_static_on_link_down (u8 is_del);
+u8 lcp_get_del_static_on_link_down (void);
+
+/**
+ * Get/Set whether to delete dynamic routes when the link goes down.
+ */
+void lcp_set_del_dynamic_on_link_down (u8 is_del);
+u8 lcp_get_del_dynamic_on_link_down (void);
+
+/**
+ * Get/Set when we're processing a batch of netlink messages.
+ * This is used to avoid looping messages between lcp-sync and netlink.
+ */
+void lcp_set_netlink_processing_active (u8 is_processing);
+u8 lcp_get_netlink_processing_active (void);
+
+/**
+ * Get/Set the default queue number for LCP host taps.
+ */
+void lcp_set_default_num_queues (u16 num_queues, u8 is_tx);
+u16 lcp_get_default_num_queues (u8 is_tx);
+
#endif
/*
diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst
index f19981297a6..6f82a29bfbb 100644
--- a/src/plugins/linux-cp/lcp.rst
+++ b/src/plugins/linux-cp/lcp.rst
@@ -42,10 +42,7 @@ interfaces. Any configuration that is made on these Linux interfaces,
also needs to be applied on the corresponding physical interface in
VPP.
-This is functionality is not provided in this plugin, but it can be
-achieved in various ways, for example by listening to the netlink
-messages and applying the config. As a result all e.g. routes
-programmed in Linux, will also be present in VPP's FIB.
+This is functionality is provided by the "linux_nl" plugin.
Linux will own the [ARP/ND] neighbor tables (which will be copied via
netlink to VPP also). This means that Linux will send packets with the
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
index 7f77bf71f1d..74421230e9d 100644
--- a/src/plugins/linux-cp/lcp_api.c
+++ b/src/plugins/linux-cp/lcp_api.c
@@ -45,7 +45,7 @@ static int
vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
u8 *mp_host_if_name, size_t sizeof_host_if_name,
u8 *mp_namespace, size_t sizeof_mp_namespace,
- u32 *host_sw_if_index_p)
+ u32 *host_sw_if_index_p, u32 *vif_index_p)
{
u8 *host_if_name, *netns;
int host_len, netns_len, rv;
@@ -64,6 +64,13 @@ vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type,
netns, host_sw_if_index_p);
+ if (!rv && (vif_index_p != NULL))
+ {
+ lcp_itf_pair_t *pair =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
+ *vif_index_p = pair->lip_vif_index;
+ }
+
vec_free (host_if_name);
vec_free (netns);
@@ -78,20 +85,15 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv =
- vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name, sizeof (mp->host_if_name),
- mp->namespace, sizeof (mp->namespace), NULL);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL, NULL);
}
else
{
@@ -99,7 +101,7 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
+ REPLY_MACRO_END (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
}
static void
@@ -110,20 +112,45 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ phy_sw_if_index = mp->sw_if_index;
+ lip_host_type = api_decode_host_type (mp->host_if_type);
+ if (mp->is_add)
{
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, NULL);
}
+ else
+ {
+ rv = lcp_itf_pair_delete (phy_sw_if_index);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
+ { rmp->host_sw_if_index = host_sw_if_index; });
+}
+
+static void
+vl_api_lcp_itf_pair_add_del_v3_t_handler (vl_api_lcp_itf_pair_add_del_v3_t *mp)
+{
+ u32 phy_sw_if_index, host_sw_if_index = ~0, vif_index = ~0;
+ vl_api_lcp_itf_pair_add_del_v3_reply_t *rmp;
+ lip_host_type_t lip_host_type;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv = vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name,
- sizeof (mp->host_if_name), mp->namespace,
- sizeof (mp->namespace), &host_sw_if_index);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, &vif_index);
}
else
{
@@ -131,8 +158,10 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
- { rmp->host_sw_if_index = ntohl (host_sw_if_index); });
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V3_REPLY, ({
+ rmp->host_sw_if_index = host_sw_if_index;
+ rmp->vif_index = vif_index;
+ }));
}
static void
@@ -142,7 +171,7 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
vl_api_lcp_itf_pair_details_t *rmp;
lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi);
- REPLY_MACRO_DETAILS4 (
+ REPLY_MACRO_DETAILS4_END (
VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({
rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index;
rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index;
@@ -151,9 +180,11 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
memcpy_s (rmp->host_if_name, sizeof (rmp->host_if_name),
lcp_pair->lip_host_name, vec_len (lcp_pair->lip_host_name));
+ rmp->host_if_name[vec_len (lcp_pair->lip_host_name)] = 0;
- clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace,
- vec_len (lcp_pair->lip_namespace));
+ memcpy_s (rmp->netns, sizeof (rmp->netns), lcp_pair->lip_namespace,
+ vec_len (lcp_pair->lip_namespace));
+ rmp->netns[vec_len (lcp_pair->lip_namespace)] = 0;
}));
}
@@ -163,19 +194,51 @@ vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp)
vl_api_lcp_itf_pair_get_reply_t *rmp;
i32 rv = 0;
- REPLY_AND_DETAILS_MACRO (
+ REPLY_AND_DETAILS_MACRO_END (
VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
}
static void
+vl_api_lcp_itf_pair_get_v2_t_handler (vl_api_lcp_itf_pair_get_v2_t *mp)
+{
+ vl_api_lcp_itf_pair_get_v2_reply_t *rmp;
+ i32 rv = 0;
+
+ if (mp->sw_if_index == ~0)
+ {
+ REPLY_AND_DETAILS_MACRO_END (
+ VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
+ ({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
+ }
+ else
+ {
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ u32 pair_index = lcp_itf_pair_find_by_phy (mp->sw_if_index);
+ if (pair_index == INDEX_INVALID)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ send_lcp_itf_pair_details (
+ pair_index, vl_api_client_index_to_registration (mp->client_index),
+ mp->context);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_GET_V2_REPLY,
+ ({ rmp->cursor = ~0; }));
+ }
+}
+
+static void
vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
{
vl_api_lcp_default_ns_set_reply_t *rmp;
int rv;
- mp->namespace[LCP_NS_LEN - 1] = 0;
- rv = lcp_set_default_ns (mp->namespace);
+ mp->netns[LCP_NS_LEN - 1] = 0;
+ rv = lcp_set_default_ns (mp->netns);
REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY);
}
@@ -183,25 +246,14 @@ vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
static void
vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp)
{
- lcp_main_t *lcpm = &lcp_main;
vl_api_lcp_default_ns_get_reply_t *rmp;
- vl_api_registration_t *reg;
- char *ns;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = (VL_API_LCP_DEFAULT_NS_GET_REPLY + lcpm->msg_id_base);
- rmp->context = mp->context;
-
- ns = (char *) lcp_get_default_ns ();
- if (ns)
- clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1);
- vl_api_send_msg (reg, (u8 *) rmp);
+ REPLY_MACRO_DETAILS2 (VL_API_LCP_DEFAULT_NS_GET_REPLY, ({
+ char *ns = (char *) lcp_get_default_ns ();
+ if (ns)
+ clib_strncpy ((char *) rmp->netns, ns,
+ LCP_NS_LEN - 1);
+ }));
}
static void
@@ -234,7 +286,7 @@ vl_api_lcp_itf_pair_replace_end_t_handler (
#include <linux-cp/lcp.api.c>
static clib_error_t *
-lcp_plugin_api_hookup (vlib_main_t *vm)
+lcp_api_init (vlib_main_t *vm)
{
/* Ask for a correctly-sized block of API message decode slots */
lcp_msg_id_base = setup_message_id_table ();
@@ -242,7 +294,7 @@ lcp_plugin_api_hookup (vlib_main_t *vm)
return (NULL);
}
-VLIB_INIT_FUNCTION (lcp_plugin_api_hookup);
+VLIB_INIT_FUNCTION (lcp_api_init);
#include <vpp/app/version.h>
VLIB_PLUGIN_REGISTER () = {
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index cb874b1c023..0dcf600b301 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -34,81 +34,178 @@ lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
- u8 *host_if_name;
- lip_host_type_t host_if_type;
- u8 *ns;
- int r;
+ u32 sw_if_index = ~0;
+ u8 *host_if_name = NULL;
+ lip_host_type_t host_if_type = LCP_ITF_HOST_TAP;
+ u8 *ns = NULL;
+ clib_error_t *error = NULL;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "host-if %s", &host_if_name))
+ ;
+ else if (unformat (line_input, "netns %s", &ns))
+ ;
+ else if (unformat (line_input, "tun"))
+ host_if_type = LCP_ITF_HOST_TUN;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else if (!host_if_name)
+ error = clib_error_return (0, "host interface name required");
+ else if (vec_len (ns) >= LCP_NS_LEN)
+ error = clib_error_return (
+ 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ else
+ {
+ int r;
+
+ r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns,
+ NULL);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+ }
+
+ vec_free (host_if_name);
+ vec_free (ns);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+ .path = "lcp create",
+ .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+ "netns <namespace> [tun]",
+ .function = lcp_itf_pair_create_command_fn,
+};
+
+static clib_error_t *
+lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
- sw_if_index = ~0;
- host_if_name = ns = NULL;
- host_if_type = LCP_ITF_HOST_TAP;
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "host-if %s", &host_if_name))
- ;
- else if (unformat (line_input, "netns %s", &ns))
- ;
- else if (unformat (line_input, "tun"))
- host_if_type = LCP_ITF_HOST_TUN;
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_sync (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_sync (0);
else
- {
- unformat_free (line_input);
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
unformat_free (line_input);
+ return 0;
+}
- if (!host_if_name)
- {
- vec_free (ns);
- return clib_error_return (0, "host interface name required");
- }
+VLIB_CLI_COMMAND (lcp_sync_command, static) = {
+ .path = "lcp lcp-sync",
+ .short_help = "lcp lcp-sync [on|enable|off|disable]",
+ .function = lcp_sync_command_fn,
+};
- if (sw_if_index == ~0)
- {
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "interface name or sw_if_index required");
- }
+static clib_error_t *
+lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
- if (vec_len (ns) >= LCP_NS_LEN)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (
- 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_auto_subint (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_auto_subint (0);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
- r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns, NULL);
+ unformat_free (line_input);
+ return 0;
+}
- vec_free (host_if_name);
- vec_free (ns);
+VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = {
+ .path = "lcp lcp-auto-subint",
+ .short_help = "lcp lcp-auto-subint [on|enable|off|disable]",
+ .function = lcp_auto_subint_command_fn,
+};
- if (r)
- return clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+static clib_error_t *
+lcp_param_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-static-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_static_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else if (unformat (line_input, "del-dynamic-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_dynamic_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
return 0;
}
-VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
- .path = "lcp create",
- .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
- "netns <namespace> [tun]",
- .function = lcp_itf_pair_create_command_fn,
+VLIB_CLI_COMMAND (lcp_param_command, static) = {
+ .path = "lcp param",
+ .short_help = "lcp param [del-static-on-link-down (on|enable|off|disable)] "
+ "[del-dynamic-on-link-down (on|enable|off|disable)]",
+ .function = lcp_param_command_fn,
};
static clib_error_t *
@@ -118,6 +215,7 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *ns;
int r;
+ clib_error_t *error = NULL;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -130,10 +228,15 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "clear netns"))
;
+ else
+ {
+ vec_free (ns);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
-
vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
r = lcp_set_default_ns (ns);
@@ -141,7 +244,10 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (r)
return clib_error_return (0, "linux-cp set default netns failed (%d)", r);
- return 0;
+done:
+ unformat_free (line_input);
+
+ return error;
}
VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
@@ -156,36 +262,42 @@ lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
vnet_main_t *vnm = vnet_get_main ();
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int r;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- sw_if_index = ~0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_user (input, unformat_line_input, line_input))
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
}
- unformat_free (line_input);
-
- if (sw_if_index == ~0)
- return clib_error_return (0, "interface name or sw_if_index required");
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else
+ {
+ int r;
- r = lcp_itf_pair_delete (sw_if_index);
+ r = lcp_itf_pair_delete (sw_if_index);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+ }
- if (r)
- return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
- return 0;
+ return error;
}
VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index 6eb94068d63..e1f4a6a1d69 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -39,7 +39,7 @@
#include <vlibapi/api_helper_macros.h>
#include <vnet/ipsec/ipsec_punt.h>
-static vlib_log_class_t lcp_itf_pair_logger;
+vlib_log_class_t lcp_itf_pair_logger;
/**
* Pool of LIP objects
@@ -73,14 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft)
vec_add1 (lcp_itf_vfts, *lcp_itf_vft);
}
-#define LCP_ITF_PAIR_DBG(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_INFO(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
-
u8 *
format_lcp_itf_pair (u8 *s, va_list *args)
{
@@ -139,6 +131,13 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
ns = lcp_get_default_ns ();
vlib_cli_output (vm, "lcp default netns '%s'\n",
ns ? (char *) ns : "<unset>");
+ vlib_cli_output (vm, "lcp lcp-auto-subint %s\n",
+ lcp_auto_subint () ? "on" : "off");
+ vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-static-on-link-down %s\n",
+ lcp_get_del_static_on_link_down () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-dynamic-on-link-down %s\n",
+ lcp_get_del_dynamic_on_link_down () ? "on" : "off");
if (phy_sw_if_index == ~0)
{
@@ -157,6 +156,8 @@ lcp_itf_pair_get (u32 index)
{
if (!lcp_itf_pair_pool)
return NULL;
+ if (index == INDEX_INVALID)
+ return NULL;
return pool_elt_at_index (lcp_itf_pair_pool, index);
}
@@ -174,25 +175,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index)
return INDEX_INVALID;
}
-int
-lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns)
-{
- lcp_itf_pair_t *lip;
-
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
- if (!lip)
- {
- LCP_ITF_PAIR_DBG ("lcp_itf_pair_add_sub: can't find LCP of parent %U",
- format_vnet_sw_if_index_name, vnet_get_main (),
- phy_sw_if_index);
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- }
-
- return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
- host_if_name, vif, lip->lip_host_type, ns);
-}
-
const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
[LCP_ITF_HOST_TAP] = {
[AF_IP4] = "linux-cp-xc-ip4",
@@ -248,17 +230,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
index_t lipi;
lcp_itf_pair_t *lip;
+ if (host_sw_if_index == ~0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+ if (lipi != INDEX_INVALID)
+ return VNET_API_ERROR_VALUE_EXIST;
+
LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index, host_name, host_index,
ns);
- if (lipi != INDEX_INVALID)
- return VNET_API_ERROR_VALUE_EXIST;
-
/*
* Create a new pair.
*/
@@ -279,9 +267,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
lip->lip_vif_index = host_index;
lip->lip_namespace = vec_dup (ns);
- if (lip->lip_host_sw_if_index == ~0)
- return 0;
-
/*
* First use of this host interface.
* Enable the x-connect feature on the host to send
@@ -327,10 +312,13 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ }
}
/* invoke registered callbacks for pair addition */
@@ -421,10 +409,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
lip = lcp_itf_pair_get (lipi);
- LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %v}", format_vnet_sw_if_index_name,
- vnet_get_main (), lip->lip_phy_sw_if_index,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index, lip->lip_host_name);
+ LCP_ITF_PAIR_NOTICE (
+ "pair_del: host:%U phy:%U host_if:%v vif:%d ns:%v",
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
+ lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace);
/* invoke registered callbacks for pair deletion */
vec_foreach (vft, lcp_itf_vfts)
@@ -453,12 +442,14 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ }
}
-
lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
hash_unset (lip_db_by_vif, lip->lip_vif_index);
@@ -475,24 +466,45 @@ lcp_itf_pair_delete_by_index (index_t lipi)
{
u32 host_sw_if_index;
lcp_itf_pair_t *lip;
- u8 *host_name;
+ u8 *host_name, *ns;
lip = lcp_itf_pair_get (lipi);
host_name = vec_dup (lip->lip_host_name);
host_sw_if_index = lip->lip_host_sw_if_index;
+ ns = vec_dup (lip->lip_namespace);
lcp_itf_pair_del (lip->lip_phy_sw_if_index);
if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
{
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ if (ns)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open ((u8 *) ns);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
lcp_netlink_del_link ((const char *) host_name);
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
vnet_delete_sub_interface (host_sw_if_index);
}
else
tap_delete_if (vlib_get_main (), host_sw_if_index);
vec_free (host_name);
+ vec_free (ns);
}
int
@@ -510,6 +522,23 @@ lcp_itf_pair_delete (u32 phy_sw_if_index)
return 0;
}
+/**
+ * lcp_itf_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ if (!is_add)
+ /* remove any interface pair we have for this interface */
+ lcp_itf_pair_delete (sw_if_index);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
void
lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
{
@@ -522,58 +551,17 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
};
}
-typedef struct lcp_itf_pair_names_t_
-{
- u8 *lipn_host_name;
- u8 *lipn_phy_name;
- u8 *lipn_namespace;
- u32 lipn_phy_sw_if_index;
-} lcp_itf_pair_names_t;
-
-static lcp_itf_pair_names_t *lipn_names;
-
static clib_error_t *
lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
{
- u8 *host, *phy;
- u8 *ns;
u8 *default_ns;
+ u32 tmp;
- host = phy = ns = default_ns = NULL;
+ default_ns = NULL;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vec_reset_length (host);
-
- if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
- {
- lcp_itf_pair_names_t *lipn;
-
- if (vec_len (ns) > LCP_NS_LEN)
- {
- return clib_error_return (0,
- "linux-cp namespace must"
- " be less than %d characters",
- LCP_NS_LEN);
- }
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = vec_dup (ns);
- }
- else if (unformat (input, "pair %v %v", &phy, &host))
- {
- lcp_itf_pair_names_t *lipn;
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = 0;
- }
- else if (unformat (input, "default netns %v", &default_ns))
+ if (unformat (input, "default netns %v", &default_ns))
{
vec_add1 (default_ns, 0);
if (lcp_set_default_ns (default_ns) < 0)
@@ -584,14 +572,22 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
LCP_NS_LEN);
}
}
- else if (unformat (input, "interface-auto-create"))
- lcp_set_auto_intf (1 /* is_auto */);
+ else if (unformat (input, "lcp-auto-subint"))
+ lcp_set_auto_subint (1 /* is_auto */);
+ else if (unformat (input, "lcp-sync"))
+ lcp_set_sync (1 /* is_auto */);
+ else if (unformat (input, "del-static-on-link-down"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (input, "del-dynamic-on-link-down"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (input, "num-rx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 0 /* is_tx */);
+ else if (unformat (input, "num-tx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 1 /* is_tx */);
else
return clib_error_return (0, "interfaces not found");
}
- vec_free (host);
- vec_free (phy);
vec_free (default_ns);
return NULL;
@@ -636,7 +632,7 @@ lcp_validate_if_name (u8 *name)
return 1;
}
-static void
+void
lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
{
int curr_ns_fd, vif_ns_fd;
@@ -654,6 +650,8 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
clib_setns (vif_ns_fd);
}
+ /* Set the same link state on the netlink interface
+ */
vnet_netlink_set_link_state (lip->lip_vif_index, state);
if (vif_ns_fd != -1)
@@ -668,6 +666,58 @@ lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
return;
}
+void
+lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ int vif_ns_fd = -1;
+ int curr_ns_fd = -1;
+
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ /* Sync any IP4 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
+ format_lcp_itf_pair, lip, format_ip4_address, r4,
+ ia->address_length);
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length);
+ }));
+
+ /* Sync any IP6 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
+ format_lcp_itf_pair, lip, format_ip6_address, r6,
+ ia->address_length);
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
+ }));
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
typedef struct
{
u32 vlan;
@@ -723,6 +773,9 @@ lcp_itf_pair_find_by_outer_vlan (u32 sup_if_index, u16 vlan, bool dot1ad)
return lip_db_by_phy[match.matched_sw_if_index];
}
+static clib_error_t *lcp_itf_pair_link_up_down (vnet_main_t *vnm,
+ u32 hw_if_index, u32 flags);
+
int
lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
lip_host_type_t host_if_type, u8 *ns,
@@ -730,10 +783,19 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
{
vlib_main_t *vm;
vnet_main_t *vnm;
- u32 vif_index = 0, host_sw_if_index;
+ u32 vif_index = 0, host_sw_if_index = ~0;
const vnet_sw_interface_t *sw;
const vnet_hw_interface_t *hw;
const lcp_itf_pair_t *lip;
+ index_t lipi;
+
+ lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+ if (lipi != INDEX_INVALID)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: already created");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
{
@@ -757,6 +819,14 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
}
+ if (hw->hw_class_index != ethernet_hw_interface_class.index &&
+ host_if_type == LCP_ITF_HOST_TAP)
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: don't create TAP for non-eth interface; use tun");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
/*
* Use interface-specific netns if supplied.
* Otherwise, use netns if defined, otherwise use the OS default.
@@ -775,9 +845,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
u16 vlan, proto;
u32 parent_vif_index;
- // TODO(pim) replace with vnet_sw_interface_supports_addressing()
- if (sw->type == VNET_SW_INTERFACE_TYPE_SUB &&
- sw->sub.eth.flags.exact_match == 0)
+ err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index);
+ if (err)
{
LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
"sub-interface without exact-match set");
@@ -904,7 +973,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm,
lip->lip_host_sw_if_index);
err = clib_error_return (
- 0, "failed to create tap subinti: %d.%d. on %U", outer_vlan,
+ 0, "failed to create tap subint: %d.%d. on %U", outer_vlan,
inner_vlan, format_vnet_sw_if_index_name, vnm,
lip->lip_host_sw_if_index);
}
@@ -924,15 +993,21 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
else
{
tap_create_if_args_t args = {
- .num_rx_queues = clib_max (1, vlib_num_workers ()),
+ .num_rx_queues =
+ clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)),
+ .num_tx_queues =
+ clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)),
.id = hw->hw_if_index,
.sw_if_index = ~0,
.rx_ring_sz = 256,
.tx_ring_sz = 256,
.host_if_name = host_if_name,
.host_namespace = 0,
+ .rv = 0,
+ .error = NULL,
};
ethernet_interface_t *ei;
+ u32 host_sw_mtu_size;
if (host_if_type == LCP_ITF_HOST_TUN)
args.tap_flags |= TAP_FLAG_TUN;
@@ -942,39 +1017,37 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
mac_address_copy (&args.host_mac_addr, &ei->address.mac);
}
- if (sw->mtu[VNET_MTU_L3])
+ /*
+ * The TAP interface does copy forward the host MTU based on the VPP
+ * interface's L3 MTU, but it should also ensure that the VPP tap
+ * interface has an MTU that is greater-or-equal to those. Considering
+ * users can set the interfaces at runtime (set interface mtu packet ...)
+ * ensure that the tap MTU is large enough, taking the VPP interface L3
+ * if it's set, and otherwise a sensible default.
+ */
+ host_sw_mtu_size = sw->mtu[VNET_MTU_L3];
+ if (host_sw_mtu_size)
{
args.host_mtu_set = 1;
- args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+ args.host_mtu_size = host_sw_mtu_size;
}
+ else
+ host_sw_mtu_size = ETHERNET_MAX_PACKET_BYTES;
if (ns && ns[0] != 0)
args.host_namespace = ns;
vm = vlib_get_main ();
tap_create_if (vm, &args);
-
if (args.rv < 0)
{
LCP_ITF_PAIR_ERR ("pair_create: could not create tap, retval:%d",
args.rv);
+ clib_error_free (args.error);
return args.rv;
}
- /*
- * The TAP interface does copy forward the host MTU based on the VPP
- * interface's L3 MTU, but it should also ensure that the VPP tap
- * interface has an MTU that is greater-or-equal to those. Considering
- * users can set the interfaces at runtime (set interface mtu packet ...)
- * ensure that the tap MTU is large enough, taking the VPP interface L3
- * if it's set, and otherwise a sensible default.
- */
- if (sw->mtu[VNET_MTU_L3])
- vnet_sw_interface_set_mtu (vnm, args.sw_if_index,
- sw->mtu[VNET_MTU_L3]);
- else
- vnet_sw_interface_set_mtu (vnm, args.sw_if_index,
- ETHERNET_MAX_PACKET_BYTES);
+ vnet_sw_interface_set_mtu (vnm, args.sw_if_index, host_sw_mtu_size);
/*
* get the hw and ethernet of the tap
@@ -1020,13 +1093,24 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
* The TAP is shared by many interfaces, always keep it up.
* This controls whether the host can RX/TX.
*/
-
+ sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index));
LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u",
format_lcp_itf_pair, lip, sw->flags, hw->flags);
vnet_sw_interface_admin_up (vnm, host_sw_if_index);
lcp_itf_set_link_state (lip, sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ /*
+ * Reflect current link state and link speed of the hardware interface on the
+ * TAP interface.
+ */
+ if (host_if_type == LCP_ITF_HOST_TAP &&
+ !vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+ {
+ hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ lcp_itf_pair_link_up_down (vnm, hw->hw_if_index, hw->flags);
+ }
+
if (host_sw_if_indexp)
*host_sw_if_indexp = host_sw_if_index;
@@ -1089,70 +1173,6 @@ lcp_itf_pair_replace_end (void)
return (0);
}
-static uword
-lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
- vlib_frame_t *f)
-{
- uword *event_data = 0;
- uword *lipn_index;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- vlib_process_get_events (vm, &event_data);
-
- vec_foreach (lipn_index, event_data)
- {
- lcp_itf_pair_names_t *lipn;
-
- lipn = &lipn_names[*lipn_index];
- lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
- lipn->lipn_host_name, LCP_ITF_HOST_TAP,
- lipn->lipn_namespace, NULL);
- }
-
- vec_reset_length (event_data);
- }
-
- return 0;
-}
-
-VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
- .function = lcp_itf_pair_process,
- .name = "linux-cp-itf-process",
- .type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
-{
- lcp_itf_pair_names_t *lipn;
- vlib_main_t *vm = vlib_get_main ();
- vnet_hw_interface_t *hw;
-
- if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
- return NULL;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- vec_foreach (lipn, lipn_names)
- {
- if (!vec_cmp (hw->name, lipn->lipn_phy_name))
- {
- lipn->lipn_phy_sw_if_index = sw_if_index;
-
- vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
- lipn - lipn_names);
- break;
- }
- }
-
- return NULL;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
-
static clib_error_t *
lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
@@ -1179,7 +1199,8 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
tap_set_carrier (si->hw_if_index,
(flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
- if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP &&
+ hi->link_speed != UINT32_MAX)
{
tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
}
@@ -1191,13 +1212,15 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
static clib_error_t *
-lcp_itf_pair_init (vlib_main_t *vm)
+lcp_interface_init (vlib_main_t *vm)
{
vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
/* punt IKE */
vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
"linux-cp-punt");
+ vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP6_SPI_UDP_0],
+ "linux-cp-punt");
/* punt all unknown ports */
udp_punt_unknown (vm, 0, 1);
@@ -1210,7 +1233,7 @@ lcp_itf_pair_init (vlib_main_t *vm)
return NULL;
}
-VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+VLIB_INIT_FUNCTION (lcp_interface_init) = {
.runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
};
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index bed30248845..cfcd3925a15 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -21,6 +21,22 @@
#include <plugins/linux-cp/lcp.h>
+extern vlib_log_class_t lcp_itf_pair_logger;
+
+#define LCP_ITF_PAIR_DBG(...) \
+ vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_INFO(...) \
+ vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_NOTICE(...) \
+ vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_WARN(...) \
+ vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
+
#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
typedef enum lip_flag_t_
@@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
u8 *host_name, u32 host_index,
lip_host_type_t host_type, u8 *ns);
-extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns);
extern int lcp_itf_pair_del (u32 phy_sw_if_index);
/**
@@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index)
return (lip_db_by_host[host_sw_if_index]);
}
-/**
- * manage interface auto creation
- */
-void lcp_set_auto_intf (u8 is_auto);
-int lcp_auto_intf (void);
-
typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *);
typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *);
@@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft
} lcp_itf_pair_vft_t;
void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft);
+
+/**
+ * sub-interface auto creation/deletion for LCP
+ */
+void lcp_set_auto_subint (u8 is_auto);
+int lcp_auto_subint (void);
+
+/**
+ * sync state changes from VPP into LCP
+ */
+void lcp_set_sync (u8 is_auto);
+int lcp_sync (void);
+
+/* Set TAP and Linux host link state */
+void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state);
+
+/* Set any VPP L3 addresses on Linux host device */
+void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip);
+
+/* Sync all state from VPP to a specific Linux device, all sub-interfaces
+ * of a hardware interface, or all interfaces in the system.
+ *
+ * Note: in some circumstances, this syncer will (have to) make changes to
+ * the VPP interface, for example if its MTU is greater than its parent.
+ * See the function for rationale.
+ */
+void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
+void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
+void lcp_itf_pair_sync_state_all ();
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c
new file mode 100644
index 00000000000..ca7638e1799
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface_sync.c
@@ -0,0 +1,445 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2021 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/linux/netns.h>
+#include <plugins/linux-cp/lcp_interface.h>
+
+/* helper function to copy forward all sw interface link state flags
+ * MTU, and IP addresses into their counterpart LIP interface.
+ *
+ * This is called upon MTU changes and state changes.
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+ vnet_sw_interface_t *sw;
+ vnet_sw_interface_t *sup_sw;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ u32 mtu;
+ u32 netlink_mtu;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index);
+ if (!sw)
+ return;
+ sup_sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
+ if (!sup_sw)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+ /* Linux will not allow children to be admin-up if their parent is
+ * admin-down. If child is up but parent is not, force it down.
+ */
+ int state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+ if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ LCP_ITF_PAIR_WARN (
+ "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+ "forcing state to sup-flags to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ state = 0;
+ }
+ lcp_itf_set_link_state (lip, state);
+
+ /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+ * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+ * inherits from its parent. Linux likes to be more explicit, so we
+ * reconcile any differences.
+ */
+ mtu = sw->mtu[VNET_MTU_L3];
+ if (mtu == 0)
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+
+ if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
+ {
+ LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: "
+ "clamping to sup-mtu to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+ }
+
+ /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we
+ * really do want to change the MTU.
+ */
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_phy_sw_if_index, mtu);
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_host_sw_if_index, mtu);
+ if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu))
+ {
+ if (netlink_mtu != mtu)
+ vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu);
+ }
+
+ /* Linux will remove IPv6 addresses on children when the parent state
+ * goes down, so we ensure all IPv4/IPv6 addresses are synced.
+ */
+ lcp_itf_set_interface_addr (lip);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx)
+{
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lipi);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index,
+ void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+void
+lcp_itf_pair_sync_state_all ()
+{
+ lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0);
+}
+
+void
+lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi)
+{
+ if (!hi)
+ return;
+ LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name,
+ vnet_get_main (), hi->hw_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hi->hw_if_index,
+ lcp_itf_pair_walk_sync_state_hw_cb, NULL);
+}
+
+static clib_error_t *
+lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ lcp_itf_pair_t *lip;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return 0;
+
+ LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
+ format_vnet_sw_if_index_name, vnm, sw_if_index, flags);
+
+ // Sync interface state changes into host
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return NULL;
+ LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair,
+ lip, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+ // When Linux changes link on a parent interface, all of its children also
+ // change. If a parent interface changes MTU, all of its children are clamped
+ // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo
+ // change by walking the sub-interfaces of a phy and syncing their state back
+ // into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change);
+
+static clib_error_t *
+lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
+ sw_if_index, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (lip)
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+ // When Linux changes link on a parent interface, all of its children also
+ // change. If a parent interface changes MTU, all of its children are clamped
+ // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo
+ // change by walking the sub-interfaces of a phy and syncing their state back
+ // into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip4_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip4_address, address,
+ address_length);
+
+ if (is_del)
+ vnet_netlink_del_ip4_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+ return;
+}
+
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+ u32 sw_if_index, ip6_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip6_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+ LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip6_address, address,
+ address_length);
+ if (is_del)
+ vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+ const vnet_sw_interface_t *sw;
+ uword is_sub;
+
+ if (!lcp_auto_subint ())
+ return NULL;
+
+ sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!sw)
+ return NULL;
+
+ is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index);
+ if (!is_sub)
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sup_sw_if_index);
+
+ if (is_create)
+ {
+ const lcp_itf_pair_t *sup_lip;
+ u8 *name = 0;
+
+ // If the parent has a LIP auto-create a LIP for this interface
+ sup_lip =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
+ if (!sup_lip)
+ return NULL;
+
+ name = format (name, "%s.%d%c", sup_lip->lip_host_name, sw->sub.id, 0);
+
+ LCP_ITF_PAIR_INFO (
+ "interface_%s: %U has parent %U, auto-creating LCP with host-if %s",
+ is_create ? "add" : "del", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name);
+
+ lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP,
+ sup_lip->lip_namespace, NULL);
+
+ vec_free (name);
+ }
+ else
+ {
+ lcp_itf_pair_delete (sw_if_index);
+ }
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
+static clib_error_t *
+lcp_itf_sync_init (vlib_main_t *vm)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ ip4_add_del_interface_address_callback_t cb4;
+ ip6_add_del_interface_address_callback_t cb6;
+
+ cb4.function = lcp_itf_ip4_add_del_interface_addr;
+ cb4.function_opaque = 0;
+ vec_add1 (im4->add_del_interface_address_callbacks, cb4);
+
+ cb6.function = lcp_itf_ip6_add_del_interface_addr;
+ cb6.function_opaque = 0;
+ vec_add1 (im6->add_del_interface_address_callbacks, cb6);
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_sync_init) = {
+ .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c
new file mode 100644
index 00000000000..c08fcb4d1d9
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_mpls_sync.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux-cp/lcp_interface.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/linux/netns.h>
+
+#include <fcntl.h>
+
+vlib_log_class_t lcp_mpls_sync_logger;
+
+#define LCP_MPLS_SYNC_DBG(...) \
+ vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__);
+
+void
+lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip)
+{
+ u8 phy_is_enabled = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index);
+ LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_is_enabled,
+ format_lcp_itf_pair, lip);
+ if (phy_is_enabled)
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ 1);
+}
+
+void
+lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index,
+ u32 is_enable)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ int ctl_fd = -1;
+ u8 *ctl_path = NULL;
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index);
+
+ // If device is LCP PHY, sync state to host tap.
+ lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+ if (INDEX_INVALID != lipi)
+ {
+ lip = lcp_itf_pair_get (lipi);
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ is_enable);
+ return;
+ }
+
+ // If device is LCP host, toggle MPLS XC feature.
+ lipi = lcp_itf_pair_find_by_host (sw_if_index);
+ if (INDEX_INVALID == lipi)
+ return;
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index,
+ is_enable, NULL, 0);
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+
+ // If syncing is enabled, sync Linux state as well.
+ // This can happen regardless of lcp_get_netlink_processing_active(),
+ // provided it does not generate Netlink messages.
+ if (!lcp_sync ())
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c",
+ lip->lip_host_name, NULL);
+ if (NULL == ctl_path)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl");
+ goto SYNC_CLEANUP;
+ }
+
+ ctl_fd = open ((char *) ctl_path, O_WRONLY);
+ if (ctl_fd < 0)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing",
+ ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ if (fdformat (ctl_fd, "%u", is_enable) < 1)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s",
+ lip->lip_host_name);
+
+SYNC_CLEANUP:
+ if (ctl_fd > -1)
+ close (ctl_fd);
+
+ if (NULL != ctl_path)
+ vec_free (ctl_path);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_mpls_sync_init (vlib_main_t *vm)
+{
+ lcp_itf_pair_vft_t mpls_sync_itf_pair_vft = {
+ .pair_add_fn = lcp_mpls_sync_pair_add_cb,
+ };
+ lcp_itf_pair_register_vft (&mpls_sync_itf_pair_vft);
+
+ mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0);
+
+ lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync");
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c
new file mode 100644
index 00000000000..85b6447007a
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+
+#include <linux-cp/lcp_nl.h>
+
+#include <netlink/route/rule.h>
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+#include <vppinfra/linux/netns.h>
+
+#include <vnet/fib/fib_table.h>
+
+#include <libmnl/libmnl.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+typedef enum nl_status_t_
+{
+ NL_STATUS_NOTIF_PROC,
+ NL_STATUS_SYNC,
+} nl_status_t;
+
+typedef enum nl_sock_type_t_
+{
+ NL_SOCK_TYPE_LINK,
+ NL_SOCK_TYPE_ADDR,
+ NL_SOCK_TYPE_NEIGH,
+ NL_SOCK_TYPE_ROUTE,
+} nl_sock_type_t;
+
+#define NL_SOCK_TYPES_N (NL_SOCK_TYPE_ROUTE + 1)
+
+/* Socket type, message type, type name, function subname */
+#define foreach_sock_type \
+ _ (NL_SOCK_TYPE_LINK, RTM_GETLINK, "link", link) \
+ _ (NL_SOCK_TYPE_ADDR, RTM_GETADDR, "address", link_addr) \
+ _ (NL_SOCK_TYPE_NEIGH, RTM_GETNEIGH, "neighbor", neigh) \
+ _ (NL_SOCK_TYPE_ROUTE, RTM_GETROUTE, "route", route)
+
+typedef enum nl_event_type_t_
+{
+ NL_EVENT_READ,
+ NL_EVENT_ERR,
+} nl_event_type_t;
+
+typedef struct nl_main
+{
+
+ nl_status_t nl_status;
+
+ struct nl_sock *sk_route;
+ struct nl_sock *sk_route_sync[NL_SOCK_TYPES_N];
+ vlib_log_class_t nl_logger;
+ nl_vft_t *nl_vfts;
+ struct nl_cache *nl_caches[LCP_NL_N_OBJS];
+ nl_msg_info_t *nl_msg_queue;
+ uword clib_file_index;
+
+ u32 rx_buf_size;
+ u32 tx_buf_size;
+ u32 batch_size;
+ u32 batch_delay_ms;
+
+ u32 sync_batch_limit;
+ u32 sync_batch_delay_ms;
+ u32 sync_attempt_delay_ms;
+
+} nl_main_t;
+
+#define NL_RX_BUF_SIZE_DEF (1 << 27) /* 128 MB */
+#define NL_TX_BUF_SIZE_DEF (1 << 18) /* 256 kB */
+#define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */
+#define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */
+
+#define NL_SYNC_BATCH_LIMIT_DEF (1 << 10) /* 1024 */
+#define NL_SYNC_BATCH_DELAY_MS_DEF 20 /* 20ms, max 50 batch/s */
+#define NL_SYNC_ATTEMPT_DELAY_MS_DEF 2000 /* 2s */
+
+static nl_main_t nl_main = {
+ .rx_buf_size = NL_RX_BUF_SIZE_DEF,
+ .tx_buf_size = NL_TX_BUF_SIZE_DEF,
+ .batch_size = NL_BATCH_SIZE_DEF,
+ .batch_delay_ms = NL_BATCH_DELAY_MS_DEF,
+ .sync_batch_limit = NL_SYNC_BATCH_LIMIT_DEF,
+ .sync_batch_delay_ms = NL_SYNC_BATCH_DELAY_MS_DEF,
+ .sync_attempt_delay_ms = NL_SYNC_ATTEMPT_DELAY_MS_DEF,
+};
+
+/* #define foreach_nl_nft_proto \ */
+/* _(IP4, "ip", AF_INT) \ */
+/* _(IP6, "ip6", NFPROTO_IPV6) */
+
+/* typedef enum nl_nft_proto_t_ */
+/* { */
+/* #define _(a,b,c) NL_NFT_PROTO_##a = c, */
+/* foreach_nl_nft_proto */
+/* #undef _ */
+/* } nl_nft_proto_t; */
+
+#define FOREACH_VFT(__func, __arg) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_NO_ARG(__func) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_CTX(__func, __arg, __ctx) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg, __ctx); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+void
+nl_register_vft (const nl_vft_t *nv)
+{
+ nl_main_t *nm = &nl_main;
+
+ vec_add1 (nm->nl_vfts, *nv);
+}
+
+#define NL_DBG(...) vlib_log_debug (nl_main.nl_logger, __VA_ARGS__);
+#define NL_INFO(...) vlib_log_notice (nl_main.nl_logger, __VA_ARGS__);
+#define NL_ERROR(...) vlib_log_err (nl_main.nl_logger, __VA_ARGS__);
+
+static void lcp_nl_open_socket (void);
+static void lcp_nl_close_socket (void);
+static void lcp_nl_open_sync_socket (nl_sock_type_t sock_type);
+static void lcp_nl_close_sync_socket (nl_sock_type_t sock_type);
+
+static void
+nl_route_del (struct rtnl_route *rr, void *arg)
+{
+ FOREACH_VFT (nvl_rt_route_del, rr);
+}
+
+static void
+nl_route_add (struct rtnl_route *rr, void *arg)
+{
+ int is_replace = 0;
+
+ if (arg)
+ {
+ nl_msg_info_t *msg_info = (nl_msg_info_t *) arg;
+ struct nlmsghdr *nlh = nlmsg_hdr (msg_info->msg);
+
+ is_replace = (nlh->nlmsg_flags & NLM_F_REPLACE);
+ }
+
+ FOREACH_VFT_CTX (nvl_rt_route_add, rr, is_replace);
+}
+
+static void
+nl_route_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_begin);
+}
+
+static void
+nl_route_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_end);
+}
+
+static void
+nl_neigh_del (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_del, rn);
+}
+
+static void
+nl_neigh_add (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_add, rn);
+}
+
+static void
+nl_neigh_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_begin);
+}
+
+static void
+nl_neigh_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_end);
+}
+
+static void
+nl_link_addr_del (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_del, rla);
+}
+
+static void
+nl_link_addr_add (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_add, rla);
+}
+
+static void
+nl_link_addr_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_begin);
+}
+
+static void
+nl_link_addr_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_end);
+}
+
+static void
+nl_link_del (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_del, rl, arg);
+}
+
+static void
+nl_link_add (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_add, rl, arg);
+}
+
+static void
+nl_link_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_begin);
+}
+
+static void
+nl_link_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_end);
+}
+
+static void
+nl_route_dispatch (struct nl_object *obj, void *arg)
+{
+ /* nothing can be done without interface mappings */
+ if (!lcp_itf_num_pairs ())
+ return;
+
+ switch (nl_object_get_msgtype (obj))
+ {
+ case RTM_NEWROUTE:
+ nl_route_add ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_DELROUTE:
+ nl_route_del ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_NEWNEIGH:
+ nl_neigh_add ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_DELNEIGH:
+ nl_neigh_del ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_NEWADDR:
+ nl_link_addr_add ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_DELADDR:
+ nl_link_addr_del ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_NEWLINK:
+ nl_link_add ((struct rtnl_link *) obj, arg);
+ break;
+ case RTM_DELLINK:
+ nl_link_del ((struct rtnl_link *) obj, arg);
+ break;
+ default:
+ NL_INFO ("unhandled: %s", nl_object_get_type (obj));
+ break;
+ }
+}
+
+static int
+nl_route_process_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int err, n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+ /* process a batch of messages. break if we hit our limit */
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ if ((err = nl_msg_parse (msg_info->msg, nl_route_dispatch, msg_info)) <
+ 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+ nlmsg_free (msg_info->msg);
+ if (++n_msgs >= nm->batch_size)
+ break;
+ }
+
+ /* remove the messages we processed from the head of the queue */
+ if (n_msgs)
+ vec_delete (nm->nl_msg_queue, n_msgs, 0);
+
+ NL_DBG ("Processed %u messages", n_msgs);
+
+ lcp_set_netlink_processing_active (0);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_discard_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int n_msgs;
+
+ n_msgs = vec_len (nm->nl_msg_queue);
+ if (n_msgs == 0)
+ return 0;
+
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ nlmsg_free (msg_info->msg);
+ }
+
+ vec_reset_length (nm->nl_msg_queue);
+
+ NL_INFO ("Discarded %u messages", n_msgs);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_send_dump_req (nl_sock_type_t sock_type, int msg_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ int err;
+ struct rtgenmsg rt_hdr = {
+ .rtgen_family = AF_UNSPEC,
+ };
+
+ err =
+ nl_send_simple (sk_route, msg_type, NLM_F_DUMP, &rt_hdr, sizeof (rt_hdr));
+
+ if (err < 0)
+ {
+ NL_ERROR ("Unable to send a dump request: %s", nl_geterror (err));
+ }
+ else
+ NL_INFO ("Dump request sent via socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+
+ return err;
+}
+
+static int
+lcp_nl_route_dump_cb (struct nl_msg *msg, void *arg)
+{
+ int err;
+
+ if ((err = nl_msg_parse (msg, nl_route_dispatch, NULL)) < 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+
+ return NL_OK;
+}
+
+static int
+lcp_nl_recv_dump_replies (nl_sock_type_t sock_type, int msg_limit,
+ int *is_done_rcvd)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ struct sockaddr_nl nla;
+ uint8_t *buf = NULL;
+ int n_bytes;
+ struct nlmsghdr *hdr;
+ struct nl_msg *msg = NULL;
+ int err = 0;
+ int done = 0;
+ int n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+continue_reading:
+ n_bytes = nl_recv (sk_route, &nla, &buf, /* creds */ NULL);
+ if (n_bytes <= 0)
+ {
+ lcp_set_netlink_processing_active (0);
+ return n_bytes;
+ }
+
+ hdr = (struct nlmsghdr *) buf;
+ while (nlmsg_ok (hdr, n_bytes))
+ {
+ nlmsg_free (msg);
+ msg = nlmsg_convert (hdr);
+ if (!msg)
+ {
+ err = -NLE_NOMEM;
+ goto out;
+ }
+
+ n_msgs++;
+
+ nlmsg_set_proto (msg, NETLINK_ROUTE);
+ nlmsg_set_src (msg, &nla);
+
+ /* Message that terminates a multipart message. Finish parsing and signal
+ * the caller that all dump replies have been received
+ */
+ if (hdr->nlmsg_type == NLMSG_DONE)
+ {
+ done = 1;
+ goto out;
+ }
+ /* Message to be ignored. Continue parsing */
+ else if (hdr->nlmsg_type == NLMSG_NOOP)
+ ;
+ /* Message that indicates data was lost. Finish parsing and return an
+ * error
+ */
+ else if (hdr->nlmsg_type == NLMSG_OVERRUN)
+ {
+ err = -NLE_MSG_OVERFLOW;
+ goto out;
+ }
+ /* Message that indicates an error. Finish parsing, extract the error
+ * code, and return it */
+ else if (hdr->nlmsg_type == NLMSG_ERROR)
+ {
+ struct nlmsgerr *e = nlmsg_data (hdr);
+
+ if (hdr->nlmsg_len < nlmsg_size (sizeof (*e)))
+ {
+ err = -NLE_MSG_TRUNC;
+ goto out;
+ }
+ else if (e->error)
+ {
+ err = -nl_syserr2nlerr (e->error);
+ goto out;
+ }
+ /* Message is an acknowledgement (err_code = 0). Continue parsing */
+ else
+ ;
+ }
+ /* Message that contains the requested data. Pass it for processing and
+ * continue parsing
+ */
+ else
+ {
+ lcp_nl_route_dump_cb (msg, NULL);
+ }
+
+ hdr = nlmsg_next (hdr, &n_bytes);
+ }
+
+ nlmsg_free (msg);
+ free (buf);
+ msg = NULL;
+ buf = NULL;
+
+ if (!done && n_msgs < msg_limit)
+ goto continue_reading;
+
+out:
+ lcp_set_netlink_processing_active (0);
+
+ nlmsg_free (msg);
+ free (buf);
+
+ if (err)
+ return err;
+
+ *is_done_rcvd = done;
+
+ return n_msgs;
+}
+
+#define DAY_F64 (1.0 * (24 * 60 * 60))
+
+static uword
+nl_route_process (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ nl_main_t *nm = &nl_main;
+ uword event_type;
+ uword *event_data = 0;
+ f64 wait_time = DAY_F64;
+ int n_msgs;
+ int is_done;
+
+ while (1)
+ {
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ {
+ /* If we process a batch of messages and stop because we reached the
+ * batch size limit, we want to wake up after the batch delay and
+ * process more. Otherwise we just want to wait for a read event.
+ */
+ vlib_process_wait_for_event_or_clock (vm, wait_time);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ /* Process batch of queued messages on timeout or read event
+ * signal
+ */
+ case ~0:
+ case NL_EVENT_READ:
+ nl_route_process_msgs ();
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ?
+ nm->batch_delay_ms * 1e-3 :
+ DAY_F64;
+ break;
+
+ /* Initiate synchronization if there was an error polling or
+ * reading the notification socket
+ */
+ case NL_EVENT_ERR:
+ nm->nl_status = NL_STATUS_SYNC;
+ break;
+
+ default:
+ NL_ERROR ("Unknown event type: %u", (u32) event_type);
+ }
+ }
+ else if (nm->nl_status == NL_STATUS_SYNC)
+ {
+ /* Stop processing notifications - close the notification socket and
+ * discard all messages that are currently in the queue
+ */
+ lcp_nl_close_socket ();
+ lcp_nl_route_discard_msgs ();
+
+ /* Wait some time before next synchronization attempt. Allows to
+ * reduce the number of failed attempts that stall the main thread by
+ * waiting out the notification storm
+ */
+ NL_INFO ("Wait before next synchronization attempt for %ums",
+ nm->sync_attempt_delay_ms);
+ vlib_process_suspend (vm, nm->sync_attempt_delay_ms * 1e-3);
+
+ /* Open netlink synchronization socket, one for every data type of
+ * interest: link, address, neighbor, and route. That is needed to
+ * be able to send dump requests for every data type simultaneously.
+ * If send a dump request while the previous one is in progress,
+ * the request will fail and EBUSY returned
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_open_sync_socket (stype);
+ foreach_sock_type
+#undef _
+
+ /* Start reading notifications and enqueueing them for further
+ * processing. The notifications will serve as a difference between
+ * the snapshot made after the dump request and the actual state at
+ * the moment. Once all the dump replies are processed, the
+ * notifications will be processed
+ */
+ lcp_nl_open_socket ();
+
+ /* Request the current entry set from the kernel for every data type
+ * of interest. Thus requesting a snapshot of the current routing
+ * state that the kernel will make and then reply with
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_route_send_dump_req (stype, mtype);
+ foreach_sock_type
+#undef _
+
+ /* Process all the dump replies */
+#define _(stype, mtype, tname, fn) \
+ nl_##fn##_sync_begin (); \
+ is_done = 0; \
+ do \
+ { \
+ n_msgs = \
+ lcp_nl_recv_dump_replies (stype, nm->sync_batch_limit, &is_done); \
+ if (n_msgs < 0) \
+ { \
+ NL_ERROR ("Error receiving dump replies of type " tname \
+ ": %s (%d)", \
+ nl_geterror (n_msgs), n_msgs); \
+ break; \
+ } \
+ else if (n_msgs == 0) \
+ { \
+ NL_ERROR ("EOF while receiving dump replies of type " tname); \
+ break; \
+ } \
+ else \
+ NL_INFO ("Processed %u dump replies of type " tname, n_msgs); \
+ \
+ /* Suspend the processing loop and wait until event signal is \
+ * received or timeout expires. During synchronization, only \
+ * error event is expected because read event is suppressed. \
+ * Allows not to stall the main thread and detect errors on the \
+ * notification socket that will make synchronization \
+ * incomplete \
+ */ \
+ vlib_process_wait_for_event_or_clock (vm, \
+ nm->sync_batch_delay_ms * 1e-3); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ vec_reset_length (event_data); \
+ \
+ /* If error event received, stop synchronization and repeat an \
+ * attempt later \
+ */ \
+ if (event_type == NL_EVENT_ERR) \
+ goto sync_later; \
+ } \
+ while (!is_done); \
+ nl_##fn##_sync_end ();
+
+ foreach_sock_type
+#undef _
+
+ /* Start processing notifications */
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+
+ /* Trigger messages processing if there are notifications received
+ * during synchronization
+ */
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ? 1e-3 : DAY_F64;
+
+ sync_later:
+ /* Close netlink synchronization sockets */
+#define _(stype, mtype, tname, fn) lcp_nl_close_sync_socket (stype);
+ foreach_sock_type
+#undef _
+ }
+ else
+ NL_ERROR ("Unknown status: %d", nm->nl_status);
+ }
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nl_route_process_node, static) = {
+ .function = nl_route_process,
+ .name = "linux-cp-netlink-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 17,
+};
+
+static int
+nl_route_cb (struct nl_msg *msg, void *arg)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info = 0;
+
+ /* delay processing - increment ref count and queue for later */
+ vec_add2 (nm->nl_msg_queue, msg_info, 1);
+
+ /* store a timestamp for the message */
+ msg_info->ts = vlib_time_now (vlib_get_main ());
+ msg_info->msg = msg;
+ nlmsg_get (msg);
+
+ return 0;
+}
+
+int
+lcp_nl_drain_messages (void)
+{
+ int err;
+ nl_main_t *nm = &nl_main;
+
+ /* Read until there's an error */
+ while ((err = nl_recvmsgs_default (nm->sk_route)) > -1)
+ ;
+
+ /* If there was an error other then EAGAIN, signal process node */
+ if (err != -NLE_AGAIN)
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+ else
+ {
+ /* If netlink notification processing is active, signal process node
+ * there were notifications read
+ */
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ vlib_process_signal_event (
+ vlib_get_main (), nl_route_process_node.index, NL_EVENT_READ, 0);
+ }
+
+ return err;
+}
+
+void
+lcp_nl_pair_add_cb (lcp_itf_pair_t *pair)
+{
+ lcp_nl_drain_messages ();
+}
+
+static clib_error_t *
+nl_route_read_cb (clib_file_t *f)
+{
+ int err;
+ err = lcp_nl_drain_messages ();
+ if (err < 0 && err != -NLE_AGAIN)
+ NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)",
+ f->file_descriptor, nl_geterror (err), err);
+
+ return 0;
+}
+
+static clib_error_t *
+nl_route_error_cb (clib_file_t *f)
+{
+ NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor);
+
+ /* notify process node */
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+
+ return clib_error_return (0, "Error polling netlink socket %d",
+ f->file_descriptor);
+}
+
+struct nl_cache *
+lcp_nl_get_cache (lcp_nl_obj_t t)
+{
+ nl_main_t *nm = &nl_main;
+
+ return nm->nl_caches[t];
+}
+
+/* Set the RX buffer size to be used on the netlink socket */
+void
+lcp_nl_set_buffer_size (u32 buf_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->rx_buf_size = buf_size;
+
+ if (nm->sk_route)
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+}
+
+/* Set the batch size - maximum netlink messages to process at one time */
+void
+lcp_nl_set_batch_size (u32 batch_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_size = batch_size;
+}
+
+/* Set the batch delay - how long to wait in ms between processing batches */
+void
+lcp_nl_set_batch_delay (u32 batch_delay_ms)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_delay_ms = batch_delay_ms;
+}
+
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ u32 buf_size, batch_size, batch_delay_ms;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "nl-rx-buffer-size %u", &buf_size))
+ lcp_nl_set_buffer_size (buf_size);
+ else if (unformat (input, "nl-batch-size %u", &batch_size))
+ lcp_nl_set_batch_size (batch_size);
+ else if (unformat (input, "nl-batch-delay-ms %u", &batch_delay_ms))
+ lcp_nl_set_batch_delay (batch_delay_ms);
+ else
+ return clib_error_return (0, "invalid netlink option: %U",
+ format_unformat_error, input);
+ }
+
+ return NULL;
+}
+
+VLIB_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-nl");
+
+static void
+lcp_nl_close_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+
+ /* delete existing fd from epoll fd set */
+ if (nm->clib_file_index != ~0)
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ if (f)
+ {
+ NL_INFO ("Stopping poll of fd %u", f->file_descriptor);
+ fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ }
+ else
+ /* stored index was not a valid file, reset stored index to ~0 */
+ nm->clib_file_index = ~0;
+ }
+
+ /* If we already created a socket, close/free it */
+ if (nm->sk_route)
+ {
+ NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_route));
+ nl_socket_free (nm->sk_route);
+ nm->sk_route = NULL;
+ }
+}
+
+static void
+lcp_nl_open_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+
+ /* Allocate a new socket for both routes and acls
+ * Notifications do not use sequence numbers, disable sequence number
+ * checking.
+ * Define a callback function, which will be called for each notification
+ * received
+ */
+ nm->sk_route = nl_socket_alloc ();
+ nl_socket_disable_seq_check (nm->sk_route);
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd)
+ {
+ curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+ setns (dest_ns_fd, CLONE_NEWNET);
+ }
+
+ nl_connect (nm->sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd && curr_ns_fd >= 0)
+ {
+ setns (curr_ns_fd, CLONE_NEWNET);
+ close (curr_ns_fd);
+ }
+
+ /* Subscribe to all the 'routing' notifications on the route socket */
+ nl_socket_add_memberships (nm->sk_route, RTNLGRP_LINK, RTNLGRP_IPV6_IFADDR,
+ RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE,
+ RTNLGRP_IPV6_ROUTE, RTNLGRP_NEIGH, RTNLGRP_NOTIFY,
+#ifdef RTNLGRP_MPLS_ROUTE /* not defined on CentOS/RHEL 7 */
+ RTNLGRP_MPLS_ROUTE,
+#endif
+ RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, 0);
+
+ /* Set socket in nonblocking mode and increase buffer sizes */
+ nl_socket_set_nonblocking (nm->sk_route);
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+
+ if (nm->clib_file_index == ~0)
+ {
+ clib_file_t rt_file = {
+ .read_function = nl_route_read_cb,
+ .error_function = nl_route_error_cb,
+ .file_descriptor = nl_socket_get_fd (nm->sk_route),
+ .description = format (0, "linux-cp netlink route socket"),
+ };
+
+ nm->clib_file_index = clib_file_add (&file_main, &rt_file);
+ NL_INFO ("Added file %u", nm->clib_file_index);
+ }
+ else
+ /* clib file already created and socket was closed due to error */
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ f->file_descriptor = nl_socket_get_fd (nm->sk_route);
+ fm->file_update (f, UNIX_FILE_UPDATE_ADD);
+ NL_INFO ("Starting poll of %d", f->file_descriptor);
+ }
+
+ nl_socket_modify_cb (nm->sk_route, NL_CB_VALID, NL_CB_CUSTOM, nl_route_cb,
+ NULL);
+ NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_route));
+}
+
+static void
+lcp_nl_open_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+ struct nl_sock *sk_route;
+
+ /* Allocate a new blocking socket for routes that will be used for dump
+ * requests. Buffer sizes are left default because replies to dump requests
+ * are flow-controlled and the kernel will not overflow the socket by sending
+ * these
+ */
+
+ nm->sk_route_sync[sock_type] = sk_route = nl_socket_alloc ();
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd > 0)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ if (clib_setns (dest_ns_fd) == -1)
+ NL_ERROR ("Cannot set destination ns");
+ }
+
+ nl_connect (sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd > 0)
+ {
+ if (curr_ns_fd == -1)
+ {
+ NL_ERROR ("No previous ns to set");
+ }
+ else
+ {
+ if (clib_setns (curr_ns_fd) == -1)
+ NL_ERROR ("Cannot set previous ns");
+ close (curr_ns_fd);
+ }
+ }
+
+ NL_INFO ("Opened netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+}
+
+static void
+lcp_nl_close_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+
+ if (sk_route)
+ {
+ NL_INFO ("Closing netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+ nl_socket_free (sk_route);
+ nm->sk_route_sync[sock_type] = NULL;
+ }
+}
+
+#include <vnet/plugin/plugin.h>
+clib_error_t *
+lcp_nl_init (vlib_main_t *vm)
+{
+ nl_main_t *nm = &nl_main;
+ lcp_itf_pair_vft_t nl_itf_pair_vft = {
+ .pair_add_fn = lcp_nl_pair_add_cb,
+ };
+
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+ nm->clib_file_index = ~0;
+ nm->nl_logger = vlib_log_register_class ("nl", "nl");
+
+ lcp_nl_open_socket ();
+ lcp_itf_pair_register_vft (&nl_itf_pair_vft);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_nl_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "tuntap_init",
+ "ip_neighbor_init"),
+};
+
+#include <vpp/app/version.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "linux Control Plane - Netlink listener",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.h b/src/plugins/linux-cp/lcp_nl.h
new file mode 100644
index 00000000000..41757e9b983
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+typedef void (*nl_rt_link_cb_t) (struct rtnl_link *rl, void *ctx);
+typedef void (*nl_rt_link_sync_cb_t) (void);
+typedef void (*nl_rt_addr_cb_t) (struct rtnl_addr *ra);
+typedef void (*nl_rt_addr_sync_cb_t) (void);
+typedef void (*nl_rt_neigh_cb_t) (struct rtnl_neigh *rr);
+typedef void (*nl_rt_neigh_sync_cb_t) (void);
+typedef void (*nl_rt_route_add_cb_t) (struct rtnl_route *rn, int is_replace);
+typedef void (*nl_rt_route_del_cb_t) (struct rtnl_route *rn);
+typedef void (*nl_rt_route_sync_cb_t) (void);
+
+#define NL_RT_COMMON uword is_mp_safe
+
+typedef struct nl_rt_link_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_cb_t cb;
+} nl_rt_link_t;
+
+typedef struct nl_rt_link_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_sync_cb_t cb;
+} nl_rt_link_sync_t;
+
+typedef struct nl_rt_addr_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_cb_t cb;
+} nl_rt_addr_t;
+
+typedef struct nl_rt_addr_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_sync_cb_t cb;
+} nl_rt_addr_sync_t;
+
+typedef struct nl_rt_neigh_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_cb_t cb;
+} nl_rt_neigh_t;
+
+typedef struct nl_rt_neigh_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_sync_cb_t cb;
+} nl_rt_neigh_sync_t;
+
+typedef struct nl_rt_route_add_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_add_cb_t cb;
+} nl_rt_route_add_t;
+
+typedef struct nl_rt_route_del_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_del_cb_t cb;
+} nl_rt_route_del_t;
+
+typedef struct nl_rt_route_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_sync_cb_t cb;
+} nl_rt_route_sync_t;
+
+#undef NL_RT_COMMON
+
+typedef struct nl_vft_t_
+{
+ nl_rt_link_t nvl_rt_link_add;
+ nl_rt_link_t nvl_rt_link_del;
+ nl_rt_link_sync_t nvl_rt_link_sync_begin;
+ nl_rt_link_sync_t nvl_rt_link_sync_end;
+ nl_rt_addr_t nvl_rt_addr_add;
+ nl_rt_addr_t nvl_rt_addr_del;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_begin;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_end;
+ nl_rt_neigh_t nvl_rt_neigh_add;
+ nl_rt_neigh_t nvl_rt_neigh_del;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_begin;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_end;
+ nl_rt_route_add_t nvl_rt_route_add;
+ nl_rt_route_del_t nvl_rt_route_del;
+ nl_rt_route_sync_t nvl_rt_route_sync_begin;
+ nl_rt_route_sync_t nvl_rt_route_sync_end;
+} nl_vft_t;
+
+extern void nl_register_vft (const nl_vft_t *nv);
+
+typedef enum lcp_nl_obj_t_
+{
+ LCP_NL_LINK,
+ LCP_NL_ADDR,
+ LCP_NL_NEIGH,
+ LCP_NL_ROUTE,
+} lcp_nl_obj_t;
+
+/* struct type to hold context on the netlink message being processed.
+ *
+ * At creation of a pair, a tap/tun is created and configured to match its
+ * corresponding hardware interface (MAC address, link state, MTU). Netlink
+ * messages are sent announcing the creation and subsequent configuration.
+ * We do not need to (and should not) act on those messages since applying
+ * those same configurations again is unnecessary and can be disruptive. So
+ * a timestamp for a message is stored and can be compared against the time
+ * the interface came under linux-cp management in order to figure out
+ * whether we should apply any configuration.
+ */
+typedef struct nl_msg_info
+{
+ struct nl_msg *msg;
+ f64 ts;
+} nl_msg_info_t;
+
+#define LCP_NL_N_OBJS (LCP_NL_ROUTE + 1)
+
+extern struct nl_cache *lcp_nl_get_cache (lcp_nl_obj_t t);
+extern int lcp_nl_drain_messages (void);
+extern void lcp_nl_set_buffer_size (u32 buf_size);
+extern void lcp_nl_set_batch_size (u32 batch_size);
+extern void lcp_nl_set_batch_delay (u32 batch_delay_ms);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index 3a88c3b0a52..241cc5e4bff 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -31,6 +31,7 @@
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/mpls/mpls.h>
#define foreach_lip_punt \
_ (IO, "punt to host") \
@@ -438,14 +439,112 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
typedef enum
{
+ LCP_XC_MPLS_NEXT_DROP,
+ LCP_XC_MPLS_NEXT_IO,
+ LCP_XC_MPLS_N_NEXT,
+} lcp_xc_mpls_next_t;
+
+static_always_inline uword
+lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, *to_next, n_left_to_next;
+ lcp_xc_next_t next_index;
+
+ next_index = 0;
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const ethernet_header_t *eth;
+ const lcp_itf_pair_t *lip;
+ u32 next0, bi0, lipi, ai;
+ vlib_buffer_t *b0;
+ // const ip_adjacency_t *adj;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ lipi =
+ lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+ vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+ eth = vlib_buffer_get_current (b0);
+
+ ai = ADJ_INDEX_INVALID;
+ next0 = LCP_XC_MPLS_NEXT_DROP;
+ if (!ethernet_address_cast (eth->dst_address))
+ ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+ if (ai != ADJ_INDEX_INVALID)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+ next0 = LCP_XC_MPLS_NEXT_IO;
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+ t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (lcp_xc_mpls)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lcp_xc_mpls_inline (vm, node, frame));
+}
+
+VLIB_REGISTER_NODE (
+ lcp_xc_mpls) = { .name = "linux-cp-xc-mpls",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lcp_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = LCP_XC_MPLS_N_NEXT,
+ .next_nodes = {
+ [LCP_XC_MPLS_NEXT_DROP] = "error-drop",
+ [LCP_XC_MPLS_NEXT_IO] = "interface-output",
+ } };
+
+VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "linux-cp-xc-mpls",
+};
+
+typedef enum
+{
LCP_XC_L3_NEXT_XC,
+ LCP_XC_L3_NEXT_LOOKUP,
LCP_XC_L3_N_NEXT,
} lcp_xc_l3_next_t;
/**
* X-connect all packets from the HOST to the PHY on L3 interfaces
*
- * There's only one adjacency that can be used on thises links.
+ * There's only one adjacency that can be used on these links.
*/
static_always_inline u32
lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -453,6 +552,7 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from, *from, *to_next, n_left_to_next;
lcp_xc_next_t next_index;
+ vnet_main_t *vnm = vnet_get_main ();
next_index = 0;
n_left_from = frame->n_vectors;
@@ -488,10 +588,24 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
lip = lcp_itf_pair_get (lipi);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
- next0 = LCP_XC_L3_NEXT_XC;
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
- lip->lip_phy_adjs.adj_index[af];
+ /* P2P tunnels can use generic adjacency */
+ if (PREDICT_TRUE (
+ vnet_sw_interface_is_p2p (vnm, lip->lip_phy_sw_if_index)))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ lip->lip_phy_sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ lip->lip_phy_adjs.adj_index[af];
+ next0 = LCP_XC_L3_NEXT_XC;
+ }
+ /* P2MP tunnels require a fib lookup to find the right adjacency */
+ else
+ {
+ /* lookup should use FIB table associated with phy interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ lip->lip_phy_sw_if_index;
+ next0 = LCP_XC_L3_NEXT_LOOKUP;
+ }
if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
{
@@ -534,6 +648,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip4-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip4-lookup",
},
};
@@ -556,6 +671,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip6-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip6-lookup",
},
};
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
new file mode 100644
index 00000000000..0efd53e64ef
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -0,0 +1,1578 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/mpls.h>
+
+//#include <vlib/vlib.h>
+#include <vlib/unix/plugin.h>
+#include <linux-cp/lcp_nl.h>
+#include <linux-cp/lcp_interface.h>
+
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/nexthop.h>
+#include <netlink/route/addr.h>
+#include <netlink/route/link/vlan.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/ip/ip6_ll_table.h>
+#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip/ip6_link.h>
+
+typedef struct lcp_router_table_t_
+{
+ uint32_t nlt_id;
+ fib_protocol_t nlt_proto;
+ u32 nlt_fib_index;
+ u32 nlt_mfib_index;
+ u32 nlt_refs;
+} lcp_router_table_t;
+
+static uword *lcp_router_table_db[FIB_PROTOCOL_MAX];
+static lcp_router_table_t *lcp_router_table_pool;
+static vlib_log_class_t lcp_router_logger;
+
+const static fib_prefix_t pfx_all1s = {
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = 0xffffffff,
+ }
+ },
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+};
+
+static fib_source_t lcp_rt_fib_src;
+static fib_source_t lcp_rt_fib_src_dynamic;
+
+#define LCP_ROUTER_DBG(...) vlib_log_debug (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_INFO(...) vlib_log_notice (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_ERROR(...) vlib_log_err (lcp_router_logger, __VA_ARGS__);
+
+static const mfib_prefix_t ip4_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,224.0.0.0)/24 - all local subnet */
+ .fp_grp_addr = {
+ .ip4.data_u32 = 0x000000e0,
+ },
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+};
+
+static const mfib_prefix_t ip6_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,ff00::)/8 - all local subnet */
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = 0x00000000000000ff,
+ },
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ },
+};
+
+/* VIF to PHY DB of managed interfaces */
+static uword *lcp_routing_itf_db;
+
+static u32
+lcp_router_intf_h2p (u32 host)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ uword *p;
+
+ /*
+ * first check the linux side created interface (i.e. vlans, tunnels etc)
+ */
+ p = hash_get (lcp_routing_itf_db, host);
+
+ if (p)
+ return p[0];
+
+ /*
+ * then check the paired phys
+ */
+ lipi = lcp_itf_pair_find_by_vif (host);
+
+ if (INDEX_INVALID == lipi)
+ return (~0);
+
+ lip = lcp_itf_pair_get (lipi);
+
+ return lip->lip_phy_sw_if_index;
+}
+
+/*
+ * Check timestamps on netlink message and interface pair to decide whether
+ * the message should be applied. See the declaration of nl_msg_info_t for
+ * an explanation on why this is necessary.
+ * If timestamps are good (message ts is newer than intf pair ts), return 0.
+ * Else, return -1.
+ */
+static int
+lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
+{
+ if (!msg_info)
+ return 0;
+
+ if (msg_info->ts > lip->lip_create_ts)
+ return 0;
+
+ LCP_ROUTER_INFO ("Early message received for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return -1;
+}
+
+static void
+lcp_router_link_del (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+
+ if (!lcp_auto_subint ())
+ return;
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lipi);
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ LCP_ROUTER_INFO ("delete link: %s - %U", rtnl_link_get_type (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ lcp_itf_pair_delete (lip->lip_phy_sw_if_index);
+
+ if (rtnl_link_is_vlan (rl))
+ {
+ LCP_ROUTER_INFO ("delete vlan: %s -> %U", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_host_sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link del: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 mtu;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+
+ mtu = rtnl_link_get_mtu (rl);
+ if (!mtu)
+ return;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ /* If HW interface, try to change hw link */
+ if ((sw->sw_if_index == sw->sup_sw_if_index) &&
+ (hw->hw_class_index == ethernet_hw_interface_class.index))
+ vnet_hw_interface_set_mtu (vnm, hw->hw_if_index, mtu);
+ else
+ vnet_sw_interface_set_mtu (vnm, sw->sw_if_index, mtu);
+}
+
+static walk_rc_t
+lcp_router_link_addr_adj_upd_cb (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP4]);
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP6]);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ struct nl_addr *mac_addr;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ void *mac_addr_bytes;
+
+ mac_addr = rtnl_link_get_addr (rl);
+ if (!mac_addr || (nl_addr_get_family (mac_addr) != AF_LLC))
+ return;
+
+ sw = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
+
+ /* can only change address on hw interface */
+ if (sw->sw_if_index != sw->sup_sw_if_index)
+ return;
+
+ hw = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
+ if (!vec_len (hw->hw_address))
+ return;
+
+ mac_addr_bytes = nl_addr_get_binary_addr (mac_addr);
+ if (clib_memcmp (mac_addr_bytes, hw->hw_address, nl_addr_get_len (mac_addr)))
+ vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+ mac_addr_bytes);
+
+ /* mcast adjacencies need to be updated */
+ vnet_hw_interface_walk_sw (vnm, hw->hw_if_index,
+ lcp_router_link_addr_adj_upd_cb, NULL);
+}
+
+static void lcp_router_table_flush (lcp_router_table_t *nlt,
+ u32 *sw_if_index_to_bool,
+ fib_source_t source);
+
+static void
+lcp_router_link_add (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+ int up;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+ up = IFF_UP & rtnl_link_get_flags (rl);
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+ u32 sw_if_flags;
+ u32 sw_if_up;
+
+ lip = lcp_itf_pair_get (lipi);
+ if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index))
+ return;
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ sw_if_flags =
+ vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
+ sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ if (!sw_if_up && up)
+ {
+ vnet_sw_interface_admin_up (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ }
+ else if (sw_if_up && !up)
+ {
+ vnet_sw_interface_admin_down (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+
+ /* When an interface is brought down administratively, the kernel
+ * removes routes which resolve through that interface. For IPv4
+ * routes, the kernel will not send any explicit RTM_DELROUTE
+ * messages about removing them. In order to synchronize with the
+ * kernel, affected IPv4 routes need to be manually removed from the
+ * FIB. The behavior is different for IPv6 routes. Explicit
+ * RTM_DELROUTE messages are sent about IPv6 routes being removed.
+ */
+ u32 fib_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (
+ FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ u32 *sw_if_index_to_bool = NULL;
+
+ vec_validate_init_empty (sw_if_index_to_bool,
+ lip->lip_phy_sw_if_index, false);
+ sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
+
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+
+ vec_free (sw_if_index_to_bool);
+ break;
+ }
+ }
+ }
+
+ LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
+ rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
+ vnm, lip->lip_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index, (up ? "up" : "down"));
+
+ lcp_router_link_mtu (rl, lip->lip_phy_sw_if_index);
+ lcp_router_link_addr (rl, lip);
+ }
+ else if (lcp_auto_subint () && rtnl_link_is_vlan (rl))
+ {
+ /* Find the pair based on the parent VIF */
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_link (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ u32 sub_phy_sw_if_index, sub_host_sw_if_index;
+ const lcp_itf_pair_t *lip;
+ int vlan;
+ u8 *ns = 0; /* FIXME */
+
+ lip = lcp_itf_pair_get (lipi);
+
+ vlan = rtnl_link_vlan_get_id (rl);
+
+ /* create the vlan interface on the parent phy */
+ if (vnet_create_sub_interface (lip->lip_phy_sw_if_index, vlan, 18, 0,
+ vlan, &sub_phy_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create phy vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return;
+ }
+
+ /* pool could grow during the previous operation */
+ lip = lcp_itf_pair_get (lipi);
+
+ /* create the vlan interface on the parent host */
+ if (vnet_create_sub_interface (lip->lip_host_sw_if_index, vlan, 18,
+ 0, vlan, &sub_host_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index);
+ return;
+ }
+
+ char *if_name;
+ u8 *if_namev = 0;
+
+ LCP_ROUTER_INFO (
+ "create vlan: %s -> (%U, %U) : (%U, %U)", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_host_sw_if_index);
+
+ if ((if_name = rtnl_link_get_name (rl)) != NULL)
+ vec_validate_init_c_string (if_namev, if_name,
+ strnlen (if_name, IFNAMSIZ));
+ lcp_itf_pair_add (sub_host_sw_if_index, sub_phy_sw_if_index,
+ if_namev, rtnl_link_get_ifindex (rl),
+ lip->lip_host_type, ns);
+ if (up)
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_phy_sw_if_index);
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_host_sw_if_index);
+
+ vec_free (if_namev);
+ }
+ else
+ {
+ LCP_ROUTER_INFO ("ignore parent-link add: %s - %s",
+ rtnl_link_get_type (rl), rtnl_link_get_name (rl));
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link add: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_link_sync_begin (void)
+{
+ LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
+}
+
+static void
+lcp_router_link_sync_end (void)
+{
+ LCP_ROUTER_INFO ("End synchronization of interface configurations");
+}
+
+static clib_error_t *
+lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi;
+ index_t lipi;
+
+ hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+ if (!hi)
+ return 0;
+
+ lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+ if (lipi == INDEX_INVALID)
+ return 0;
+
+ /* When the link goes down on an interface, the kernel processes routes which
+ * resolve through that interface depending on how they were created:
+ * - Legacy Route API: the kernel retains the routes and marks them as
+ * "linkdown";
+ * - Nexthop API: the kernel removes the next-hop objects and the routes
+ * which reference them.
+ *
+ * For IPv4 routes created with Nexthop API, the kernel will not send any
+ * explicit RTM_DELROUTE messages about removing them. In order to
+ * synchronize with the kernel, affected routes need to be manually removed
+ * from the FIB.
+ *
+ * The behavior is different for IPv6 routes created with Nexthop API. The
+ * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
+ * removed.
+ */
+ if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
+ (lcp_get_del_static_on_link_down () ||
+ lcp_get_del_dynamic_on_link_down ()))
+ {
+ u32 fib_index;
+ u32 **fib_index_to_sw_if_index_to_bool = NULL;
+ u32 id, sw_if_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ hi->sw_if_index);
+
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ hi->sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][hi->sw_if_index] = true;
+
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][sw_if_index] = true;
+ }));
+ /* clang-format on */
+
+ vec_foreach_index (fib_index, fib_index_to_sw_if_index_to_bool)
+ {
+ u32 *sw_if_index_to_bool;
+
+ sw_if_index_to_bool = fib_index_to_sw_if_index_to_bool[fib_index];
+ if (NULL == sw_if_index_to_bool)
+ continue;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ if (lcp_get_del_static_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ if (lcp_get_del_dynamic_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+ break;
+ }
+ }
+
+ vec_free (sw_if_index_to_bool);
+ }
+
+ vec_free (fib_index_to_sw_if_index_to_bool);
+ }
+
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
+
+static fib_protocol_t
+lcp_router_proto_k2f (uint32_t k)
+{
+ switch (k)
+ {
+ case AF_INET6:
+ return FIB_PROTOCOL_IP6;
+ case AF_INET:
+ return FIB_PROTOCOL_IP4;
+ case AF_MPLS:
+ return FIB_PROTOCOL_MPLS;
+ default:
+ ASSERT (0);
+ return FIB_PROTOCOL_NONE;
+ }
+}
+
+static void
+lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ ip_address_reset (ia);
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip_address_set (ia, nl_addr_get_binary_addr (rna),
+ FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6);
+}
+
+static fib_protocol_t
+lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip46_address_reset (ia);
+ if (FIB_PROTOCOL_IP4 == fproto)
+ memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+ else
+ memcpy (&ia->ip6, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+
+ return (fproto);
+}
+
+static void
+lcp_router_link_addr_add_del (struct rtnl_addr *rla, int is_del)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_addr_get_ifindex (rla));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+
+ lcp_router_mk_addr (rtnl_addr_get_local (rla), &nh);
+
+ if (AF_IP4 == ip_addr_version (&nh))
+ {
+ ip4_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v4 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip4_mroutes_add_del (sw_if_index, !is_del);
+ }
+ else if (AF_IP6 == ip_addr_version (&nh))
+ {
+ if (ip6_address_is_link_local_unicast (&ip_addr_v6 (&nh)))
+ if (is_del)
+ ip6_link_disable (sw_if_index);
+ else
+ {
+ ip6_link_enable (sw_if_index, NULL);
+ ip6_link_set_local_address (sw_if_index, &ip_addr_v6 (&nh));
+ }
+ else
+ ip6_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v6 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip6_mroutes_add_del (sw_if_index, !is_del);
+ }
+
+ LCP_ROUTER_DBG ("link-addr: %U %U/%d", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index, format_ip_address, &nh,
+ rtnl_addr_get_prefixlen (rla));
+ }
+}
+
+static void
+lcp_router_link_addr_del (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 1);
+}
+
+static void
+lcp_router_link_addr_add (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 0);
+}
+
+static walk_rc_t
+lcp_router_address_mark (index_t index, void *ctx)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_interface_address_mark_one_interface (
+ vnm, vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index), 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_address_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
+}
+
+static void
+lcp_router_link_addr_sync_end (void)
+{
+ ip_interface_address_sweep ();
+
+ LCP_ROUTER_INFO ("End synchronization of interface addresses");
+}
+
+static void
+lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
+{
+ mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna));
+}
+
+static void
+lcp_router_neigh_del (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+ int rv;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ rv = ip_neighbor_del (&nh, sw_if_index);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to delete neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour del on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+#ifndef NUD_VALID
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE | \
+ NUD_DELAY)
+#endif
+
+static void
+lcp_router_neigh_add (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ struct nl_addr *ll;
+ ip_address_t nh;
+ int state;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ ll = rtnl_neigh_get_lladdr (rn);
+ state = rtnl_neigh_get_state (rn);
+
+ if (ll && (state & NUD_VALID))
+ {
+ mac_address_t mac;
+ ip_neighbor_flags_t flags;
+ int rv;
+
+ lcp_router_mk_mac_addr (ll, &mac);
+
+ if (state & (NUD_NOARP | NUD_PERMANENT))
+ flags = IP_NEIGHBOR_FLAG_STATIC;
+ else
+ flags = IP_NEIGHBOR_FLAG_DYNAMIC;
+
+ rv = ip_neighbor_add (&nh, &mac, sw_if_index, flags, NULL);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to create neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ /* It's a delete */
+ lcp_router_neigh_del (rn);
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour add on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+static walk_rc_t
+lcp_router_neighbor_mark (index_t index, void *ctx)
+{
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_neighbor_walk (AF_IP4, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+ ip_neighbor_walk (AF_IP6, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_neigh_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_neighbor_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of neighbors");
+}
+
+static void
+lcp_router_neigh_sync_end (void)
+{
+ ip_neighbor_sweep (AF_IP4);
+ ip_neighbor_sweep (AF_IP6);
+
+ LCP_ROUTER_INFO ("End synchronization of neighbors");
+}
+
+static lcp_router_table_t *
+lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
+{
+ uword *p;
+
+ p = hash_get (lcp_router_table_db[fproto], id);
+
+ if (p)
+ return pool_elt_at_index (lcp_router_table_pool, p[0]);
+
+ return (NULL);
+}
+
+static uint32_t
+lcp_router_table_k2f (uint32_t k)
+{
+ // the kernel's table ID 255 is the default table
+ if (k == 255 || k == 254)
+ return 0;
+ return k;
+}
+
+static lcp_router_table_t *
+lcp_router_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
+{
+ lcp_router_table_t *nlt;
+
+ id = lcp_router_table_k2f (id);
+ nlt = lcp_router_table_find (id, fproto);
+
+ if (NULL == nlt)
+ {
+ pool_get_zero (lcp_router_table_pool, nlt);
+
+ nlt->nlt_id = id;
+ nlt->nlt_proto = fproto;
+
+ nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, lcp_rt_fib_src);
+ nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
+
+ hash_set (lcp_router_table_db[fproto], nlt->nlt_id,
+ nlt - lcp_router_table_pool);
+
+ if (FIB_PROTOCOL_IP4 == fproto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src, FIB_ENTRY_FLAG_LOCAL);
+
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip4_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ else if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip6_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ }
+
+ nlt->nlt_refs++;
+
+ return (nlt);
+}
+
+static void
+lcp_router_table_unlock (lcp_router_table_t *nlt)
+{
+ nlt->nlt_refs--;
+
+ if (0 == nlt->nlt_refs)
+ {
+ if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src);
+ }
+
+ fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+
+ hash_unset (lcp_router_table_db[nlt->nlt_proto], nlt->nlt_id);
+ pool_put (lcp_router_table_pool, nlt);
+ }
+}
+
+static void
+lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
+{
+ const struct nl_addr *addr = rtnl_route_get_dst (r);
+ u32 *baddr = nl_addr_get_binary_addr (addr);
+ u32 blen = nl_addr_get_len (addr);
+ ip46_address_t *paddr = &p->fp_addr;
+ u32 entry;
+
+ ip46_address_reset (paddr);
+ p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (addr));
+
+ switch (p->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ entry = ntohl (*baddr);
+ p->fp_label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ p->fp_len = 21;
+ p->fp_eos = MPLS_NON_EOS;
+ return;
+ case FIB_PROTOCOL_IP4:
+ memcpy (&paddr->ip4, baddr, blen);
+ break;
+ case FIB_PROTOCOL_IP6:
+ memcpy (&paddr->ip6, baddr, blen);
+ break;
+ }
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+}
+
+static void
+lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
+{
+ const struct nl_addr *addr;
+
+ addr = rtnl_route_get_dst (r);
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_grp_addr);
+
+ addr = rtnl_route_get_src (r);
+ if (addr)
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr);
+}
+
+static int
+lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr)
+{
+ if (!addr)
+ return 0;
+
+ struct mpls_label *stack = nl_addr_get_binary_addr (addr);
+ u32 entry, label;
+ u8 exp, ttl;
+ int label_count = 0;
+
+ while (1)
+ {
+ entry = ntohl (stack[label_count++].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+
+ fib_mpls_label_t fml = {
+ .fml_value = label,
+ .fml_exp = exp,
+ .fml_ttl = ttl,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+
+ if (entry & MPLS_LS_S_MASK)
+ break;
+ }
+ return label_count;
+}
+
+typedef struct lcp_router_route_path_parse_t_
+{
+ fib_route_path_t *paths;
+ fib_protocol_t route_proto;
+ bool is_mcast;
+ fib_route_path_flags_t type_flags;
+ u8 preference;
+} lcp_router_route_path_parse_t;
+
+static void
+lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
+{
+ lcp_router_route_path_parse_t *ctx = arg;
+ fib_route_path_t *path;
+ u32 sw_if_index;
+ int label_count = 0;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
+
+ if (~0 != sw_if_index)
+ {
+ fib_protocol_t fproto;
+ struct nl_addr *addr;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = sw_if_index;
+ path->frp_preference = ctx->preference;
+
+ /*
+ * FIB Path Weight of 0 is meaningless and replaced with 1 further along.
+ * See fib_path_create. fib_path_cmp_w_route_path would fail to match
+ * such a fib_route_path_t with any fib_path_t, because a fib_path_t's
+ * fp_weight can never be 0.
+ */
+ path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh));
+
+ addr = rtnl_route_nh_get_gateway (rnh);
+ if (!addr)
+ addr = rtnl_route_nh_get_via (rnh);
+
+ if (addr)
+ fproto = lcp_router_mk_addr46 (addr, &path->frp_addr);
+ else
+ fproto = ctx->route_proto;
+
+ path->frp_proto = fib_proto_to_dpo (fproto);
+
+ if (ctx->route_proto == FIB_PROTOCOL_MPLS)
+ {
+ addr = rtnl_route_nh_get_newdst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ {
+ LCP_ROUTER_DBG (" is label swap to %u",
+ path->frp_label_stack[0].fml_value);
+ }
+ else
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_LABEL_POP,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+ LCP_ROUTER_DBG (" is label pop");
+ }
+ }
+
+#ifdef NL_CAPABILITY_VERSION_3_6_0
+ addr = rtnl_route_nh_get_encap_mpls_dst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count);
+#endif
+
+ if (ctx->is_mcast)
+ path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+ }
+}
+
+/*
+ * blackhole, unreachable, prohibit will not have a next hop in an
+ * RTM_NEWROUTE. Add a path for them.
+ */
+static void
+lcp_router_route_path_add_special (struct rtnl_route *rr,
+ lcp_router_route_path_parse_t *ctx)
+{
+ fib_route_path_t *path;
+
+ if (rtnl_route_get_type (rr) < RTN_BLACKHOLE)
+ return;
+
+ /* if it already has a path, it does not need us to add one */
+ if (vec_len (ctx->paths) > 0)
+ return;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = ~0;
+ path->frp_proto = fib_proto_to_dpo (ctx->route_proto);
+ path->frp_preference = ctx->preference;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+}
+
+/*
+ * Map of supported route types. Some types are omitted:
+ * RTN_LOCAL - interface address addition creates these automatically
+ * RTN_BROADCAST - same as RTN_LOCAL
+ * RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
+ * There's not a VPP equivalent for these currently.
+ */
+static const u8 lcp_router_route_type_valid[__RTN_MAX] = {
+ [RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
+ [RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
+};
+
+/* Map of fib entry flags by route type */
+static const fib_entry_flag_t lcp_router_route_type_feflags[__RTN_MAX] = {
+ [RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
+ [RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ [RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
+};
+
+/* Map of fib route path flags by route type */
+static const fib_route_path_flags_t
+ lcp_router_route_type_frpflags[__RTN_MAX] = {
+ [RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
+ [RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
+ [RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
+ };
+
+static inline fib_source_t
+lcp_router_proto_fib_source (u8 rt_proto)
+{
+ return (rt_proto <= RTPROT_STATIC) ? lcp_rt_fib_src : lcp_rt_fib_src_dynamic;
+}
+
+static fib_entry_flag_t
+lcp_router_route_mk_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
+{
+ fib_entry_flag_t fef = FIB_ENTRY_FLAG_NONE;
+
+ fef |= lcp_router_route_type_feflags[rtype];
+ if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
+ /* kernel proto is interface prefixes, 255 is linux's 'local' table */
+ fef |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
+
+ return (fef);
+}
+
+static void
+lcp_router_route_del (struct rtnl_route *rr)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+ nlt = lcp_router_table_find (lcp_router_table_k2f (table_id), pfx.fp_proto);
+
+ LCP_ROUTER_DBG ("route del: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+
+ if (NULL == nlt)
+ return;
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ fib_source_t fib_src;
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ switch (pfx.fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ /* delete the EOS route in addition to NEOS - fallthrough */
+ pfx.fp_eos = MPLS_EOS;
+ default:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ }
+ }
+
+ vec_free (np.paths);
+
+ lcp_router_table_unlock (nlt);
+}
+
+static fib_route_path_t *
+lcp_router_fib_route_path_dup (fib_route_path_t *old)
+{
+ int idx;
+ fib_route_path_t *p;
+
+ fib_route_path_t *new = vec_dup (old);
+ if (!new)
+ return NULL;
+
+ for (idx = 0; idx < vec_len (new); idx++)
+ {
+ p = &new[idx];
+ if (p->frp_label_stack)
+ p->frp_label_stack = vec_dup (p->frp_label_stack);
+ }
+
+ return new;
+}
+
+static void
+lcp_router_route_add (struct rtnl_route *rr, int is_replace)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+
+ nlt = lcp_router_table_add_or_lock (table_id, pfx.fp_proto);
+ /* Skip any kernel routes and IPv6 LL or multicast routes */
+ if (rproto == RTPROT_KERNEL ||
+ (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
+ (ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
+ ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6))))
+ {
+ LCP_ROUTER_DBG ("route skip: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+ return;
+ }
+ LCP_ROUTER_DBG ("route %s: %d:%U %U", is_replace ? "replace" : "add",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .is_mcast = (rtype == RTN_MULTICAST),
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ .preference = (u8) rtnl_route_get_priority (rr),
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ if (rtype == RTN_MULTICAST)
+ {
+ /* it's not clear to me how linux expresses the RPF paramters
+ * so we'll allow from all interfaces and hope for the best */
+ mfib_prefix_t mpfx = {};
+
+ lcp_router_route_mk_mprefix (rr, &mpfx);
+
+ mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+
+ mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, np.paths);
+ }
+ else
+ {
+ fib_source_t fib_src;
+ const fib_route_path_t *rpath;
+
+ vec_foreach (rpath, np.paths)
+ {
+ if (fib_route_path_is_attached (rpath))
+ {
+ entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+ break;
+ }
+ }
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ if (pfx.fp_proto == FIB_PROTOCOL_MPLS)
+ {
+ /* in order to avoid double-frees, we duplicate the paths. */
+ fib_route_path_t *pathdup =
+ lcp_router_fib_route_path_dup (np.paths);
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ vec_free (pathdup);
+
+ /* install EOS route in addition to NEOS */
+ pfx.fp_eos = MPLS_EOS;
+ pfx.fp_payload_proto = np.paths[0].frp_proto;
+ }
+
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ }
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("no paths for route: %d:%U %U",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+ }
+ vec_free (np.paths);
+}
+
+static void
+lcp_router_route_sync_begin (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
+static void
+lcp_router_route_sync_end (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
+typedef struct lcp_router_table_flush_ctx_t_
+{
+ fib_node_index_t *lrtf_entries;
+ u32 *lrtf_sw_if_index_to_bool;
+ fib_source_t lrtf_source;
+} lcp_router_table_flush_ctx_t;
+
+static fib_table_walk_rc_t
+lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+ lcp_router_table_flush_ctx_t *ctx = arg;
+ u32 sw_if_index;
+
+ sw_if_index = fib_entry_get_resolving_interface_for_source (
+ fib_entry_index, ctx->lrtf_source);
+
+ if (sw_if_index < vec_len (ctx->lrtf_sw_if_index_to_bool) &&
+ ctx->lrtf_sw_if_index_to_bool[sw_if_index])
+ {
+ vec_add1 (ctx->lrtf_entries, fib_entry_index);
+ }
+ return (FIB_TABLE_WALK_CONTINUE);
+}
+
+static void
+lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
+ fib_source_t source)
+{
+ fib_node_index_t *fib_entry_index;
+ lcp_router_table_flush_ctx_t ctx = {
+ .lrtf_entries = NULL,
+ .lrtf_sw_if_index_to_bool = sw_if_index_to_bool,
+ .lrtf_source = source,
+ };
+
+ LCP_ROUTER_DBG (
+ "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
+ format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
+ vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
+
+ fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_router_table_flush_cb, &ctx);
+
+ LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
+ vec_len (ctx.lrtf_entries));
+
+ vec_foreach (fib_entry_index, ctx.lrtf_entries)
+ {
+ fib_table_entry_delete_index (*fib_entry_index, source);
+ lcp_router_table_unlock (nlt);
+ }
+
+ vec_free (ctx.lrtf_entries);
+}
+
+const nl_vft_t lcp_router_vft = {
+ .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
+ .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
+ .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_sync_begin },
+ .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
+ .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
+ .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
+ .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_begin },
+ .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_end },
+ .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
+ .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
+ .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_begin },
+ .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_end },
+ .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add },
+ .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
+ .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_begin },
+ .nvl_rt_route_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_end },
+};
+
+static clib_error_t *
+lcp_router_init (vlib_main_t *vm)
+{
+ lcp_router_logger = vlib_log_register_class ("linux-cp", "router");
+
+ nl_register_vft (&lcp_router_vft);
+
+ /*
+ * allocate 2 route sources. The low priority source will be for
+ * dynamic routes. If a dynamic route daemon (FRR) tries to remove its
+ * route, it will use the low priority source to ensure it will not
+ * remove static routes which were added with the higher priority source.
+ */
+ lcp_rt_fib_src =
+ fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
+
+ lcp_rt_fib_src_dynamic = fib_source_allocate (
+ "lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_router_init) = {
+ .runs_before = VLIB_INITS ("lcp_nl_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */