aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/linux-cp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/linux-cp')
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt17
-rw-r--r--src/plugins/linux-cp/FEATURE.yaml14
-rw-r--r--src/plugins/linux-cp/lcp.api76
-rw-r--r--src/plugins/linux-cp/lcp.c119
-rw-r--r--src/plugins/linux-cp/lcp.h39
-rw-r--r--src/plugins/linux-cp/lcp.rst35
-rw-r--r--src/plugins/linux-cp/lcp_adj.c6
-rw-r--r--src/plugins/linux-cp/lcp_api.c158
-rw-r--r--src/plugins/linux-cp/lcp_cli.c272
-rw-r--r--src/plugins/linux-cp/lcp_interface.c630
-rw-r--r--src/plugins/linux-cp/lcp_interface.h54
-rw-r--r--src/plugins/linux-cp/lcp_interface_sync.c445
-rw-r--r--src/plugins/linux-cp/lcp_mpls_sync.c160
-rw-r--r--src/plugins/linux-cp/lcp_nl.c1043
-rw-r--r--src/plugins/linux-cp/lcp_nl.h161
-rw-r--r--src/plugins/linux-cp/lcp_node.c162
-rw-r--r--src/plugins/linux-cp/lcp_router.c1578
17 files changed, 4547 insertions, 422 deletions
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 080f73ecce4..c891689b4b4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -12,12 +12,18 @@
# limitations under the License.
vpp_find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h)
+vpp_find_path(LIBMNL_INCLUDE_DIR NAMES libmnl/libmnl.h)
if (NOT LIBNL3_INCLUDE_DIR)
message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled")
return()
endif()
+if (NOT LIBMNL_INCLUDE_DIR)
+ message(WARNING "-- libmnl headers not found - linux-cp plugin disabled")
+ return()
+endif()
+
vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so)
vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200)
@@ -27,6 +33,8 @@ include_directories(${LIBMNL_INCLUDE_DIR})
add_vpp_library(lcp
SOURCES
lcp_interface.c
+ lcp_interface_sync.c
+ lcp_mpls_sync.c
lcp_adj.c
lcp.c
@@ -59,3 +67,12 @@ add_vpp_plugin(linux_cp_unittest
LINK_LIBRARIES
lcp
)
+
+add_vpp_plugin(linux_nl
+ SOURCES
+ lcp_router.c
+ lcp_nl.c
+
+ LINK_LIBRARIES
+ lcp
+)
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
index 088b0606f58..425858591f2 100644
--- a/src/plugins/linux-cp/FEATURE.yaml
+++ b/src/plugins/linux-cp/FEATURE.yaml
@@ -3,10 +3,10 @@ name: Linux Control Plane (integration)
maintainer: Neale Ranns <neale@grahpiant.com>
description: |-
- This plugin provides the beginnings of an integration with the
- Linux network stack.
- The plugin provides the capability to 'mirror' VPP interfaces in
- the Linux kernel. This means that for any interface in VPP the user
+ These plugins provide an integration with the Linux network stack.
+
+ The "linux_cp" plugin provides the capability to 'mirror' VPP interfaces
+ in the Linux kernel. This means that for any interface in VPP the user
can create a corresponding TAP or TUN device in the Linux kernel
and have VPP plumb them together.
The plumbing mechanics is different in each direction.
@@ -17,8 +17,10 @@ description: |-
In the TX direction, packets received by VPP on the mirror Tap/Tun
are cross-connected to the VPP interfaces. For IP packets, IP output
features are applied.
- This is the beginnings of integration, because there needs to be
- an external agent that will configure (and synchronize) the IP
+ If MPLS is enabled on a VPP interface, state is synced to Linux and
+ in the TX direction a special feature is enabled to pass MPLS packets through
+ untouched.
+ The "linux_nl" plugin listens to netlink messages and synchronizes the IP
configuration of the paired interfaces.
state: experimental
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
index 319dd3e6483..e7eaa5a3669 100644
--- a/src/plugins/linux-cp/lcp.api
+++ b/src/plugins/linux-cp/lcp.api
@@ -21,19 +21,20 @@ option version = "1.0.0";
import "vnet/interface_types.api";
-/** \brief Set the default Linux Control Plane namespace
+/** \brief Set the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the new default namespace; namespace[0] == 0 iff none
+ @param netns - the new default netns; netns[0] == 0 if none
*/
autoreply define lcp_default_ns_set
{
u32 client_index;
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
@@ -43,15 +44,16 @@ define lcp_default_ns_get
u32 context;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the default namespace; namespace[0] == 0 iff none
+ @param netns - the default netns; netns[0] == 0 if none
*/
define lcp_default_ns_get_reply
{
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
enum lcp_itf_host_type : u8
@@ -67,32 +69,59 @@ enum lcp_itf_host_type : u8
@param sw_if_index - index of VPP PHY SW interface
@param host_if_name - host tap interface name
@param host_if_type - the type of host interface to create (tun, tap)
- @param namespace - optional tap namespace; namespace[0] == 0 iff none
+ @param netns - optional tap netns; netns[0] == 0 if none
*/
autoreply autoendian define lcp_itf_pair_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
autoendian define lcp_itf_pair_add_del_v2
{
+ option in_progress;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
define lcp_itf_pair_add_del_v2_reply
{
+ option in_progress;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t host_sw_if_index;
+};
+autoendian define lcp_itf_pair_add_del_v3
+{
+ option in_progress;
+
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[16]; /* IFNAMSIZ */
+ vl_api_lcp_itf_host_type_t host_if_type;
+ string netns[32]; /* LCP_NS_LEN */
+};
+define lcp_itf_pair_add_del_v3_reply
+{
+ option in_progress;
+
u32 context;
i32 retval;
+ u32 vif_index;
vl_api_interface_index_t host_sw_if_index;
};
@@ -101,13 +130,26 @@ define lcp_itf_pair_add_del_v2_reply
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to use as filter (~0 == "all")
*/
-define lcp_itf_pair_get
+autoendian define lcp_itf_pair_get
{
u32 client_index;
u32 context;
u32 cursor;
};
-define lcp_itf_pair_get_reply
+autoendian define lcp_itf_pair_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+autoendian define lcp_itf_pair_get_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+};
+autoendian define lcp_itf_pair_get_v2_reply
{
u32 context;
i32 retval;
@@ -121,7 +163,7 @@ define lcp_itf_pair_get_reply
@param vif_index - tap linux index
@param host_if_name - host interface name
@param host_if_type - host interface type (tun, tap)
- @param namespace - host interface namespace
+ @param netns - host interface netns
*/
autoendian define lcp_itf_pair_details
{
@@ -131,7 +173,8 @@ autoendian define lcp_itf_pair_details
u32 vif_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
service {
@@ -139,6 +182,11 @@ service {
stream lcp_itf_pair_details;
};
+service {
+ rpc lcp_itf_pair_get_v2 returns lcp_itf_pair_get_v2_reply
+ stream lcp_itf_pair_details;
+};
+
/** \brief Replace end/begin
*/
autoreply define lcp_itf_pair_replace_begin
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
index f4c491c9cb3..34e8550a13f 100644
--- a/src/plugins/linux-cp/lcp.c
+++ b/src/plugins/linux-cp/lcp.c
@@ -20,6 +20,7 @@
#include <net/if.h>
#include <plugins/linux-cp/lcp.h>
+#include <plugins/linux-cp/lcp_interface.h>
lcp_main_t lcp_main;
@@ -28,8 +29,9 @@ lcp_get_default_ns (void)
{
lcp_main_t *lcpm = &lcp_main;
- if (lcpm->default_namespace[0] == 0)
- return 0;
+ if (!lcpm->default_namespace || lcpm->default_namespace[0] == 0)
+ return NULL;
+
return lcpm->default_namespace;
}
@@ -59,16 +61,15 @@ lcp_set_default_ns (u8 *ns)
if (!p || *p == 0)
{
- clib_memset (lcpm->default_namespace, 0,
- sizeof (lcpm->default_namespace));
+ lcpm->default_namespace = NULL;
if (lcpm->default_ns_fd > 0)
close (lcpm->default_ns_fd);
lcpm->default_ns_fd = 0;
return 0;
}
- clib_strncpy ((char *) lcpm->default_namespace, p, LCP_NS_LEN - 1);
-
+ vec_validate_init_c_string (lcpm->default_namespace, p,
+ clib_strnlen (p, LCP_NS_LEN));
s = format (0, "/var/run/netns/%s%c", (char *) lcpm->default_namespace, 0);
lcpm->default_ns_fd = open ((char *) s, O_RDONLY);
vec_free (s);
@@ -76,6 +77,112 @@ lcp_set_default_ns (u8 *ns)
return 0;
}
+void
+lcp_set_sync (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_sync = (is_auto != 0);
+
+ // If we set to 'on', do a one-off sync of LCP interfaces
+ if (is_auto)
+ lcp_itf_pair_sync_state_all ();
+}
+
+int
+lcp_sync (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_sync;
+}
+
+void
+lcp_set_auto_subint (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_auto_subint = (is_auto != 0);
+}
+
+int
+lcp_auto_subint (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_auto_subint;
+}
+
+void
+lcp_set_del_static_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_static_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_static_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_static_on_link_down;
+}
+
+void
+lcp_set_del_dynamic_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_dynamic_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_dynamic_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_dynamic_on_link_down;
+}
+
+void
+lcp_set_netlink_processing_active (u8 is_processing)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->netlink_processing_active = (is_processing != 0);
+}
+
+u8
+lcp_get_netlink_processing_active (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->netlink_processing_active;
+}
+
+void
+lcp_set_default_num_queues (u16 num_queues, u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ lcpm->num_tx_queues = num_queues;
+ else
+ lcpm->num_rx_queues = num_queues;
+}
+
+u16
+lcp_get_default_num_queues (u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ return lcpm->num_tx_queues;
+
+ return lcpm->num_rx_queues ?: vlib_num_workers ();
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
index 7fdad3798bc..e89b149f67d 100644
--- a/src/plugins/linux-cp/lcp.h
+++ b/src/plugins/linux-cp/lcp.h
@@ -22,11 +22,17 @@
typedef struct lcp_main_s
{
u16 msg_id_base; /* API message ID base */
- u8 default_namespace[LCP_NS_LEN]; /* default namespace if set */
+ u8 *default_namespace; /* default namespace if set */
int default_ns_fd;
- u8 auto_intf;
- /* Set when Unit testing */
- u8 test_mode;
+ u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */
+ u8 lcp_sync; /* Automatically sync VPP changes to LCP */
+ u8 del_static_on_link_down; /* Delete static routes when link goes down */
+ u8 del_dynamic_on_link_down; /* Delete dynamic routes when link goes down */
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u8 test_mode; /* Set when Unit testing */
+ u8 netlink_processing_active; /* Set while a batch of Netlink messages are
+ being processed */
} lcp_main_t;
extern lcp_main_t lcp_main;
@@ -38,6 +44,31 @@ int lcp_set_default_ns (u8 *ns);
u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */
int lcp_get_default_ns_fd (void);
+/**
+ * Get/Set whether to delete static routes when the link goes down.
+ */
+void lcp_set_del_static_on_link_down (u8 is_del);
+u8 lcp_get_del_static_on_link_down (void);
+
+/**
+ * Get/Set whether to delete dynamic routes when the link goes down.
+ */
+void lcp_set_del_dynamic_on_link_down (u8 is_del);
+u8 lcp_get_del_dynamic_on_link_down (void);
+
+/**
+ * Get/Set when we're processing a batch of netlink messages.
+ * This is used to avoid looping messages between lcp-sync and netlink.
+ */
+void lcp_set_netlink_processing_active (u8 is_processing);
+u8 lcp_get_netlink_processing_active (void);
+
+/**
+ * Get/Set the default queue number for LCP host taps.
+ */
+void lcp_set_default_num_queues (u16 num_queues, u8 is_tx);
+u16 lcp_get_default_num_queues (u8 is_tx);
+
#endif
/*
diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst
index 6d81901cf7b..6f82a29bfbb 100644
--- a/src/plugins/linux-cp/lcp.rst
+++ b/src/plugins/linux-cp/lcp.rst
@@ -8,9 +8,9 @@ Linux Control Plane Integration
Overview
________
-This plugin allows VPP to integrate with the Linux. The
+This plugin allows VPP to integrate with the Linux kernel. The
general model is that Linux is the network stack, i.e. it has the
-control plane protocols, like ARP, IPv6 ND/MLD, Ping, etc, and VPP
+control plane protocols, like ARP, IPv6 ND/MLD, ping, etc, and VPP
provides a SW based ASIC for forwarding.
Interfaces
@@ -20,16 +20,17 @@ VPP owns the interfaces in the system; physical (.e.g PCI), quasi
physical (e.g. vhost), or virtual (e.g. tunnel). However,
for the Linux networking stack to function it needs a representation
of these interfaces; it needs a mirror image in the kernel. For this
-mirror we use a Tap interface, if the VPP interface is multi-point, a
-Tun if it's point-to-point. A physical and its mirror form an
+mirror we use a TAP interface, if the VPP interface is multi-point, a
+TUN if it's point-to-point. A physical and its mirror form an
interface 'pair'.
-The host interface has two identities; the sw_if_index of the Tap and
-the virtual interface index in the kernel. It may be in a Linux namespace.
+The host interface has two identities; the sw_if_index of the TAP and
+the virtual interface index in the kernel. It may be in a Linux network
+namespace.
The creation of the interface pairs is required from the control
plane. It can be statically configured in the VPP startup
-configuration file. The intent here was to make the pair creation
+configuration file. The intent here is to make the pair creation
explicit, rather than have VPP guess which of the interfaces it owns
require a mirror.
@@ -41,27 +42,23 @@ interfaces. Any configuration that is made on these Linux interfaces,
also needs to be applied on the corresponding physical interface in
VPP.
-This is functionality is not provided in this plugin, but it can be
-achieved in various ways, for example by listening to the netlink
-messages and applying the config. As a result all e.g. routes
-programmed in Linux, will also be present in VPP's FIB.
+This functionality is provided by the "linux_nl" plugin.
-Linux will own the [ARP/ND] nieghbor tables (which will be copied via
+Linux will own the [ARP/ND] neighbor tables (which will be copied via
netlink to VPP also). This means that Linux will send packets with the
peer's MAC address in the rewrite to VPP. The receiving TAP interface
must therefore be in promiscuous mode.
-
Forwarding
__________
The basic principle is to x-connect traffic from a Linux host interface
-(received on the Tap/Tun) to its paired the physical, and vice-versa.
+(received on the tap/tun) to its paired physical, and vice-versa.
Host to Physical
^^^^^^^^^^^^^^^^
-All packets sent by the host, and received by VPP on a Tap/Tun should
+All packets sent by the host, and received by VPP on a tap/tun should
be sent to its paired physical interface. However, they should be sent
with the same consequences as if they had originated from VPP,
i.e. they should be subject to all output features on the physical
@@ -73,17 +70,18 @@ adjacency that VPP would have used to send this packet; this adjacency
is stored in the buffer's meta data so that it is available to all
output features. Then the packet is sent through the physical
interface's IP output feature arc.
+
All ARP packets are x-connected from the tap to the physical.
Physical to Host
^^^^^^^^^^^^^^^^
All ARP packets received on the physical are sent to the paired
-Tap. This allows the Linux network stack to build the nieghbour table.
+tap. This allows the Linux network stack to build the neighbor table.
IP packets that are punted are sent to the host. They are sent on the
tap that is paired with the physical on which they were originally
-received. The packet is sent on the Tap/Tun 'exactly' as it was
+received. The packet is sent on the tap/tun 'exactly' as it was
received (i.e. with the L2 rewrite) but post any translations that
input features may have made.
@@ -92,5 +90,4 @@ Recommendations
^^^^^^^^^^^^^^^
When using this plugin disable the ARP, ND, IGMP plugins; this is the
-task for Linux.
-Disable ping plugin, since Linux will now respond.
+task for Linux. Disable the ping plugin, since Linux will now respond.
diff --git a/src/plugins/linux-cp/lcp_adj.c b/src/plugins/linux-cp/lcp_adj.c
index bfbc2fec913..b10c70616b5 100644
--- a/src/plugins/linux-cp/lcp_adj.c
+++ b/src/plugins/linux-cp/lcp_adj.c
@@ -185,8 +185,8 @@ lcp_adj_show_cmd (vlib_main_t *vm, unformat_input_t *input,
if (unformat (input, "verbose"))
verbose = 1;
- vlib_cli_output (vm, "Linux-CP Adjs:\n%U", BV (format_bihash), &lcp_adj_tbl,
- verbose);
+ vlib_cli_output (vm, "linux-cp adjacencies:\n%U", BV (format_bihash),
+ &lcp_adj_tbl, verbose);
return 0;
}
@@ -210,7 +210,7 @@ lcp_adj_init (vlib_main_t *vm)
{
adj_type = adj_delegate_register_new_type (&lcp_adj_vft);
- BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp ADJ table", 1024, 1 << 24);
+ BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp adjacencies", 1024, 1 << 24);
BV (clib_bihash_set_kvp_format_fn) (&lcp_adj_tbl, format_lcp_adj_kvp);
return (NULL);
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
index c9aa01566c6..74421230e9d 100644
--- a/src/plugins/linux-cp/lcp_api.c
+++ b/src/plugins/linux-cp/lcp_api.c
@@ -41,27 +41,11 @@ api_encode_host_type (lip_host_type_t type)
return LCP_API_ITF_HOST_TAP;
}
-void
-lcp_set_auto_intf (u8 is_auto)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- lcpm->auto_intf = (is_auto != 0);
-}
-
-int
-lcp_auto_intf (void)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- return lcpm->auto_intf;
-}
-
static int
vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
u8 *mp_host_if_name, size_t sizeof_host_if_name,
u8 *mp_namespace, size_t sizeof_mp_namespace,
- u32 *host_sw_if_index_p)
+ u32 *host_sw_if_index_p, u32 *vif_index_p)
{
u8 *host_if_name, *netns;
int host_len, netns_len, rv;
@@ -80,6 +64,13 @@ vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type,
netns, host_sw_if_index_p);
+ if (!rv && (vif_index_p != NULL))
+ {
+ lcp_itf_pair_t *pair =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
+ *vif_index_p = pair->lip_vif_index;
+ }
+
vec_free (host_if_name);
vec_free (netns);
@@ -94,20 +85,15 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv =
- vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name, sizeof (mp->host_if_name),
- mp->namespace, sizeof (mp->namespace), NULL);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL, NULL);
}
else
{
@@ -115,7 +101,7 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
+ REPLY_MACRO_END (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
}
static void
@@ -126,20 +112,45 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ phy_sw_if_index = mp->sw_if_index;
+ lip_host_type = api_decode_host_type (mp->host_if_type);
+ if (mp->is_add)
{
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, NULL);
}
+ else
+ {
+ rv = lcp_itf_pair_delete (phy_sw_if_index);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
+ { rmp->host_sw_if_index = host_sw_if_index; });
+}
+
+static void
+vl_api_lcp_itf_pair_add_del_v3_t_handler (vl_api_lcp_itf_pair_add_del_v3_t *mp)
+{
+ u32 phy_sw_if_index, host_sw_if_index = ~0, vif_index = ~0;
+ vl_api_lcp_itf_pair_add_del_v3_reply_t *rmp;
+ lip_host_type_t lip_host_type;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv = vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name,
- sizeof (mp->host_if_name), mp->namespace,
- sizeof (mp->namespace), &host_sw_if_index);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, &vif_index);
}
else
{
@@ -147,8 +158,10 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
- { rmp->host_sw_if_index = ntohl (host_sw_if_index); });
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V3_REPLY, ({
+ rmp->host_sw_if_index = host_sw_if_index;
+ rmp->vif_index = vif_index;
+ }));
}
static void
@@ -158,7 +171,7 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
vl_api_lcp_itf_pair_details_t *rmp;
lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi);
- REPLY_MACRO_DETAILS4 (
+ REPLY_MACRO_DETAILS4_END (
VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({
rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index;
rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index;
@@ -167,9 +180,11 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
memcpy_s (rmp->host_if_name, sizeof (rmp->host_if_name),
lcp_pair->lip_host_name, vec_len (lcp_pair->lip_host_name));
+ rmp->host_if_name[vec_len (lcp_pair->lip_host_name)] = 0;
- clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace,
- vec_len (lcp_pair->lip_namespace));
+ memcpy_s (rmp->netns, sizeof (rmp->netns), lcp_pair->lip_namespace,
+ vec_len (lcp_pair->lip_namespace));
+ rmp->netns[vec_len (lcp_pair->lip_namespace)] = 0;
}));
}
@@ -179,19 +194,51 @@ vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp)
vl_api_lcp_itf_pair_get_reply_t *rmp;
i32 rv = 0;
- REPLY_AND_DETAILS_MACRO (
+ REPLY_AND_DETAILS_MACRO_END (
VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
}
static void
+vl_api_lcp_itf_pair_get_v2_t_handler (vl_api_lcp_itf_pair_get_v2_t *mp)
+{
+ vl_api_lcp_itf_pair_get_v2_reply_t *rmp;
+ i32 rv = 0;
+
+ if (mp->sw_if_index == ~0)
+ {
+ REPLY_AND_DETAILS_MACRO_END (
+ VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
+ ({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
+ }
+ else
+ {
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ u32 pair_index = lcp_itf_pair_find_by_phy (mp->sw_if_index);
+ if (pair_index == INDEX_INVALID)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ send_lcp_itf_pair_details (
+ pair_index, vl_api_client_index_to_registration (mp->client_index),
+ mp->context);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_GET_V2_REPLY,
+ ({ rmp->cursor = ~0; }));
+ }
+}
+
+static void
vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
{
vl_api_lcp_default_ns_set_reply_t *rmp;
int rv;
- mp->namespace[LCP_NS_LEN - 1] = 0;
- rv = lcp_set_default_ns (mp->namespace);
+ mp->netns[LCP_NS_LEN - 1] = 0;
+ rv = lcp_set_default_ns (mp->netns);
REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY);
}
@@ -199,25 +246,14 @@ vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
static void
vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp)
{
- lcp_main_t *lcpm = &lcp_main;
vl_api_lcp_default_ns_get_reply_t *rmp;
- vl_api_registration_t *reg;
- char *ns;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = (VL_API_LCP_DEFAULT_NS_GET_REPLY + lcpm->msg_id_base);
- rmp->context = mp->context;
-
- ns = (char *) lcp_get_default_ns ();
- if (ns)
- clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1);
- vl_api_send_msg (reg, (u8 *) rmp);
+ REPLY_MACRO_DETAILS2 (VL_API_LCP_DEFAULT_NS_GET_REPLY, ({
+ char *ns = (char *) lcp_get_default_ns ();
+ if (ns)
+ clib_strncpy ((char *) rmp->netns, ns,
+ LCP_NS_LEN - 1);
+ }));
}
static void
@@ -250,7 +286,7 @@ vl_api_lcp_itf_pair_replace_end_t_handler (
#include <linux-cp/lcp.api.c>
static clib_error_t *
-lcp_plugin_api_hookup (vlib_main_t *vm)
+lcp_api_init (vlib_main_t *vm)
{
/* Ask for a correctly-sized block of API message decode slots */
lcp_msg_id_base = setup_message_id_table ();
@@ -258,7 +294,7 @@ lcp_plugin_api_hookup (vlib_main_t *vm)
return (NULL);
}
-VLIB_INIT_FUNCTION (lcp_plugin_api_hookup);
+VLIB_INIT_FUNCTION (lcp_api_init);
#include <vpp/app/version.h>
VLIB_PLUGIN_REGISTER () = {
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index cb874b1c023..0dcf600b301 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -34,81 +34,178 @@ lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
- u8 *host_if_name;
- lip_host_type_t host_if_type;
- u8 *ns;
- int r;
+ u32 sw_if_index = ~0;
+ u8 *host_if_name = NULL;
+ lip_host_type_t host_if_type = LCP_ITF_HOST_TAP;
+ u8 *ns = NULL;
+ clib_error_t *error = NULL;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "host-if %s", &host_if_name))
+ ;
+ else if (unformat (line_input, "netns %s", &ns))
+ ;
+ else if (unformat (line_input, "tun"))
+ host_if_type = LCP_ITF_HOST_TUN;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else if (!host_if_name)
+ error = clib_error_return (0, "host interface name required");
+ else if (vec_len (ns) >= LCP_NS_LEN)
+ error = clib_error_return (
+ 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ else
+ {
+ int r;
+
+ r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns,
+ NULL);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+ }
+
+ vec_free (host_if_name);
+ vec_free (ns);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+ .path = "lcp create",
+ .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+ "netns <namespace> [tun]",
+ .function = lcp_itf_pair_create_command_fn,
+};
+
+static clib_error_t *
+lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
- sw_if_index = ~0;
- host_if_name = ns = NULL;
- host_if_type = LCP_ITF_HOST_TAP;
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "host-if %s", &host_if_name))
- ;
- else if (unformat (line_input, "netns %s", &ns))
- ;
- else if (unformat (line_input, "tun"))
- host_if_type = LCP_ITF_HOST_TUN;
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_sync (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_sync (0);
else
- {
- unformat_free (line_input);
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
unformat_free (line_input);
+ return 0;
+}
- if (!host_if_name)
- {
- vec_free (ns);
- return clib_error_return (0, "host interface name required");
- }
+VLIB_CLI_COMMAND (lcp_sync_command, static) = {
+ .path = "lcp lcp-sync",
+ .short_help = "lcp lcp-sync [on|enable|off|disable]",
+ .function = lcp_sync_command_fn,
+};
- if (sw_if_index == ~0)
- {
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "interface name or sw_if_index required");
- }
+static clib_error_t *
+lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
- if (vec_len (ns) >= LCP_NS_LEN)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (
- 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_auto_subint (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_auto_subint (0);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
- r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns, NULL);
+ unformat_free (line_input);
+ return 0;
+}
- vec_free (host_if_name);
- vec_free (ns);
+VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = {
+ .path = "lcp lcp-auto-subint",
+ .short_help = "lcp lcp-auto-subint [on|enable|off|disable]",
+ .function = lcp_auto_subint_command_fn,
+};
- if (r)
- return clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+static clib_error_t *
+lcp_param_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-static-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_static_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else if (unformat (line_input, "del-dynamic-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_dynamic_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
return 0;
}
-VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
- .path = "lcp create",
- .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
- "netns <namespace> [tun]",
- .function = lcp_itf_pair_create_command_fn,
+VLIB_CLI_COMMAND (lcp_param_command, static) = {
+ .path = "lcp param",
+ .short_help = "lcp param [del-static-on-link-down (on|enable|off|disable)] "
+ "[del-dynamic-on-link-down (on|enable|off|disable)]",
+ .function = lcp_param_command_fn,
};
static clib_error_t *
@@ -118,6 +215,7 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *ns;
int r;
+ clib_error_t *error = NULL;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -130,10 +228,15 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "clear netns"))
;
+ else
+ {
+ vec_free (ns);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
-
vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
r = lcp_set_default_ns (ns);
@@ -141,7 +244,10 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (r)
return clib_error_return (0, "linux-cp set default netns failed (%d)", r);
- return 0;
+done:
+ unformat_free (line_input);
+
+ return error;
}
VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
@@ -156,36 +262,42 @@ lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
vnet_main_t *vnm = vnet_get_main ();
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int r;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- sw_if_index = ~0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_user (input, unformat_line_input, line_input))
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
}
- unformat_free (line_input);
-
- if (sw_if_index == ~0)
- return clib_error_return (0, "interface name or sw_if_index required");
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else
+ {
+ int r;
- r = lcp_itf_pair_delete (sw_if_index);
+ r = lcp_itf_pair_delete (sw_if_index);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+ }
- if (r)
- return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
- return 0;
+ return error;
}
VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index da409619746..e1f4a6a1d69 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -21,6 +21,7 @@
#include <linux-cp/lcp_interface.h>
#include <netlink/route/link/vlan.h>
+#include <linux/if_ether.h>
#include <vnet/plugin/plugin.h>
#include <vnet/plugin/plugin.h>
@@ -38,12 +39,12 @@
#include <vlibapi/api_helper_macros.h>
#include <vnet/ipsec/ipsec_punt.h>
-static vlib_log_class_t lcp_itf_pair_logger;
+vlib_log_class_t lcp_itf_pair_logger;
/**
* Pool of LIP objects
*/
-lcp_itf_pair_t *lcp_itf_pair_pool;
+lcp_itf_pair_t *lcp_itf_pair_pool = NULL;
u32
lcp_itf_num_pairs (void)
@@ -72,12 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft)
vec_add1 (lcp_itf_vfts, *lcp_itf_vft);
}
-#define LCP_ITF_PAIR_DBG(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_INFO(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
u8 *
format_lcp_itf_pair (u8 *s, va_list *args)
{
@@ -136,6 +131,13 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
ns = lcp_get_default_ns ();
vlib_cli_output (vm, "lcp default netns '%s'\n",
ns ? (char *) ns : "<unset>");
+ vlib_cli_output (vm, "lcp lcp-auto-subint %s\n",
+ lcp_auto_subint () ? "on" : "off");
+ vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-static-on-link-down %s\n",
+ lcp_get_del_static_on_link_down () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-dynamic-on-link-down %s\n",
+ lcp_get_del_dynamic_on_link_down () ? "on" : "off");
if (phy_sw_if_index == ~0)
{
@@ -152,6 +154,11 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
+/**
+ * Look up a LIP object by its pool index.
+ *
+ * Returns NULL when the LIP pool has not been allocated yet or when index
+ * is INDEX_INVALID; otherwise returns the element of lcp_itf_pair_pool at
+ * that index.
+ */
 lcp_itf_pair_t *
 lcp_itf_pair_get (u32 index)
 {
+ if (!lcp_itf_pair_pool)
+ return NULL;
+ if (index == INDEX_INVALID)
+ return NULL;
+
 return pool_elt_at_index (lcp_itf_pair_pool, index);
 }
@@ -168,18 +175,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index)
return INDEX_INVALID;
}
-int
-lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns)
-{
- lcp_itf_pair_t *lip;
-
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
-
- return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
- host_if_name, vif, lip->lip_host_type, ns);
-}
-
const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
[LCP_ITF_HOST_TAP] = {
[AF_IP4] = "linux-cp-xc-ip4",
@@ -235,17 +230,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
index_t lipi;
lcp_itf_pair_t *lip;
+ if (host_sw_if_index == ~0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
- LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%v",
+ if (lipi != INDEX_INVALID)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index, host_name, host_index,
ns);
- if (lipi != INDEX_INVALID)
- return VNET_API_ERROR_VALUE_EXIST;
-
/*
* Create a new pair.
*/
@@ -266,9 +267,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
lip->lip_vif_index = host_index;
lip->lip_namespace = vec_dup (ns);
- if (lip->lip_host_sw_if_index == ~0)
- return 0;
-
/*
* First use of this host interface.
* Enable the x-connect feature on the host to send
@@ -314,10 +312,13 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ }
}
/* invoke registered callbacks for pair addition */
@@ -336,7 +337,7 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
static clib_error_t *
-lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
+lcp_netlink_add_link_vlan (int parent, u32 vlan, u16 proto, const char *name)
{
struct rtnl_link *link;
struct nl_sock *sk;
@@ -344,17 +345,25 @@ lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
sk = nl_socket_alloc ();
if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
- return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: connect error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ }
link = rtnl_link_vlan_alloc ();
rtnl_link_set_link (link, parent);
rtnl_link_set_name (link, name);
-
rtnl_link_vlan_set_id (link, vlan);
+ rtnl_link_vlan_set_protocol (link, htons (proto));
if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0)
- return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: link add error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ }
rtnl_link_put (link);
nl_close (sk);
@@ -400,10 +409,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
lip = lcp_itf_pair_get (lipi);
- LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %s}", format_vnet_sw_if_index_name,
- vnet_get_main (), lip->lip_phy_sw_if_index,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index, lip->lip_host_name);
+ LCP_ITF_PAIR_NOTICE (
+ "pair_del: host:%U phy:%U host_if:%v vif:%d ns:%v",
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
+ lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace);
/* invoke registered callbacks for pair deletion */
vec_foreach (vft, lcp_itf_vfts)
@@ -432,12 +442,14 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ }
}
-
lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
hash_unset (lip_db_by_vif, lip->lip_vif_index);
@@ -454,24 +466,45 @@ lcp_itf_pair_delete_by_index (index_t lipi)
{
u32 host_sw_if_index;
lcp_itf_pair_t *lip;
- u8 *host_name;
+ u8 *host_name, *ns;
lip = lcp_itf_pair_get (lipi);
host_name = vec_dup (lip->lip_host_name);
host_sw_if_index = lip->lip_host_sw_if_index;
+ ns = vec_dup (lip->lip_namespace);
lcp_itf_pair_del (lip->lip_phy_sw_if_index);
if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
{
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ if (ns)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open ((u8 *) ns);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
lcp_netlink_del_link ((const char *) host_name);
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
vnet_delete_sub_interface (host_sw_if_index);
}
else
tap_delete_if (vlib_get_main (), host_sw_if_index);
vec_free (host_name);
+ vec_free (ns);
}
int
@@ -489,6 +522,23 @@ lcp_itf_pair_delete (u32 phy_sw_if_index)
return 0;
}
+/**
+ * lcp_itf_interface_add_del
+ *
+ * Registered (via VNET_SW_INTERFACE_ADD_DEL_FUNCTION below) to receive
+ * interface add and delete notifications.
+ *
+ * Additions are ignored. On deletion, lcp_itf_pair_delete() is called with
+ * the interface's sw_if_index and its return value is discarded, so this
+ * callback always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ if (!is_add)
+ /* remove any interface pair we have for this interface */
+ lcp_itf_pair_delete (sw_if_index);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
void
lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
{
@@ -501,58 +551,17 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
};
}
-typedef struct lcp_itf_pair_names_t_
-{
- u8 *lipn_host_name;
- u8 *lipn_phy_name;
- u8 *lipn_namespace;
- u32 lipn_phy_sw_if_index;
-} lcp_itf_pair_names_t;
-
-static lcp_itf_pair_names_t *lipn_names;
-
static clib_error_t *
lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
{
- u8 *host, *phy;
- u8 *ns;
u8 *default_ns;
+ u32 tmp;
- host = phy = ns = default_ns = NULL;
+ default_ns = NULL;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vec_reset_length (host);
-
- if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
- {
- lcp_itf_pair_names_t *lipn;
-
- if (vec_len (ns) > LCP_NS_LEN)
- {
- return clib_error_return (0,
- "linux-cp IF namespace must"
- " be less than %d characters",
- LCP_NS_LEN);
- }
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = vec_dup (ns);
- }
- else if (unformat (input, "pair %v %v", &phy, &host))
- {
- lcp_itf_pair_names_t *lipn;
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = 0;
- }
- else if (unformat (input, "default netns %v", &default_ns))
+ if (unformat (input, "default netns %v", &default_ns))
{
vec_add1 (default_ns, 0);
if (lcp_set_default_ns (default_ns) < 0)
@@ -563,14 +572,22 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
LCP_NS_LEN);
}
}
- else if (unformat (input, "interface-auto-create"))
- lcp_set_auto_intf (1 /* is_auto */);
+ else if (unformat (input, "lcp-auto-subint"))
+ lcp_set_auto_subint (1 /* is_auto */);
+ else if (unformat (input, "lcp-sync"))
+ lcp_set_sync (1 /* is_auto */);
+ else if (unformat (input, "del-static-on-link-down"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (input, "del-dynamic-on-link-down"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (input, "num-rx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 0 /* is_tx */);
+ else if (unformat (input, "num-tx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 1 /* is_tx */);
else
return clib_error_return (0, "interfaces not found");
}
- vec_free (host);
- vec_free (phy);
vec_free (default_ns);
return NULL;
@@ -615,22 +632,81 @@ lcp_validate_if_name (u8 *name)
return 1;
}
-static void
-lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
+/**
+ * Apply a link state to the Linux side of an interface pair.
+ *
+ * Calls vnet_netlink_set_link_state() on the pair's lip_vif_index with the
+ * given state. If the pair has a namespace, the calling thread is switched
+ * into it for the duration of the netlink call and switched back to the
+ * original namespace afterwards. Does nothing when lip is NULL.
+ *
+ * NOTE(review): if the pair's namespace cannot be opened (vif_ns_fd stays
+ * -1), the netlink call is still issued from the current namespace -
+ * confirm that this is intended.
+ */
+void
+lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
 {
 int curr_ns_fd, vif_ns_fd;
+ if (!lip)
+ return;
+
 curr_ns_fd = vif_ns_fd = -1;
- if (ns)
+ if (lip->lip_namespace)
+ {
+ /* remember the current netns so it can be restored below */
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ /* Set the same link state on the netlink interface
+ */
+ vnet_netlink_set_link_state (lip->lip_vif_index, state);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ /* switch back to the netns we started in */
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+void
+lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ int vif_ns_fd = -1;
+ int curr_ns_fd = -1;
+
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
- vif_ns_fd = clib_netns_open (ns);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
- vnet_netlink_set_link_state (vif_index, up);
+ /* Sync any IP4 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
+ format_lcp_itf_pair, lip, format_ip4_address, r4,
+ ia->address_length);
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length);
+ }));
+
+ /* Sync any IP6 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
+ format_lcp_itf_pair, lip, format_ip6_address, r6,
+ ia->address_length);
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
+ }));
if (vif_ns_fd != -1)
close (vif_ns_fd);
@@ -642,6 +718,64 @@ lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
}
}
+/* Search key and result passed through the sw-interface walk performed by
+ * lcp_itf_pair_find_by_outer_vlan() / lcp_itf_pair_find_walk().
+ */
+typedef struct
+{
+ /* in: outer VLAN id the sub-interface must have */
+ u32 vlan;
+ /* in: dot1ad flag the sub-interface must have */
+ bool dot1ad;
+
+ /* out: sw_if_index of the matching sub-interface; the caller presets it
+ * to INDEX_INVALID so that "no match" can be detected */
+ u32 matched_sw_if_index;
+} lcp_itf_match_t;
+
+/**
+ * Walk callback: test one sw interface against an lcp_itf_match_t.
+ *
+ * arg points to the lcp_itf_match_t being searched for. An interface
+ * matches when it has no inner VLAN id and its outer VLAN id and dot1ad
+ * flag equal the ones in the match. On a match the interface's sw_if_index
+ * is stored in match->matched_sw_if_index and the walk is stopped;
+ * otherwise the walk continues.
+ */
+static walk_rc_t
+lcp_itf_pair_find_walk (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_match_t *match = arg;
+ const vnet_sw_interface_t *sw;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ /* inner_vlan_id == 0 restricts the match to single-tagged sub-interfaces */
+ if (sw && (sw->sub.eth.inner_vlan_id == 0) &&
+ (sw->sub.eth.outer_vlan_id == match->vlan) &&
+ (sw->sub.eth.flags.dot1ad == match->dot1ad))
+ {
+ LCP_ITF_PAIR_DBG ("find_walk: found match outer %d dot1ad %d "
+ "inner-dot1q %d: interface %U",
+ sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad,
+ sw->sub.eth.inner_vlan_id,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index);
+ match->matched_sw_if_index = sw->sw_if_index;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/* Find the LIP of the single-tagged sub-interface carrying a given outer
+ * VLAN.
+ *
+ * Walks the sw interfaces of the hardware interface that supports
+ * sup_if_index looking for one with the given outer vlan and dot1ad flag
+ * and no inner VLAN (see lcp_itf_pair_find_walk), and returns that
+ * interface's entry in lip_db_by_phy.
+ *
+ * Returns INDEX_INVALID when the matched sw_if_index lies outside
+ * lip_db_by_phy; presumably this also covers "nothing matched", since
+ * matched_sw_if_index is then still INDEX_INVALID.
+ */
+static index_t
+lcp_itf_pair_find_by_outer_vlan (u32 sup_if_index, u16 vlan, bool dot1ad)
+{
+ lcp_itf_match_t match;
+ const vnet_hw_interface_t *hw;
+
+ match.vlan = vlan;
+ match.dot1ad = dot1ad;
+ match.matched_sw_if_index = INDEX_INVALID;
+ hw = vnet_get_sup_hw_interface (vnet_get_main (), sup_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hw->hw_if_index,
+ lcp_itf_pair_find_walk, &match);
+
+ if (match.matched_sw_if_index >= vec_len (lip_db_by_phy))
+ return INDEX_INVALID;
+
+ return lip_db_by_phy[match.matched_sw_if_index];
+}
+
+static clib_error_t *lcp_itf_pair_link_up_down (vnet_main_t *vnm,
+ u32 hw_if_index, u32 flags);
+
int
lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
lip_host_type_t host_if_type, u8 *ns,
@@ -649,24 +783,53 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
{
vlib_main_t *vm;
vnet_main_t *vnm;
- u32 vif_index = 0, host_sw_if_index;
+ u32 vif_index = 0, host_sw_if_index = ~0;
const vnet_sw_interface_t *sw;
const vnet_hw_interface_t *hw;
+ const lcp_itf_pair_t *lip;
+ index_t lipi;
+
+ lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+ if (lipi != INDEX_INVALID)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: already created");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid phy index %u", phy_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
if (!lcp_validate_if_name (host_if_name))
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid host-if-name '%s'",
+ host_if_name);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
vnm = vnet_get_main ();
sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ if (!sw || !hw)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid interface");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
+ if (hw->hw_class_index != ethernet_hw_interface_class.index &&
+ host_if_type == LCP_ITF_HOST_TAP)
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: don't create TAP for non-eth interface; use tun");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
/*
* Use interface-specific netns if supplied.
- * Otherwise, use default netns if defined.
- * Otherwise ignore a netns and use the OS default.
+ * Otherwise, use netns if defined, otherwise use the OS default.
*/
if (ns == 0 || ns[0] == 0)
ns = lcp_get_default_ns ();
@@ -674,16 +837,50 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/* sub interfaces do not need a tap created */
if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
{
- const lcp_itf_pair_t *lip;
+ index_t parent_if_index;
int orig_ns_fd, ns_fd;
clib_error_t *err;
- u16 vlan;
+ u16 outer_vlan, inner_vlan;
+ u16 outer_proto, inner_proto;
+ u16 vlan, proto;
+ u32 parent_vif_index;
- /*
- * Find the parent tap by finding the pair from the parent phy
- */
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
- vlan = sw->sub.eth.outer_vlan_id;
+ err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index);
+ if (err)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without exact-match set");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ outer_vlan = sw->sub.eth.outer_vlan_id;
+ inner_vlan = sw->sub.eth.inner_vlan_id;
+ outer_proto = inner_proto = ETH_P_8021Q;
+ if (1 == sw->sub.eth.flags.dot1ad)
+ outer_proto = ETH_P_8021AD;
+
+ LCP_ITF_PAIR_INFO ("pair_create: subif: dot1%s outer %d inner %d on %U",
+ sw->sub.eth.flags.dot1ad ? "ad" : "q", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ hw->sw_if_index);
+
+ parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
+ if (INDEX_INVALID == parent_if_index)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sup_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ lip = lcp_itf_pair_get (parent_if_index);
+ if (!lip)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without an LCP on the parent");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ LCP_ITF_PAIR_DBG ("pair_create: parent %U", format_lcp_itf_pair, lip);
+ parent_vif_index = lip->lip_vif_index;
/*
* see if the requested host interface has already been created
@@ -708,11 +905,56 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* no existing host interface, create it now
*/
- err = lcp_netlink_add_link_vlan (lip->lip_vif_index, vlan,
- (const char *) host_if_name);
- if (!err && -1 != ns_fd)
- err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
+ /*
+ * Find the parent tap:
+ * - if this is an outer VLAN, use the pair from the parent phy
+ * - if this is an inner VLAN, find the pair from the outer sub-int,
+ * which must exist.
+ */
+ if (inner_vlan)
+ {
+ index_t linux_parent_if_index;
+ const lcp_itf_pair_t *llip;
+
+ vlan = inner_vlan;
+ proto = inner_proto;
+ linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan (
+ hw->sw_if_index, sw->sub.eth.outer_vlan_id,
+ sw->sub.eth.flags.dot1ad);
+ if (INDEX_INVALID == linux_parent_if_index ||
+ !(llip = lcp_itf_pair_get (linux_parent_if_index)))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: can't find LCP for outer vlan %d "
+ "proto %s on %U",
+ outer_vlan,
+ outer_proto == ETH_P_8021AD ? "dot1ad" : "dot1q",
+ format_vnet_sw_if_index_name, vnm, hw->sw_if_index);
+ err = clib_error_return (0, "parent pair not found");
+ goto socket_close;
+ }
+
+ LCP_ITF_PAIR_DBG ("pair_create: linux parent %U",
+ format_lcp_itf_pair, llip);
+ parent_vif_index = llip->lip_vif_index;
+ }
+ else
+ {
+ vlan = outer_vlan;
+ proto = outer_proto;
+ }
+
+ err = lcp_netlink_add_link_vlan (parent_vif_index, vlan, proto,
+ (const char *) host_if_name);
+ if (err != 0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: cannot create link "
+ "outer(proto:0x%04x,vlan:%u).inner(proto:0x%"
+ "04x,vlan:%u) name:'%s'",
+ outer_proto, outer_vlan, inner_proto,
+ inner_vlan, host_if_name);
+ }
if (!err)
vif_index = if_nametoindex ((char *) host_if_name);
@@ -721,13 +963,20 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* create a sub-interface on the tap
*/
- if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index,
- sw->sub.id, sw->sub.eth.raw_flags,
- sw->sub.eth.inner_vlan_id, vlan,
- &host_sw_if_index))
- LCP_ITF_PAIR_INFO ("failed create vlan: %d on %U", vlan,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index);
+ if (!err &&
+ vnet_create_sub_interface (lip->lip_host_sw_if_index, sw->sub.id,
+ sw->sub.eth.raw_flags, inner_vlan,
+ outer_vlan, &host_sw_if_index))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: failed to create tap subint: %d.%d on %U",
+ outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ err = clib_error_return (
+ 0, "failed to create tap subint: %d.%d. on %U", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ }
socket_close:
if (orig_ns_fd != -1)
@@ -744,15 +993,21 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
else
{
tap_create_if_args_t args = {
- .num_rx_queues = clib_max (1, vlib_num_workers ()),
+ .num_rx_queues =
+ clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)),
+ .num_tx_queues =
+ clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)),
.id = hw->hw_if_index,
.sw_if_index = ~0,
.rx_ring_sz = 256,
.tx_ring_sz = 256,
.host_if_name = host_if_name,
.host_namespace = 0,
+ .rv = 0,
+ .error = NULL,
};
ethernet_interface_t *ei;
+ u32 host_sw_mtu_size;
if (host_if_type == LCP_ITF_HOST_TUN)
args.tap_flags |= TAP_FLAG_TUN;
@@ -762,38 +1017,45 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
mac_address_copy (&args.host_mac_addr, &ei->address.mac);
}
- if (sw->mtu[VNET_MTU_L3])
+ /*
+ * The TAP interface does copy forward the host MTU based on the VPP
+ * interface's L3 MTU, but it should also ensure that the VPP tap
+ * interface has an MTU that is greater-or-equal to those. Considering
+ * users can set the interfaces at runtime (set interface mtu packet ...)
+ * ensure that the tap MTU is large enough, taking the VPP interface L3
+ * if it's set, and otherwise a sensible default.
+ */
+ host_sw_mtu_size = sw->mtu[VNET_MTU_L3];
+ if (host_sw_mtu_size)
{
args.host_mtu_set = 1;
- args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+ args.host_mtu_size = host_sw_mtu_size;
}
+ else
+ host_sw_mtu_size = ETHERNET_MAX_PACKET_BYTES;
if (ns && ns[0] != 0)
args.host_namespace = ns;
vm = vlib_get_main ();
tap_create_if (vm, &args);
-
if (args.rv < 0)
{
+ LCP_ITF_PAIR_ERR ("pair_create: could not create tap, retval:%d",
+ args.rv);
+ clib_error_free (args.error);
return args.rv;
}
+ vnet_sw_interface_set_mtu (vnm, args.sw_if_index, host_sw_mtu_size);
+
/*
* get the hw and ethernet of the tap
*/
hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index);
-
- /*
- * Set the interface down on the host side.
- * This controls whether the host can RX/TX.
- */
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- lcp_itf_set_vif_link_state (vif->ifindex, 0 /* down */,
- args.host_namespace);
-
/*
* Leave the TAP permanently up on the VPP side.
* This TAP will be shared by many sub-interface.
@@ -819,14 +1081,35 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
return -1;
}
- vnet_sw_interface_admin_up (vnm, host_sw_if_index);
- lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
- host_if_type, ns);
-
LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index,
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, host_if_name);
+ lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
+ host_if_type, ns);
+
+ /*
+ * Copy the link state from VPP into the host side.
+ * The TAP is shared by many interfaces, always keep it up.
+ * This controls whether the host can RX/TX.
+ */
+ sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index));
+ LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u",
+ format_lcp_itf_pair, lip, sw->flags, hw->flags);
+ vnet_sw_interface_admin_up (vnm, host_sw_if_index);
+ lcp_itf_set_link_state (lip, sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /*
+ * Reflect current link state and link speed of the hardware interface on the
+ * TAP interface.
+ */
+ if (host_if_type == LCP_ITF_HOST_TAP &&
+ !vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+ {
+ hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ lcp_itf_pair_link_up_down (vnm, hw->hw_if_index, hw->flags);
+ }
if (host_sw_if_indexp)
*host_sw_if_indexp = host_sw_if_index;
@@ -890,70 +1173,6 @@ lcp_itf_pair_replace_end (void)
return (0);
}
-static uword
-lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
- vlib_frame_t *f)
-{
- uword *event_data = 0;
- uword *lipn_index;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- vlib_process_get_events (vm, &event_data);
-
- vec_foreach (lipn_index, event_data)
- {
- lcp_itf_pair_names_t *lipn;
-
- lipn = &lipn_names[*lipn_index];
- lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
- lipn->lipn_host_name, LCP_ITF_HOST_TAP,
- lipn->lipn_namespace, NULL);
- }
-
- vec_reset_length (event_data);
- }
-
- return 0;
-}
-
-VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
- .function = lcp_itf_pair_process,
- .name = "linux-cp-itf-process",
- .type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
-{
- lcp_itf_pair_names_t *lipn;
- vlib_main_t *vm = vlib_get_main ();
- vnet_hw_interface_t *hw;
-
- if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
- return NULL;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- vec_foreach (lipn, lipn_names)
- {
- if (!vec_cmp (hw->name, lipn->lipn_phy_name))
- {
- lipn->lipn_phy_sw_if_index = sw_if_index;
-
- vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
- lipn - lipn_names);
- break;
- }
- }
-
- return NULL;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
-
static clib_error_t *
lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
@@ -980,7 +1199,8 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
tap_set_carrier (si->hw_if_index,
(flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
- if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP &&
+ hi->link_speed != UINT32_MAX)
{
tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
}
@@ -992,13 +1212,15 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
static clib_error_t *
-lcp_itf_pair_init (vlib_main_t *vm)
+lcp_interface_init (vlib_main_t *vm)
{
vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
/* punt IKE */
vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
"linux-cp-punt");
+ vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP6_SPI_UDP_0],
+ "linux-cp-punt");
/* punt all unknown ports */
udp_punt_unknown (vm, 0, 1);
@@ -1011,7 +1233,7 @@ lcp_itf_pair_init (vlib_main_t *vm)
return NULL;
}
-VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+VLIB_INIT_FUNCTION (lcp_interface_init) = {
.runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
};
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index bed30248845..cfcd3925a15 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -21,6 +21,22 @@
#include <plugins/linux-cp/lcp.h>
+extern vlib_log_class_t lcp_itf_pair_logger;
+
+/*
+ * Logging helpers, one per severity, all writing to lcp_itf_pair_logger.
+ *
+ * NOTE(review): every expansion already ends in ';', so the usual call
+ * form "LCP_ITF_PAIR_ERR (...);" yields an extra empty statement. Using
+ * one of these as the unbraced body of an 'if' that has an 'else' will
+ * not compile; wrap such calls in braces.
+ */
+#define LCP_ITF_PAIR_DBG(...) \
+ vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_INFO(...) \
+ vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_NOTICE(...) \
+ vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_WARN(...) \
+ vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
+
#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
typedef enum lip_flag_t_
@@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
u8 *host_name, u32 host_index,
lip_host_type_t host_type, u8 *ns);
-extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns);
extern int lcp_itf_pair_del (u32 phy_sw_if_index);
/**
@@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index)
return (lip_db_by_host[host_sw_if_index]);
}
-/**
- * manage interface auto creation
- */
-void lcp_set_auto_intf (u8 is_auto);
-int lcp_auto_intf (void);
-
typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *);
typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *);
@@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft
} lcp_itf_pair_vft_t;
void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft);
+
+/**
+ * Sub-interface auto creation/deletion for LCP.
+ *
+ * lcp_set_auto_subint() stores the setting and lcp_auto_subint() reads it
+ * back; a non-zero value presumably means "enabled" (confirm against the
+ * implementation).
+ */
+void lcp_set_auto_subint (u8 is_auto);
+int lcp_auto_subint (void);
+
+/**
+ * Sync state changes from VPP into LCP.
+ *
+ * lcp_set_sync() stores the setting and lcp_sync() reads it back.
+ * NOTE(review): the parameter is named is_auto although it controls
+ * syncing, not auto creation.
+ */
+void lcp_set_sync (u8 is_auto);
+int lcp_sync (void);
+
+/* Set TAP and Linux host link state of the given pair. */
+void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state);
+
+/* Set any VPP L3 addresses on the Linux host device of the given pair. */
+void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip);
+
+/* Sync all state from VPP to a specific Linux device, all sub-interfaces
+ * of a hardware interface, or all interfaces in the system.
+ *
+ * Note: in some circumstances, this syncer will (have to) make changes to
+ * the VPP interface, for example if its MTU is greater than its parent.
+ * See the function for rationale.
+ *
+ * lcp_itf_pair_sync_state_all takes no arguments; it is declared with
+ * (void) so that the compiler rejects calls that pass any.
+ */
+void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
+void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
+void lcp_itf_pair_sync_state_all (void);
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c
new file mode 100644
index 00000000000..ca7638e1799
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface_sync.c
@@ -0,0 +1,445 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2021 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/linux/netns.h>
+#include <plugins/linux-cp/lcp_interface.h>
+
+/*
+ * Helper function to copy forward the sw interface admin state, MTU, and
+ * IP addresses of a pair's VPP interface into its counterpart LIP
+ * interface.
+ *
+ * This is called upon MTU changes and state changes. It does nothing when
+ * sync is disabled or while netlink messages are being processed.
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *sw;
+  vnet_sw_interface_t *sup_sw;
+  clib_error_t *err;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+  int state;
+  u32 mtu;
+  u32 netlink_mtu;
+
+  if (!lcp_sync () || lcp_get_netlink_processing_active ())
+    return;
+
+  sw = vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index);
+  if (!sw)
+    return;
+  sup_sw = vnet_get_sw_interface_or_null (vnm, sw->sup_sw_if_index);
+  if (!sup_sw)
+    return;
+
+  /* Enter the pair's network namespace (if any); the current namespace is
+   * remembered so that it can be restored before returning.
+   */
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+                     format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+                     sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+  /* Linux will not allow children to be admin-up if their parent is
+   * admin-down. If child is up but parent is not, force it down.
+   */
+  state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+  if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      LCP_ITF_PAIR_WARN (
+        "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+        "forcing state to sup-flags to satisfy netlink",
+        format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+        sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+      state = 0;
+    }
+  lcp_itf_set_link_state (lip, state);
+
+  /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+   * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+   * inherits from its parent. Linux likes to be more explicit, so we
+   * reconcile any differences.
+   */
+  mtu = sw->mtu[VNET_MTU_L3];
+  if (mtu == 0)
+    mtu = sup_sw->mtu[VNET_MTU_L3];
+
+  if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
+    {
+      LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: "
+                         "clamping to sup-mtu to satisfy netlink",
+                         format_lcp_itf_pair, lip, sw->flags,
+                         sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+      mtu = sup_sw->mtu[VNET_MTU_L3];
+    }
+
+  /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we
+   * really do want to change the MTU.
+   */
+  vnet_sw_interface_set_mtu (vnm, lip->lip_phy_sw_if_index, mtu);
+  vnet_sw_interface_set_mtu (vnm, lip->lip_host_sw_if_index, mtu);
+  err = vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu);
+  if (err)
+    {
+      /* The error object is returned to us; release it instead of
+       * leaking it. The host MTU is left untouched in this case.
+       */
+      LCP_ITF_PAIR_WARN ("sync_state: %U: unable to read host mtu: %U",
+                         format_lcp_itf_pair, lip, format_clib_error, err);
+      clib_error_free (err);
+    }
+  else if (netlink_mtu != mtu)
+    {
+      vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu);
+    }
+
+  /* Linux will remove IPv6 addresses on children when the parent state
+   * goes down, so we ensure all IPv4/IPv6 addresses are synced.
+   */
+  lcp_itf_set_interface_addr (lip);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  /* Return to the namespace we started in. */
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+}
+
+/*
+ * Pair-walk callback: sync the state of the pair at index lipi, if it
+ * exists. Always asks the walk to continue; ctx is unused.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx)
+{
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (lipi);
+
+  if (pair)
+    lcp_itf_pair_sync_state (pair);
+
+  return WALK_CONTINUE;
+}
+
+/*
+ * Hardware-interface sw walk callback: if sw_if_index is the phy side of
+ * a pair, sync that pair's state. Always asks the walk to continue; vnm
+ * and arg are unused.
+ */
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index,
+                                    void *arg)
+{
+  index_t lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (lipi);
+
+  if (pair)
+    lcp_itf_pair_sync_state (pair);
+
+  return WALK_CONTINUE;
+}
+
+/* Sync the state of every interface pair in the system. */
+void
+lcp_itf_pair_sync_state_all (void)
+{
+  lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0);
+}
+
+/*
+ * Sync the state of every pair whose phy is a software interface of the
+ * given hardware interface. A NULL hi is ignored.
+ */
+void
+lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  if (!hi)
+    return;
+
+  /* format_vnet_sw_if_index_name takes a software interface index, so log
+   * the hardware interface's own sw_if_index rather than its hw_if_index.
+   */
+  LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name, vnm,
+                    hi->sw_if_index);
+
+  vnet_hw_interface_walk_sw (vnm, hi->hw_if_index,
+                             lcp_itf_pair_walk_sync_state_hw_cb, NULL);
+}
+
+/*
+ * Admin up/down callback: sync the state of a paired interface into the
+ * host. Sub-interfaces are synced individually; for any other paired
+ * interface all software interfaces of its hardware interface are synced.
+ * Does nothing when sync is disabled or while netlink messages are being
+ * processed. Always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+  lcp_itf_pair_t *pair;
+  vnet_sw_interface_t *sw;
+  vnet_hw_interface_t *hw;
+
+  if (!lcp_sync () || lcp_get_netlink_processing_active ())
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
+                    format_vnet_sw_if_index_name, vnm, sw_if_index, flags);
+
+  /* Only interfaces that are the phy side of a pair are synced. */
+  pair = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!pair)
+    return NULL;
+
+  LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair,
+                     pair, flags);
+
+  if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+    {
+      lcp_itf_pair_sync_state (pair);
+      return NULL;
+    }
+
+  /* When Linux changes link on a parent interface, all of its children
+   * also change. If a parent interface changes MTU, all of its children
+   * are clamped at that MTU by Linux. Neither holds true in VPP, so we are
+   * forced to undo that change by walking the sub-interfaces of a phy and
+   * syncing their state back into Linux.
+   */
+  sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  hw = sw ? vnet_get_hw_interface_or_null (vnm, sw->hw_if_index) : NULL;
+  if (!hw)
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children",
+                    format_vnet_sw_if_index_name, vnm, sw->sw_if_index,
+                    format_vnet_sw_if_index_name, vnm, hw->sw_if_index);
+
+  lcp_itf_pair_sync_state_hw (hw);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change);
+
+/*
+ * MTU change callback: sync the state of the affected pair(s) into the
+ * host. A sub-interface is synced only if it is paired; for any other
+ * interface all software interfaces of its hardware interface are synced.
+ * Does nothing when sync is disabled or while netlink messages are being
+ * processed. Always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+  vnet_sw_interface_t *sw;
+  vnet_hw_interface_t *hw;
+
+  if (!lcp_sync () || lcp_get_netlink_processing_active ())
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
+                    sw_if_index, flags);
+
+  if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+    {
+      lcp_itf_pair_t *pair =
+        lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+
+      if (pair)
+        lcp_itf_pair_sync_state (pair);
+      return NULL;
+    }
+
+  /* When Linux changes link on a parent interface, all of its children
+   * also change. If a parent interface changes MTU, all of its children
+   * are clamped at that MTU by Linux. Neither holds true in VPP, so we are
+   * forced to undo that change by walking the sub-interfaces of a phy and
+   * syncing their state back into Linux.
+   */
+  sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  hw = sw ? vnet_get_hw_interface_or_null (vnm, sw->hw_if_index) : NULL;
+  if (!hw)
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+                    format_vnet_sw_if_index_name, vnm, sw->sw_if_index,
+                    format_vnet_sw_if_index_name, vnm, hw->sw_if_index);
+
+  lcp_itf_pair_sync_state_hw (hw);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+/*
+ * IPv4 interface address add/del callback: mirror the change onto the
+ * host interface of the pair whose phy is sw_if_index, inside the pair's
+ * network namespace when one is configured. Does nothing when sync is
+ * disabled, while netlink messages are being processed, or when the
+ * interface is not paired.
+ */
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip4_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const char *op = is_del ? "del" : "add";
+  const lcp_itf_pair_t *pair;
+  int self_ns_fd = -1;
+  int host_ns_fd = -1;
+
+  if (!lcp_sync () || lcp_get_netlink_processing_active ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", op,
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip4_address, address, address_length);
+
+  pair = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!pair)
+    return;
+
+  /* Enter the pair's namespace, remembering ours for the way back. */
+  if (pair->lip_namespace)
+    {
+      self_ns_fd = clib_netns_open (NULL /* self */);
+      host_ns_fd = clib_netns_open (pair->lip_namespace);
+      if (host_ns_fd != -1)
+        clib_setns (host_ns_fd);
+    }
+
+  LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", op, format_lcp_itf_pair,
+                    pair, format_ip4_address, address, address_length);
+
+  if (!is_del)
+    vnet_netlink_add_ip4_addr (pair->lip_vif_index, address, address_length);
+  else
+    vnet_netlink_del_ip4_addr (pair->lip_vif_index, address, address_length);
+
+  if (host_ns_fd != -1)
+    close (host_ns_fd);
+
+  if (self_ns_fd != -1)
+    {
+      clib_setns (self_ns_fd);
+      close (self_ns_fd);
+    }
+}
+
+/*
+ * IPv6 interface address add/del callback: mirror the change onto the
+ * host interface of the pair whose phy is sw_if_index, inside the pair's
+ * network namespace when one is configured. Does nothing when sync is
+ * disabled, while netlink messages are being processed, or when the
+ * interface is not paired.
+ */
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+                                    u32 sw_if_index, ip6_address_t *address,
+                                    u32 address_length, u32 if_address_index,
+                                    u32 is_del)
+{
+  const lcp_itf_pair_t *lip;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+
+  if (!lcp_sync () || lcp_get_netlink_processing_active ())
+    return;
+
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index, format_ip6_address, address, address_length);
+
+  lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+  if (!lip)
+    return;
+
+  /* Enter the pair's namespace, remembering ours for the way back. */
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  /* The address logged here is IPv6; label it as such. */
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip6 %U/%u", is_del ? "del" : "add",
+                    format_lcp_itf_pair, lip, format_ip6_address, address,
+                    address_length);
+
+  if (is_del)
+    vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
+  else
+    vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+}
+
+/*
+ * Interface add/del callback implementing sub-interface auto creation and
+ * deletion. Acts only when lcp_auto_subint () is set and sw_if_index is a
+ * sub-interface: on create, a pair is created if the parent has one; on
+ * delete, the pair of the sub-interface is deleted. Always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+  const vnet_sw_interface_t *sw;
+  const lcp_itf_pair_t *parent_pair;
+  u8 *host_if_name = 0;
+
+  if (!lcp_auto_subint ())
+    return NULL;
+
+  sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+  if (!sw)
+    return NULL;
+
+  if (!vnet_sw_interface_is_sub (vnm, sw_if_index))
+    return NULL;
+
+  LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del",
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw->sw_if_index, format_vnet_sw_if_index_name,
+                    vnet_get_main (), sw->sup_sw_if_index);
+
+  if (!is_create)
+    {
+      lcp_itf_pair_delete (sw_if_index);
+      return NULL;
+    }
+
+  /* If the parent has a LIP, auto-create a LIP for this interface. */
+  parent_pair =
+    lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
+  if (!parent_pair)
+    return NULL;
+
+  /* Host name is "<parent host name>.<sub id>", NUL terminated. */
+  host_if_name = format (host_if_name, "%s.%d%c", parent_pair->lip_host_name,
+                         sw->sub.id, 0);
+
+  LCP_ITF_PAIR_INFO (
+    "interface_%s: %U has parent %U, auto-creating LCP with host-if %s",
+    is_create ? "add" : "del", format_vnet_sw_if_index_name, vnet_get_main (),
+    sw->sw_if_index, format_lcp_itf_pair, parent_pair, host_if_name);
+
+  lcp_itf_pair_create (sw->sw_if_index, host_if_name, LCP_ITF_HOST_TAP,
+                       parent_pair->lip_namespace, NULL);
+
+  vec_free (host_if_name);
+
+  return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
+/*
+ * Init function: register the IPv4 and IPv6 interface address add/del
+ * callbacks of this file. Always returns NULL.
+ */
+static clib_error_t *
+lcp_itf_sync_init (vlib_main_t *vm)
+{
+  ip4_add_del_interface_address_callback_t ip4_cb = {
+    .function = lcp_itf_ip4_add_del_interface_addr,
+    .function_opaque = 0,
+  };
+  ip6_add_del_interface_address_callback_t ip6_cb = {
+    .function = lcp_itf_ip6_add_del_interface_addr,
+    .function_opaque = 0,
+  };
+
+  vec_add1 (ip4_main.add_del_interface_address_callbacks, ip4_cb);
+  vec_add1 (ip6_main.add_del_interface_address_callbacks, ip6_cb);
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_sync_init) = {
+  .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c
new file mode 100644
index 00000000000..c08fcb4d1d9
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_mpls_sync.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux-cp/lcp_interface.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/linux/netns.h>
+
+#include <fcntl.h>
+
+/* Log class of this file; registered in lcp_mpls_sync_init (). */
+vlib_log_class_t lcp_mpls_sync_logger;
+
+/* Debug-level logging helper. NOTE(review): the expansion already ends in
+ * ';', so brace any conditional body that uses it.
+ */
+#define LCP_MPLS_SYNC_DBG(...) \
+ vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__);
+
+/*
+ * Pair-add callback: if MPLS is enabled on the phy interface of the new
+ * pair, enable it on the pair's host interface as well.
+ */
+void
+lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip)
+{
+  u8 phy_mpls_on = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index);
+
+  LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_mpls_on,
+                     format_lcp_itf_pair, lip);
+
+  if (!phy_mpls_on)
+    return;
+
+  mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index, 1);
+}
+
+/*
+ * MPLS interface state change callback.
+ *
+ * - If sw_if_index is the phy side of a pair, the new MPLS state is
+ *   applied to the pair's host interface and nothing else is done.
+ * - If sw_if_index is the host side of a pair, the "linux-cp-xc-mpls"
+ *   feature is toggled on it and, when sync is enabled, the value of
+ *   is_enable is written to the Linux per-interface MPLS input sysctl
+ *   from within the pair's network namespace.
+ * - Otherwise nothing is done.
+ *
+ * mm and opaque are unused.
+ */
+void
+lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index,
+                        u32 is_enable)
+{
+  lcp_itf_pair_t *lip;
+  index_t lipi;
+  int curr_ns_fd = -1;
+  int vif_ns_fd = -1;
+  int ctl_fd = -1;
+  u8 *ctl_path = NULL;
+
+  LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index);
+
+  /* If device is LCP PHY, sync state to host tap. */
+  lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+  if (INDEX_INVALID != lipi)
+    {
+      lip = lcp_itf_pair_get (lipi);
+      LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable,
+                         format_lcp_itf_pair, lip);
+      mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+                                        is_enable);
+      return;
+    }
+
+  /* If device is LCP host, toggle MPLS XC feature. */
+  lipi = lcp_itf_pair_find_by_host (sw_if_index);
+  if (INDEX_INVALID == lipi)
+    return;
+  lip = lcp_itf_pair_get (lipi);
+
+  vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index,
+                               is_enable, NULL, 0);
+
+  LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable,
+                     format_lcp_itf_pair, lip);
+
+  /* If syncing is enabled, sync Linux state as well.
+   * This can happen regardless of lcp_get_netlink_processing_active(),
+   * provided it does not generate Netlink messages.
+   */
+  if (!lcp_sync ())
+    return;
+
+  if (lip->lip_namespace)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      vif_ns_fd = clib_netns_open (lip->lip_namespace);
+      if (vif_ns_fd != -1)
+        clib_setns (vif_ns_fd);
+    }
+
+  /* The trailing %c appends the NUL terminator needed by open (). Its
+   * argument must be an int: passing NULL would hand a pointer to a
+   * variadic function that reads an int.
+   */
+  ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c",
+                     lip->lip_host_name, 0);
+  if (NULL == ctl_path)
+    {
+      LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl");
+      goto SYNC_CLEANUP;
+    }
+
+  ctl_fd = open ((char *) ctl_path, O_WRONLY);
+  if (ctl_fd < 0)
+    {
+      LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing",
+                         ctl_path);
+      goto SYNC_CLEANUP;
+    }
+
+  if (fdformat (ctl_fd, "%u", is_enable) < 1)
+    {
+      LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path);
+      goto SYNC_CLEANUP;
+    }
+
+  LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s",
+                     lip->lip_host_name);
+
+SYNC_CLEANUP:
+  /* Release everything acquired above and return to our own namespace. */
+  if (ctl_fd > -1)
+    close (ctl_fd);
+
+  if (NULL != ctl_path)
+    vec_free (ctl_path);
+
+  if (vif_ns_fd != -1)
+    close (vif_ns_fd);
+
+  if (curr_ns_fd != -1)
+    {
+      clib_setns (curr_ns_fd);
+      close (curr_ns_fd);
+    }
+}
+
+/*
+ * Init function: register the pair-add callback, the MPLS interface state
+ * change callback and the log class of this file. Always returns NULL.
+ */
+static clib_error_t *
+lcp_mpls_sync_init (vlib_main_t *vm)
+{
+  lcp_itf_pair_vft_t pair_vft = { 0 };
+
+  pair_vft.pair_add_fn = lcp_mpls_sync_pair_add_cb;
+  lcp_itf_pair_register_vft (&pair_vft);
+
+  mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0);
+
+  lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync");
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = {
+  .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c
new file mode 100644
index 00000000000..85b6447007a
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+
+#include <linux-cp/lcp_nl.h>
+
+#include <netlink/route/rule.h>
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+#include <vppinfra/linux/netns.h>
+
+#include <vnet/fib/fib_table.h>
+
+#include <libmnl/libmnl.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+/* Operating mode of the netlink process node, kept in nl_main.nl_status. */
+typedef enum nl_status_t_
+{
+ /* waiting for events and processing queued notifications */
+ NL_STATUS_NOTIF_PROC,
+ /* (re)synchronizing: entered after an NL_EVENT_ERR event */
+ NL_STATUS_SYNC,
+} nl_status_t;
+
+/* Kinds of synchronization socket; used as index into
+ * nl_main.sk_route_sync[].
+ */
+typedef enum nl_sock_type_t_
+{
+ NL_SOCK_TYPE_LINK,
+ NL_SOCK_TYPE_ADDR,
+ NL_SOCK_TYPE_NEIGH,
+ NL_SOCK_TYPE_ROUTE,
+} nl_sock_type_t;
+
+/* Number of socket types; relies on NL_SOCK_TYPE_ROUTE being the last
+ * enumerator.
+ */
+#define NL_SOCK_TYPES_N (NL_SOCK_TYPE_ROUTE + 1)
+
+/* Socket type, message type, type name, function subname.
+ * The function subname is pasted into nl_<subname>_sync_begin and similar
+ * identifiers by users of this list.
+ */
+#define foreach_sock_type \
+ _ (NL_SOCK_TYPE_LINK, RTM_GETLINK, "link", link) \
+ _ (NL_SOCK_TYPE_ADDR, RTM_GETADDR, "address", link_addr) \
+ _ (NL_SOCK_TYPE_NEIGH, RTM_GETNEIGH, "neighbor", neigh) \
+ _ (NL_SOCK_TYPE_ROUTE, RTM_GETROUTE, "route", route)
+
+/* Events handled by the netlink process node. */
+typedef enum nl_event_type_t_
+{
+ /* queued messages are ready to be processed */
+ NL_EVENT_READ,
+ /* polling or reading the notification socket failed; triggers a sync */
+ NL_EVENT_ERR,
+} nl_event_type_t;
+
+/* State of the netlink listener; a single instance, nl_main, exists. */
+typedef struct nl_main
+{
+
+ /* current operating mode of the process node */
+ nl_status_t nl_status;
+
+ /* notification socket (presumably; opened by lcp_nl_open_socket - confirm) */
+ struct nl_sock *sk_route;
+ /* synchronization sockets, indexed by nl_sock_type_t */
+ struct nl_sock *sk_route_sync[NL_SOCK_TYPES_N];
+ vlib_log_class_t nl_logger;
+ /* vector of handler tables added through nl_register_vft () */
+ nl_vft_t *nl_vfts;
+ struct nl_cache *nl_caches[LCP_NL_N_OBJS];
+ /* vector of received messages waiting to be processed */
+ nl_msg_info_t *nl_msg_queue;
+ uword clib_file_index;
+
+ u32 rx_buf_size;
+ u32 tx_buf_size;
+ /* limit on messages handled by one nl_route_process_msgs () call */
+ u32 batch_size;
+ /* delay before the next batch while the queue is not empty */
+ u32 batch_delay_ms;
+
+ /* message limit passed to lcp_nl_recv_dump_replies () */
+ u32 sync_batch_limit;
+ u32 sync_batch_delay_ms;
+ /* time to wait before a synchronization attempt */
+ u32 sync_attempt_delay_ms;
+
+} nl_main_t;
+
+/* Default values for the tunables of nl_main. */
+#define NL_RX_BUF_SIZE_DEF (1 << 27) /* 128 MB */
+#define NL_TX_BUF_SIZE_DEF (1 << 18) /* 256 kB */
+#define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */
+#define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */
+
+#define NL_SYNC_BATCH_LIMIT_DEF (1 << 10) /* 1024 */
+#define NL_SYNC_BATCH_DELAY_MS_DEF 20 /* 20ms, max 50 batch/s */
+#define NL_SYNC_ATTEMPT_DELAY_MS_DEF 2000 /* 2s */
+
+/* The single instance; fields not listed here start out zeroed. */
+static nl_main_t nl_main = {
+ .rx_buf_size = NL_RX_BUF_SIZE_DEF,
+ .tx_buf_size = NL_TX_BUF_SIZE_DEF,
+ .batch_size = NL_BATCH_SIZE_DEF,
+ .batch_delay_ms = NL_BATCH_DELAY_MS_DEF,
+ .sync_batch_limit = NL_SYNC_BATCH_LIMIT_DEF,
+ .sync_batch_delay_ms = NL_SYNC_BATCH_DELAY_MS_DEF,
+ .sync_attempt_delay_ms = NL_SYNC_ATTEMPT_DELAY_MS_DEF,
+};
+
+/* #define foreach_nl_nft_proto \ */
+/* _(IP4, "ip", AF_INT) \ */
+/* _(IP6, "ip6", NFPROTO_IPV6) */
+
+/* typedef enum nl_nft_proto_t_ */
+/* { */
+/* #define _(a,b,c) NL_NFT_PROTO_##a = c, */
+/* foreach_nl_nft_proto */
+/* #undef _ */
+/* } nl_nft_proto_t; */
+
+/*
+ * Call handler __func of every registered vft with the single argument
+ * __arg. Entries without a callback are skipped. Unless the handler is
+ * flagged is_mp_safe, the call is wrapped in a worker thread barrier
+ * sync/release.
+ *
+ * NOTE(review): expands to a bare brace block that declares a local named
+ * nm, which shadows any nm in the invoking function.
+ */
+#define FOREACH_VFT(__func, __arg) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+/*
+ * Call handler __func of every registered vft without arguments. Entries
+ * without a callback are skipped. Unless the handler is flagged
+ * is_mp_safe, the call is wrapped in a worker thread barrier sync/release.
+ *
+ * NOTE(review): expands to a bare brace block that declares a local named
+ * nm, which shadows any nm in the invoking function.
+ */
+#define FOREACH_VFT_NO_ARG(__func) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+/*
+ * Call handler __func of every registered vft with the two arguments
+ * __arg and __ctx. Entries without a callback are skipped. Unless the
+ * handler is flagged is_mp_safe, the call is wrapped in a worker thread
+ * barrier sync/release.
+ *
+ * NOTE(review): expands to a bare brace block that declares a local named
+ * nm, which shadows any nm in the invoking function.
+ */
+#define FOREACH_VFT_CTX(__func, __arg, __ctx) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg, __ctx); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+/* Append a copy of the handler table *nv to the registered tables. */
+void
+nl_register_vft (const nl_vft_t *nv)
+{
+  vec_add1 (nl_main.nl_vfts, *nv);
+}
+
+/*
+ * Logging helpers writing to nl_main.nl_logger.
+ * NOTE(review): NL_INFO logs at notice level, not info level. Every
+ * expansion already ends in ';', so brace conditional bodies using them.
+ */
+#define NL_DBG(...) vlib_log_debug (nl_main.nl_logger, __VA_ARGS__);
+#define NL_INFO(...) vlib_log_notice (nl_main.nl_logger, __VA_ARGS__);
+#define NL_ERROR(...) vlib_log_err (nl_main.nl_logger, __VA_ARGS__);
+
+static void lcp_nl_open_socket (void);
+static void lcp_nl_close_socket (void);
+static void lcp_nl_open_sync_socket (nl_sock_type_t sock_type);
+static void lcp_nl_close_sync_socket (nl_sock_type_t sock_type);
+
+/* Pass route rr to every registered nvl_rt_route_del handler; arg is unused. */
+static void
+nl_route_del (struct rtnl_route *rr, void *arg)
+{
+ FOREACH_VFT (nvl_rt_route_del, rr);
+}
+
+/*
+ * Pass route rr to every registered nvl_rt_route_add handler. When arg is
+ * given it is the nl_msg_info_t of the message carrying the route, and the
+ * handlers additionally receive the NLM_F_REPLACE bit of its header flags
+ * (0 when arg is NULL).
+ */
+static void
+nl_route_add (struct rtnl_route *rr, void *arg)
+{
+  nl_msg_info_t *msg_info = (nl_msg_info_t *) arg;
+  int is_replace = 0;
+
+  if (msg_info)
+    is_replace = (nlmsg_hdr (msg_info->msg)->nlmsg_flags & NLM_F_REPLACE);
+
+  FOREACH_VFT_CTX (nvl_rt_route_add, rr, is_replace);
+}
+
+/* Call every registered nvl_rt_route_sync_begin handler. */
+static void
+nl_route_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_begin);
+}
+
+/* Call every registered nvl_rt_route_sync_end handler. */
+static void
+nl_route_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_end);
+}
+
+/* Pass neighbor rn to every registered nvl_rt_neigh_del handler; arg is
+ * unused.
+ */
+static void
+nl_neigh_del (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_del, rn);
+}
+
+/* Pass neighbor rn to every registered nvl_rt_neigh_add handler; arg is
+ * unused.
+ */
+static void
+nl_neigh_add (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_add, rn);
+}
+
+/* Call every registered nvl_rt_neigh_sync_begin handler. */
+static void
+nl_neigh_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_begin);
+}
+
+/* Call every registered nvl_rt_neigh_sync_end handler. */
+static void
+nl_neigh_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_end);
+}
+
+/* Pass address rla to every registered nvl_rt_addr_del handler; arg is
+ * unused.
+ */
+static void
+nl_link_addr_del (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_del, rla);
+}
+
+/* Pass address rla to every registered nvl_rt_addr_add handler; arg is
+ * unused.
+ */
+static void
+nl_link_addr_add (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_add, rla);
+}
+
+/* Call every registered nvl_rt_addr_sync_begin handler. */
+static void
+nl_link_addr_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_begin);
+}
+
+/* Call every registered nvl_rt_addr_sync_end handler. */
+static void
+nl_link_addr_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_end);
+}
+
+/* Pass link rl and the opaque arg to every registered nvl_rt_link_del
+ * handler.
+ */
+static void
+nl_link_del (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_del, rl, arg);
+}
+
+/* Pass link rl and the opaque arg to every registered nvl_rt_link_add
+ * handler.
+ */
+static void
+nl_link_add (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_add, rl, arg);
+}
+
+/* Call every registered nvl_rt_link_sync_begin handler. */
+static void
+nl_link_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_begin);
+}
+
+/* Call every registered nvl_rt_link_sync_end handler. */
+static void
+nl_link_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_end);
+}
+
+/*
+ * Hand a parsed netlink object to the add/del wrapper matching its
+ * message type. Objects are dropped while no interface pair exists;
+ * unknown message types are only logged.
+ */
+static void
+nl_route_dispatch (struct nl_object *obj, void *arg)
+{
+  int msg_type;
+
+  /* nothing can be done without interface mappings */
+  if (lcp_itf_num_pairs () == 0)
+    return;
+
+  msg_type = nl_object_get_msgtype (obj);
+
+  switch (msg_type)
+    {
+    /* links */
+    case RTM_NEWLINK:
+      nl_link_add ((struct rtnl_link *) obj, arg);
+      break;
+    case RTM_DELLINK:
+      nl_link_del ((struct rtnl_link *) obj, arg);
+      break;
+
+    /* addresses */
+    case RTM_NEWADDR:
+      nl_link_addr_add ((struct rtnl_addr *) obj, arg);
+      break;
+    case RTM_DELADDR:
+      nl_link_addr_del ((struct rtnl_addr *) obj, arg);
+      break;
+
+    /* neighbors */
+    case RTM_NEWNEIGH:
+      nl_neigh_add ((struct rtnl_neigh *) obj, arg);
+      break;
+    case RTM_DELNEIGH:
+      nl_neigh_del ((struct rtnl_neigh *) obj, arg);
+      break;
+
+    /* routes */
+    case RTM_NEWROUTE:
+      nl_route_add ((struct rtnl_route *) obj, arg);
+      break;
+    case RTM_DELROUTE:
+      nl_route_del ((struct rtnl_route *) obj, arg);
+      break;
+
+    default:
+      NL_INFO ("unhandled: %s", nl_object_get_type (obj));
+      break;
+    }
+}
+
+/*
+ * Parse, dispatch and free one batch of messages from the head of the
+ * queue, then remove them from the queue. The batch ends when the queue
+ * is exhausted or the batch size is reached. Netlink processing is
+ * flagged active for the duration of the call.
+ *
+ * Returns the number of messages processed.
+ */
+static int
+nl_route_process_msgs (void)
+{
+  nl_main_t *nm = &nl_main;
+  nl_msg_info_t *queued;
+  int n_done = 0;
+
+  lcp_set_netlink_processing_active (1);
+
+  /* process a batch of messages. break if we hit our limit */
+  vec_foreach (queued, nm->nl_msg_queue)
+    {
+      int rv = nl_msg_parse (queued->msg, nl_route_dispatch, queued);
+
+      if (rv < 0)
+        {
+          NL_ERROR ("Unable to parse object: %s", nl_geterror (rv));
+        }
+      nlmsg_free (queued->msg);
+
+      n_done++;
+      if (n_done >= nm->batch_size)
+        break;
+    }
+
+  /* remove the messages we processed from the head of the queue */
+  if (n_done != 0)
+    vec_delete (nm->nl_msg_queue, n_done, 0);
+
+  NL_DBG ("Processed %u messages", n_done);
+
+  lcp_set_netlink_processing_active (0);
+
+  return n_done;
+}
+
+/*
+ * Free every queued message without processing it and empty the queue.
+ *
+ * Returns the number of messages discarded.
+ */
+static int
+lcp_nl_route_discard_msgs (void)
+{
+  nl_main_t *nm = &nl_main;
+  nl_msg_info_t *queued;
+  int n_dropped = vec_len (nm->nl_msg_queue);
+
+  if (n_dropped != 0)
+    {
+      vec_foreach (queued, nm->nl_msg_queue)
+        nlmsg_free (queued->msg);
+
+      vec_reset_length (nm->nl_msg_queue);
+
+      NL_INFO ("Discarded %u messages", n_dropped);
+    }
+
+  return n_dropped;
+}
+
+/*
+ * Send a dump request of the given message type, for all address
+ * families, on the synchronization socket of the given type.
+ *
+ * Returns the result of nl_send_simple (); negative on failure.
+ */
+static int
+lcp_nl_route_send_dump_req (nl_sock_type_t sock_type, int msg_type)
+{
+  struct nl_sock *sync_sk = nl_main.sk_route_sync[sock_type];
+  struct rtgenmsg req = { .rtgen_family = AF_UNSPEC };
+  int rv;
+
+  rv = nl_send_simple (sync_sk, msg_type, NLM_F_DUMP, &req, sizeof (req));
+
+  if (rv >= 0)
+    {
+      NL_INFO ("Dump request sent via socket %d of type %d",
+               nl_socket_get_fd (sync_sk), sock_type);
+    }
+  else
+    {
+      NL_ERROR ("Unable to send a dump request: %s", nl_geterror (rv));
+    }
+
+  return rv;
+}
+
+/*
+ * Parse one dump reply and dispatch the resulting object with no message
+ * context. Parse failures are logged; NL_OK is returned either way. arg
+ * is unused.
+ */
+static int
+lcp_nl_route_dump_cb (struct nl_msg *msg, void *arg)
+{
+  int rv = nl_msg_parse (msg, nl_route_dispatch, NULL);
+
+  if (rv < 0)
+    {
+      NL_ERROR ("Unable to parse object: %s", nl_geterror (rv));
+    }
+
+  return NL_OK;
+}
+
+/*
+ * Receive and process dump replies from the synchronization socket of
+ * the given type.
+ *
+ * Buffers are read with nl_recv () until an NLMSG_DONE message is seen or
+ * at least msg_limit messages have been counted. The limit is checked
+ * only after a whole buffer has been consumed, so the count may exceed
+ * msg_limit. Netlink processing is flagged active while the function runs
+ * and inactive again before it returns.
+ *
+ * Returns:
+ *  - the value returned by nl_recv () when that is <= 0;
+ *  - a negative NLE_* code on allocation failure, overrun, a truncated
+ *    error message or an error reported by the peer;
+ *  - otherwise the number of messages counted (control messages
+ *    included), with *is_done_rcvd set to 1 if NLMSG_DONE was seen and 0
+ *    if not.
+ * *is_done_rcvd is not written on the first two kinds of return.
+ */
+static int
+lcp_nl_recv_dump_replies (nl_sock_type_t sock_type, int msg_limit,
+ int *is_done_rcvd)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ struct sockaddr_nl nla;
+ uint8_t *buf = NULL;
+ int n_bytes;
+ struct nlmsghdr *hdr;
+ struct nl_msg *msg = NULL;
+ int err = 0;
+ int done = 0;
+ int n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+continue_reading:
+ /* buf and msg are both NULL whenever control reaches this point */
+ n_bytes = nl_recv (sk_route, &nla, &buf, /* creds */ NULL);
+ if (n_bytes <= 0)
+ {
+ lcp_set_netlink_processing_active (0);
+ return n_bytes;
+ }
+
+ hdr = (struct nlmsghdr *) buf;
+ while (nlmsg_ok (hdr, n_bytes))
+ {
+ /* release the message of the previous iteration, if any */
+ nlmsg_free (msg);
+ msg = nlmsg_convert (hdr);
+ if (!msg)
+ {
+ err = -NLE_NOMEM;
+ goto out;
+ }
+
+ n_msgs++;
+
+ nlmsg_set_proto (msg, NETLINK_ROUTE);
+ nlmsg_set_src (msg, &nla);
+
+ /* Message that terminates a multipart message. Finish parsing and signal
+ * the caller that all dump replies have been received
+ */
+ if (hdr->nlmsg_type == NLMSG_DONE)
+ {
+ done = 1;
+ goto out;
+ }
+ /* Message to be ignored. Continue parsing */
+ else if (hdr->nlmsg_type == NLMSG_NOOP)
+ ;
+ /* Message that indicates data was lost. Finish parsing and return an
+ * error
+ */
+ else if (hdr->nlmsg_type == NLMSG_OVERRUN)
+ {
+ err = -NLE_MSG_OVERFLOW;
+ goto out;
+ }
+ /* Message that indicates an error. Finish parsing, extract the error
+ * code, and return it */
+ else if (hdr->nlmsg_type == NLMSG_ERROR)
+ {
+ struct nlmsgerr *e = nlmsg_data (hdr);
+
+ if (hdr->nlmsg_len < nlmsg_size (sizeof (*e)))
+ {
+ err = -NLE_MSG_TRUNC;
+ goto out;
+ }
+ else if (e->error)
+ {
+ err = -nl_syserr2nlerr (e->error);
+ goto out;
+ }
+ /* Message is an acknowledgement (err_code = 0). Continue parsing */
+ else
+ ;
+ }
+ /* Message that contains the requested data. Pass it for processing and
+ * continue parsing
+ */
+ else
+ {
+ lcp_nl_route_dump_cb (msg, NULL);
+ }
+
+ hdr = nlmsg_next (hdr, &n_bytes);
+ }
+
+ /* Buffer consumed: release it and the last message, and clear both
+ * pointers so that the frees at the out label below are harmless.
+ */
+ nlmsg_free (msg);
+ free (buf);
+ msg = NULL;
+ buf = NULL;
+
+ if (!done && n_msgs < msg_limit)
+ goto continue_reading;
+
+out:
+ lcp_set_netlink_processing_active (0);
+
+ /* on a jump from inside the loop these release the live message/buffer */
+ nlmsg_free (msg);
+ free (buf);
+
+ if (err)
+ return err;
+
+ *is_done_rcvd = done;
+
+ return n_msgs;
+}
+
+#define DAY_F64 (1.0 * (24 * 60 * 60))
+
+/*
+ * Process node body driving netlink handling. It loops forever over a two
+ * state machine kept in nl_main.nl_status:
+ *
+ * - NL_STATUS_NOTIF_PROC: sleep until an event or the wait_time timeout,
+ * then process a batch of queued notifications. An NL_EVENT_ERR switches
+ * the state to NL_STATUS_SYNC.
+ * - NL_STATUS_SYNC: drop the notification socket and queue, wait, reopen
+ * the sockets, request dumps for every type in foreach_sock_type and
+ * process the replies in batches, then go back to NL_STATUS_NOTIF_PROC.
+ * An NL_EVENT_ERR received between batches jumps to sync_later, which
+ * leaves the state at NL_STATUS_SYNC so that the attempt is repeated.
+ *
+ * NOTE(review): when lcp_nl_recv_dump_replies() reports an error or EOF the
+ * do/while is left with "break", so nl_<type>_sync_end() still runs and the
+ * state still becomes NL_STATUS_NOTIF_PROC - confirm that finishing a sync
+ * after a partial dump is intended.
+ */
+static uword
+nl_route_process (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ nl_main_t *nm = &nl_main;
+ uword event_type;
+ uword *event_data = 0;
+ f64 wait_time = DAY_F64;
+ int n_msgs;
+ int is_done;
+
+ while (1)
+ {
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ {
+ /* If we process a batch of messages and stop because we reached the
+ * batch size limit, we want to wake up after the batch delay and
+ * process more. Otherwise we just want to wait for a read event.
+ */
+ vlib_process_wait_for_event_or_clock (vm, wait_time);
+ event_type = vlib_process_get_events (vm, &event_data);
+ /* Event payloads are not used, only the event type matters */
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ /* Process batch of queued messages on timeout or read event
+ * signal
+ */
+ case ~0:
+ case NL_EVENT_READ:
+ nl_route_process_msgs ();
+ /* Messages still queued: come back after the batch delay */
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ?
+ nm->batch_delay_ms * 1e-3 :
+ DAY_F64;
+ break;
+
+ /* Initiate synchronization if there was an error polling or
+ * reading the notification socket
+ */
+ case NL_EVENT_ERR:
+ nm->nl_status = NL_STATUS_SYNC;
+ break;
+
+ default:
+ NL_ERROR ("Unknown event type: %u", (u32) event_type);
+ }
+ }
+ else if (nm->nl_status == NL_STATUS_SYNC)
+ {
+ /* Stop processing notifications - close the notification socket and
+ * discard all messages that are currently in the queue
+ */
+ lcp_nl_close_socket ();
+ lcp_nl_route_discard_msgs ();
+
+ /* Wait some time before next synchronization attempt. Allows to
+ * reduce the number of failed attempts that stall the main thread by
+ * waiting out the notification storm
+ */
+ NL_INFO ("Wait before next synchronization attempt for %ums",
+ nm->sync_attempt_delay_ms);
+ vlib_process_suspend (vm, nm->sync_attempt_delay_ms * 1e-3);
+
+ /* Open netlink synchronization socket, one for every data type of
+ * interest: link, address, neighbor, and route. That is needed to
+ * be able to send dump requests for every data type simultaneously.
+ * If send a dump request while the previous one is in progress,
+ * the request will fail and EBUSY returned
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_open_sync_socket (stype);
+ foreach_sock_type
+#undef _
+
+ /* Start reading notifications and enqueueing them for further
+ * processing. The notifications will serve as a difference between
+ * the snapshot made after the dump request and the actual state at
+ * the moment. Once all the dump replies are processed, the
+ * notifications will be processed
+ */
+ lcp_nl_open_socket ();
+
+ /* Request the current entry set from the kernel for every data type
+ * of interest. Thus requesting a snapshot of the current routing
+ * state that the kernel will make and then reply with
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_route_send_dump_req (stype, mtype);
+ foreach_sock_type
+#undef _
+
+ /* Process all the dump replies */
+#define _(stype, mtype, tname, fn) \
+ nl_##fn##_sync_begin (); \
+ is_done = 0; \
+ do \
+ { \
+ n_msgs = \
+ lcp_nl_recv_dump_replies (stype, nm->sync_batch_limit, &is_done); \
+ if (n_msgs < 0) \
+ { \
+ NL_ERROR ("Error receiving dump replies of type " tname \
+ ": %s (%d)", \
+ nl_geterror (n_msgs), n_msgs); \
+ break; \
+ } \
+ else if (n_msgs == 0) \
+ { \
+ NL_ERROR ("EOF while receiving dump replies of type " tname); \
+ break; \
+ } \
+ else \
+ NL_INFO ("Processed %u dump replies of type " tname, n_msgs); \
+ \
+ /* Suspend the processing loop and wait until event signal is \
+ * received or timeout expires. During synchronization, only \
+ * error event is expected because read event is suppressed. \
+ * Allows not to stall the main thread and detect errors on the \
+ * notification socket that will make synchronization \
+ * incomplete \
+ */ \
+ vlib_process_wait_for_event_or_clock (vm, \
+ nm->sync_batch_delay_ms * 1e-3); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ vec_reset_length (event_data); \
+ \
+ /* If error event received, stop synchronization and repeat an \
+ * attempt later \
+ */ \
+ if (event_type == NL_EVENT_ERR) \
+ goto sync_later; \
+ } \
+ while (!is_done); \
+ nl_##fn##_sync_end ();
+
+ foreach_sock_type
+#undef _
+
+ /* Start processing notifications */
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+
+ /* Trigger messages processing if there are notifications received
+ * during synchronization
+ */
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ? 1e-3 : DAY_F64;
+
+ /* Reached both on success (fall through) and on an error event during
+ * synchronization (goto); in the latter case nl_status is still
+ * NL_STATUS_SYNC so the next loop iteration starts a new attempt
+ */
+ sync_later:
+ /* Close netlink synchronization sockets */
+#define _(stype, mtype, tname, fn) lcp_nl_close_sync_socket (stype);
+ foreach_sock_type
+#undef _
+ }
+ else
+ /* NOTE(review): this branch neither suspends nor changes nl_status,
+ * so the loop would spin if it were ever taken */
+ NL_ERROR ("Unknown status: %d", nm->nl_status);
+ }
+ /* Not reached: nothing leaves the while (1) loop above */
+ return frame->n_vectors;
+}
+
+/* Registration of the process node that runs nl_route_process(). Other
+ * functions in this file signal NL_EVENT_READ / NL_EVENT_ERR to it through
+ * nl_route_process_node.index. The stack size is given as a log2 value,
+ * i.e. 2^17 bytes.
+ */
+VLIB_REGISTER_NODE (nl_route_process_node, static) = {
+ .function = nl_route_process,
+ .name = "linux-cp-netlink-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 17,
+};
+
+/*
+ * Callback installed on the notification socket for valid messages (see
+ * lcp_nl_open_socket). Nothing is processed here: the message is appended
+ * to nl_main.nl_msg_queue together with its arrival time and handled later
+ * by the process node.
+ *
+ * @param msg the received netlink message
+ * @param arg unused
+ * @return always 0
+ */
+static int
+nl_route_cb (struct nl_msg *msg, void *arg)
+{
+  nl_main_t *nm = &nl_main;
+  nl_msg_info_t *entry = 0;
+
+  /* hold a reference of our own, the message stays queued after we return */
+  nlmsg_get (msg);
+
+  vec_add2 (nm->nl_msg_queue, entry, 1);
+  entry->msg = msg;
+
+  /* remember when the message arrived */
+  entry->ts = vlib_time_now (vlib_get_main ());
+
+  return 0;
+}
+
+/*
+ * Read everything currently available on the notification socket and tell
+ * the process node about the outcome.
+ *
+ * Reading repeats until nl_recvmsgs_default() reports an error. -NLE_AGAIN
+ * is the expected way for the loop to end; in that case NL_EVENT_READ is
+ * signalled, but only while notifications are being processed
+ * (NL_STATUS_NOTIF_PROC). Any other error is signalled as NL_EVENT_ERR.
+ *
+ * @return the (negative) code that ended the read loop
+ */
+int
+lcp_nl_drain_messages (void)
+{
+  nl_main_t *nm = &nl_main;
+  vlib_main_t *vm = vlib_get_main ();
+  int rv;
+
+  /* keep receiving for as long as the reads succeed */
+  do
+    rv = nl_recvmsgs_default (nm->sk_route);
+  while (rv >= 0);
+
+  if (rv == -NLE_AGAIN)
+    {
+      /* Socket drained. Wake the process node only if it is currently
+       * consuming notifications
+       */
+      if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+        vlib_process_signal_event (vm, nl_route_process_node.index,
+                                   NL_EVENT_READ, 0);
+    }
+  else
+    {
+      /* Anything other than EAGAIN is a genuine failure */
+      vlib_process_signal_event (vm, nl_route_process_node.index,
+                                 NL_EVENT_ERR, 0);
+    }
+
+  return rv;
+}
+
+/*
+ * Interface pair "add" hook (registered from lcp_nl_init). Drains the
+ * notification socket; the pair argument itself is not used and the result
+ * of the drain is ignored.
+ */
+void
+lcp_nl_pair_add_cb (lcp_itf_pair_t *pair)
+{
+  (void) lcp_nl_drain_messages ();
+}
+
+/*
+ * Read-ready handler of the notification socket file. Drains the socket and
+ * logs any failure other than -NLE_AGAIN.
+ *
+ * @param f the clib file wrapping the netlink socket fd
+ * @return always 0 (no error object)
+ */
+static clib_error_t *
+nl_route_read_cb (clib_file_t *f)
+{
+  int rv = lcp_nl_drain_messages ();
+
+  /* success or "nothing more to read" - nothing to report */
+  if (rv >= 0 || rv == -NLE_AGAIN)
+    return 0;
+
+  NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)",
+            f->file_descriptor, nl_geterror (rv), rv);
+
+  return 0;
+}
+
+/*
+ * Error handler of the notification socket file. Logs the failure, signals
+ * NL_EVENT_ERR to the process node and hands an error object back to the
+ * caller.
+ *
+ * @param f the clib file wrapping the netlink socket fd
+ * @return a clib error describing the polling failure
+ */
+static clib_error_t *
+nl_route_error_cb (clib_file_t *f)
+{
+  vlib_main_t *vm;
+
+  NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor);
+
+  /* let the process node know */
+  vm = vlib_get_main ();
+  vlib_process_signal_event (vm, nl_route_process_node.index, NL_EVENT_ERR,
+                             0);
+
+  return clib_error_return (0, "Error polling netlink socket %d",
+                            f->file_descriptor);
+}
+
+/*
+ * Return the libnl cache stored for object type t. The type is used
+ * directly as an index into nl_main.nl_caches; no range check is made.
+ */
+struct nl_cache *
+lcp_nl_get_cache (lcp_nl_obj_t t)
+{
+  return nl_main.nl_caches[t];
+}
+
+/*
+ * Record the RX buffer size for the netlink notification socket and, when
+ * the socket is already open, apply it right away (the TX size is left at
+ * its stored value).
+ */
+void
+lcp_nl_set_buffer_size (u32 buf_size)
+{
+  nl_main_t *nm = &nl_main;
+
+  nm->rx_buf_size = buf_size;
+
+  /* no socket yet - the stored value is applied when it gets opened */
+  if (!nm->sk_route)
+    return;
+
+  nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+}
+
+/*
+ * Store the batch size, i.e. the maximum number of netlink messages to
+ * process at one time.
+ */
+void
+lcp_nl_set_batch_size (u32 batch_size)
+{
+  nl_main.batch_size = batch_size;
+}
+
+/*
+ * Store the batch delay, i.e. how long (in milliseconds) to wait between
+ * processing two batches of netlink messages.
+ */
+void
+lcp_nl_set_batch_delay (u32 batch_delay_ms)
+{
+  nl_main.batch_delay_ms = batch_delay_ms;
+}
+
+/*
+ * Parser for the "linux-nl" startup configuration section. Recognised
+ * options, each taking an unsigned value:
+ *   nl-rx-buffer-size, nl-batch-size, nl-batch-delay-ms
+ *
+ * @return NULL when all input was consumed, otherwise an error naming the
+ *         option that was not understood
+ */
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  u32 value;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "nl-rx-buffer-size %u", &value))
+        {
+          lcp_nl_set_buffer_size (value);
+          continue;
+        }
+
+      if (unformat (input, "nl-batch-size %u", &value))
+        {
+          lcp_nl_set_batch_size (value);
+          continue;
+        }
+
+      if (unformat (input, "nl-batch-delay-ms %u", &value))
+        {
+          lcp_nl_set_batch_delay (value);
+          continue;
+        }
+
+      /* none of the known options matched */
+      return clib_error_return (0, "invalid netlink option: %U",
+                                format_unformat_error, input);
+    }
+
+  return NULL;
+}
+
+VLIB_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-nl");
+
+/*
+ * Stop polling the notification socket and release it.
+ *
+ * The clib file entry is only removed from the polled set; its index stays
+ * in nl_main.clib_file_index unless the index turns out not to refer to a
+ * file, in which case it is reset to ~0.
+ */
+static void
+lcp_nl_close_socket (void)
+{
+  nl_main_t *nm = &nl_main;
+
+  if (nm->clib_file_index != ~0)
+    {
+      clib_file_main_t *fm = &file_main;
+      clib_file_t *file = clib_file_get (fm, nm->clib_file_index);
+
+      if (!file)
+        {
+          /* the stored index is stale, forget it */
+          nm->clib_file_index = ~0;
+        }
+      else
+        {
+          /* take the fd out of the epoll set */
+          NL_INFO ("Stopping poll of fd %u", file->file_descriptor);
+          fm->file_update (file, UNIX_FILE_UPDATE_DELETE);
+        }
+    }
+
+  /* nothing more to do unless a socket was created earlier */
+  if (!nm->sk_route)
+    return;
+
+  NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_route));
+  nl_socket_free (nm->sk_route);
+  nm->sk_route = NULL;
+}
+
+/*
+ * Create the netlink notification socket, connect it (inside the default
+ * linux-cp network namespace when one is configured), subscribe it to the
+ * routing related multicast groups and start polling it.
+ *
+ * Failures of the namespace switch and of nl_connect() are logged. A failed
+ * connect does not abort: the socket is left in place so that the first read
+ * attempt reports the error through the regular NL_EVENT_ERR path. A failed
+ * allocation is logged and leaves nl_main.sk_route NULL.
+ */
+static void
+lcp_nl_open_socket (void)
+{
+  nl_main_t *nm = &nl_main;
+  int dest_ns_fd, curr_ns_fd = -1;
+  int err;
+
+  /* Allocate a new socket for the notifications.
+   * Notifications do not use sequence numbers, disable sequence number
+   * checking.
+   */
+  nm->sk_route = nl_socket_alloc ();
+  if (!nm->sk_route)
+    {
+      NL_ERROR ("Cannot allocate netlink socket");
+      return;
+    }
+  nl_socket_disable_seq_check (nm->sk_route);
+
+  /* Same "is a namespace configured" test as lcp_nl_open_sync_socket */
+  dest_ns_fd = lcp_get_default_ns_fd ();
+  if (dest_ns_fd > 0)
+    {
+      curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+      if (setns (dest_ns_fd, CLONE_NEWNET) == -1)
+        NL_ERROR ("Cannot set destination ns");
+    }
+
+  err = nl_connect (nm->sk_route, NETLINK_ROUTE);
+  if (err < 0)
+    NL_ERROR ("Cannot connect netlink socket: %s (%d)", nl_geterror (err),
+              err);
+
+  /* Go back to the namespace we started in */
+  if (dest_ns_fd > 0)
+    {
+      if (curr_ns_fd < 0)
+        {
+          NL_ERROR ("No previous ns to set");
+        }
+      else
+        {
+          if (setns (curr_ns_fd, CLONE_NEWNET) == -1)
+            NL_ERROR ("Cannot set previous ns");
+          close (curr_ns_fd);
+        }
+    }
+
+  /* Subscribe to all the 'routing' notifications on the route socket */
+  nl_socket_add_memberships (nm->sk_route, RTNLGRP_LINK, RTNLGRP_IPV6_IFADDR,
+                             RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE,
+                             RTNLGRP_IPV6_ROUTE, RTNLGRP_NEIGH, RTNLGRP_NOTIFY,
+#ifdef RTNLGRP_MPLS_ROUTE /* not defined on CentOS/RHEL 7 */
+                             RTNLGRP_MPLS_ROUTE,
+#endif
+                             RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, 0);
+
+  /* Set socket in nonblocking mode and increase buffer sizes */
+  nl_socket_set_nonblocking (nm->sk_route);
+  nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+
+  if (nm->clib_file_index == ~0)
+    {
+      /* first open: register a new clib file for the socket fd */
+      clib_file_t rt_file = {
+        .read_function = nl_route_read_cb,
+        .error_function = nl_route_error_cb,
+        .file_descriptor = nl_socket_get_fd (nm->sk_route),
+        .description = format (0, "linux-cp netlink route socket"),
+      };
+
+      nm->clib_file_index = clib_file_add (&file_main, &rt_file);
+      NL_INFO ("Added file %u", nm->clib_file_index);
+    }
+  else
+    /* clib file already created and socket was closed due to error */
+    {
+      clib_file_main_t *fm = &file_main;
+      clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+      f->file_descriptor = nl_socket_get_fd (nm->sk_route);
+      fm->file_update (f, UNIX_FILE_UPDATE_ADD);
+      NL_INFO ("Starting poll of %d", f->file_descriptor);
+    }
+
+  /* Every valid notification is handed to nl_route_cb */
+  nl_socket_modify_cb (nm->sk_route, NL_CB_VALID, NL_CB_CUSTOM, nl_route_cb,
+                       NULL);
+  NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_route));
+}
+
+/*
+ * Create and connect the synchronization socket for sock_type and store it
+ * in nl_main.sk_route_sync[sock_type]. The connect is done inside the
+ * default linux-cp network namespace when one is configured.
+ *
+ * A failed allocation is logged and leaves the slot NULL. A failed connect
+ * is logged and the socket is kept, so the error shows up again when the
+ * socket is used.
+ *
+ * @param sock_type which of the per data type synchronization sockets to
+ *                  open
+ */
+static void
+lcp_nl_open_sync_socket (nl_sock_type_t sock_type)
+{
+  nl_main_t *nm = &nl_main;
+  int dest_ns_fd, curr_ns_fd = -1;
+  struct nl_sock *sk_route;
+  int err;
+
+  /* Allocate a new blocking socket for routes that will be used for dump
+   * requests. Buffer sizes are left default because replies to dump requests
+   * are flow-controlled and the kernel will not overflow the socket by sending
+   * these
+   */
+
+  nm->sk_route_sync[sock_type] = sk_route = nl_socket_alloc ();
+  if (!sk_route)
+    {
+      NL_ERROR ("Cannot allocate netlink synchronization socket of type %d",
+                sock_type);
+      return;
+    }
+
+  dest_ns_fd = lcp_get_default_ns_fd ();
+  if (dest_ns_fd > 0)
+    {
+      curr_ns_fd = clib_netns_open (NULL /* self */);
+      if (clib_setns (dest_ns_fd) == -1)
+        NL_ERROR ("Cannot set destination ns");
+    }
+
+  err = nl_connect (sk_route, NETLINK_ROUTE);
+  if (err < 0)
+    NL_ERROR ("Cannot connect netlink synchronization socket: %s (%d)",
+              nl_geterror (err), err);
+
+  /* Go back to the namespace we started in */
+  if (dest_ns_fd > 0)
+    {
+      if (curr_ns_fd == -1)
+        {
+          NL_ERROR ("No previous ns to set");
+        }
+      else
+        {
+          if (clib_setns (curr_ns_fd) == -1)
+            NL_ERROR ("Cannot set previous ns");
+          close (curr_ns_fd);
+        }
+    }
+
+  NL_INFO ("Opened netlink synchronization socket %d of type %d",
+           nl_socket_get_fd (sk_route), sock_type);
+}
+
+/*
+ * Release the synchronization socket of the given type, if there is one,
+ * and clear its slot in nl_main.sk_route_sync.
+ */
+static void
+lcp_nl_close_sync_socket (nl_sock_type_t sock_type)
+{
+  nl_main_t *nm = &nl_main;
+  struct nl_sock *sock = nm->sk_route_sync[sock_type];
+
+  /* never opened, or closed already */
+  if (!sock)
+    return;
+
+  NL_INFO ("Closing netlink synchronization socket %d of type %d",
+           nl_socket_get_fd (sock), sock_type);
+  nl_socket_free (sock);
+  nm->sk_route_sync[sock_type] = NULL;
+}
+
+#include <vnet/plugin/plugin.h>
+/*
+ * Plugin init: put the netlink state machine into notification processing
+ * mode, register the "nl" log class, open the notification socket and hook
+ * into interface pair creation.
+ *
+ * @return always NULL
+ */
+clib_error_t *
+lcp_nl_init (vlib_main_t *vm)
+{
+  nl_main_t *nm = &nl_main;
+  lcp_itf_pair_vft_t pair_vft = {
+    .pair_add_fn = lcp_nl_pair_add_cb,
+  };
+
+  /* no clib file registered yet; start out processing notifications */
+  nm->clib_file_index = ~0;
+  nm->nl_status = NL_STATUS_NOTIF_PROC;
+  nm->nl_logger = vlib_log_register_class ("nl", "nl");
+
+  lcp_nl_open_socket ();
+
+  /* get called back whenever an interface pair is added */
+  lcp_itf_pair_register_vft (&pair_vft);
+
+  return NULL;
+}
+
+/* Register lcp_nl_init as an init function, ordered after the interface
+ * pair, tuntap and ip-neighbor init functions named below */
+VLIB_INIT_FUNCTION (lcp_nl_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "tuntap_init",
+ "ip_neighbor_init"),
+};
+
+#include <vpp/app/version.h>
+/* Plugin registration for the netlink listener; default_disabled is set, so
+ * the plugin is not loaded unless explicitly enabled */
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "linux Control Plane - Netlink listener",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.h b/src/plugins/linux-cp/lcp_nl.h
new file mode 100644
index 00000000000..41757e9b983
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+/*
+ * Callback signatures a netlink consumer provides through nl_vft_t: one
+ * per object type (link, address, neighbour, route) plus argument-less
+ * "sync" callbacks. Link callbacks additionally receive an opaque ctx
+ * pointer; route additions carry an is_replace flag.
+ */
+typedef void (*nl_rt_link_cb_t) (struct rtnl_link *rl, void *ctx);
+typedef void (*nl_rt_link_sync_cb_t) (void);
+typedef void (*nl_rt_addr_cb_t) (struct rtnl_addr *ra);
+typedef void (*nl_rt_addr_sync_cb_t) (void);
+typedef void (*nl_rt_neigh_cb_t) (struct rtnl_neigh *rr);
+typedef void (*nl_rt_neigh_sync_cb_t) (void);
+typedef void (*nl_rt_route_add_cb_t) (struct rtnl_route *rn, int is_replace);
+typedef void (*nl_rt_route_del_cb_t) (struct rtnl_route *rn);
+typedef void (*nl_rt_route_sync_cb_t) (void);
+
+/*
+ * Per-event registration records. Each one pairs a callback ("cb") with the
+ * fields shared by all records, which NL_RT_COMMON expands to (currently
+ * just is_mp_safe). The macro only exists for the definitions below and is
+ * undefined again afterwards.
+ * NOTE(review): the exact meaning of is_mp_safe is not visible in this
+ * header - confirm against the code dispatching these callbacks.
+ */
+#define NL_RT_COMMON uword is_mp_safe
+
+/* link add/del */
+typedef struct nl_rt_link_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_cb_t cb;
+} nl_rt_link_t;
+
+/* link sync begin/end */
+typedef struct nl_rt_link_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_sync_cb_t cb;
+} nl_rt_link_sync_t;
+
+/* address add/del */
+typedef struct nl_rt_addr_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_cb_t cb;
+} nl_rt_addr_t;
+
+/* address sync begin/end */
+typedef struct nl_rt_addr_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_sync_cb_t cb;
+} nl_rt_addr_sync_t;
+
+/* neighbour add/del */
+typedef struct nl_rt_neigh_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_cb_t cb;
+} nl_rt_neigh_t;
+
+/* neighbour sync begin/end */
+typedef struct nl_rt_neigh_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_sync_cb_t cb;
+} nl_rt_neigh_sync_t;
+
+/* route add */
+typedef struct nl_rt_route_add_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_add_cb_t cb;
+} nl_rt_route_add_t;
+
+/* route del */
+typedef struct nl_rt_route_del_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_del_cb_t cb;
+} nl_rt_route_del_t;
+
+/* route sync begin/end */
+typedef struct nl_rt_route_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_sync_cb_t cb;
+} nl_rt_route_sync_t;
+
+#undef NL_RT_COMMON
+
+/*
+ * Table of callbacks a netlink consumer hands to nl_register_vft(). For
+ * each object type (link, addr, neigh, route) there is an add and a del
+ * entry plus a sync_begin / sync_end pair.
+ * NOTE(review): the sync entries presumably bracket the processing of dump
+ * replies during re-synchronization - confirm in lcp_nl.c.
+ */
+typedef struct nl_vft_t_
+{
+ nl_rt_link_t nvl_rt_link_add;
+ nl_rt_link_t nvl_rt_link_del;
+ nl_rt_link_sync_t nvl_rt_link_sync_begin;
+ nl_rt_link_sync_t nvl_rt_link_sync_end;
+ nl_rt_addr_t nvl_rt_addr_add;
+ nl_rt_addr_t nvl_rt_addr_del;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_begin;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_end;
+ nl_rt_neigh_t nvl_rt_neigh_add;
+ nl_rt_neigh_t nvl_rt_neigh_del;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_begin;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_end;
+ nl_rt_route_add_t nvl_rt_route_add;
+ nl_rt_route_del_t nvl_rt_route_del;
+ nl_rt_route_sync_t nvl_rt_route_sync_begin;
+ nl_rt_route_sync_t nvl_rt_route_sync_end;
+} nl_vft_t;
+
+/* Register a table of netlink event callbacks (defined outside this header) */
+extern void nl_register_vft (const nl_vft_t *nv);
+
+/* Netlink object types; the value is the argument of lcp_nl_get_cache().
+ * LCP_NL_ROUTE must stay last: LCP_NL_N_OBJS is derived from it. */
+typedef enum lcp_nl_obj_t_
+{
+ LCP_NL_LINK,
+ LCP_NL_ADDR,
+ LCP_NL_NEIGH,
+ LCP_NL_ROUTE,
+} lcp_nl_obj_t;
+
+/* struct type to hold context on the netlink message being processed.
+ *
+ * At creation of a pair, a tap/tun is created and configured to match its
+ * corresponding hardware interface (MAC address, link state, MTU). Netlink
+ * messages are sent announcing the creation and subsequent configuration.
+ * We do not need to (and should not) act on those messages since applying
+ * those same configurations again is unnecessary and can be disruptive. So
+ * a timestamp for a message is stored and can be compared against the time
+ * the interface came under linux-cp management in order to figure out
+ * whether we should apply any configuration.
+ */
+typedef struct nl_msg_info
+{
+ /* the queued netlink message */
+ struct nl_msg *msg;
+ /* time at which the message was received */
+ f64 ts;
+} nl_msg_info_t;
+
+/* Number of values in lcp_nl_obj_t */
+#define LCP_NL_N_OBJS (LCP_NL_ROUTE + 1)
+
+/* Return the netlink cache kept for object type t */
+extern struct nl_cache *lcp_nl_get_cache (lcp_nl_obj_t t);
+/* Read all pending messages from the notification socket; returns the
+ * (negative) code that ended reading */
+extern int lcp_nl_drain_messages (void);
+/* Set the RX buffer size of the notification socket */
+extern void lcp_nl_set_buffer_size (u32 buf_size);
+/* Set the maximum number of messages processed in one batch */
+extern void lcp_nl_set_batch_size (u32 batch_size);
+/* Set the delay, in milliseconds, between two batches */
+extern void lcp_nl_set_batch_delay (u32 batch_delay_ms);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index b2ffc5fb63f..241cc5e4bff 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -31,6 +31,7 @@
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/mpls/mpls.h>
#define foreach_lip_punt \
_ (IO, "punt to host") \
@@ -438,14 +439,112 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
typedef enum
{
+ LCP_XC_MPLS_NEXT_DROP,
+ LCP_XC_MPLS_NEXT_IO,
+ LCP_XC_MPLS_N_NEXT,
+} lcp_xc_mpls_next_t;
+
+/**
+ * X-connect MPLS packets from the HOST to the PHY.
+ *
+ * For every buffer the interface pair is looked up from the RX (host)
+ * interface. The buffer is rewound over the pair's rewrite so that the
+ * ethernet header is current again, and that header is used to look up the
+ * adjacency to send with. Packets are dropped when no pair is found for the
+ * RX interface, when the destination MAC has the cast (group) bit set, or
+ * when no adjacency matches.
+ *
+ * The trace records the adjacency actually chosen, which is
+ * ADJ_INDEX_INVALID for dropped packets.
+ *
+ * @return the number of buffers in the frame
+ */
+static_always_inline uword
+lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                    vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, *to_next, n_left_to_next;
+  lcp_xc_next_t next_index;
+
+  next_index = 0;
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          const ethernet_header_t *eth;
+          const lcp_itf_pair_t *lip;
+          u32 next0, bi0, lipi, ai, phy_sw_if_index;
+          vlib_buffer_t *b0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* drop unless everything below works out */
+          ai = ADJ_INDEX_INVALID;
+          next0 = LCP_XC_MPLS_NEXT_DROP;
+          phy_sw_if_index = ~0;
+
+          lipi =
+            lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+          lip = lcp_itf_pair_get (lipi);
+
+          /* the RX interface may not (or no longer) belong to a pair */
+          if (PREDICT_TRUE (lip != NULL))
+            {
+              phy_sw_if_index = lip->lip_phy_sw_if_index;
+              vnet_buffer (b0)->sw_if_index[VLIB_TX] = phy_sw_if_index;
+
+              /* step back over the rewrite to reach the ethernet header */
+              vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+              eth = vlib_buffer_get_current (b0);
+
+              if (!ethernet_address_cast (eth->dst_address))
+                ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+                                   phy_sw_if_index);
+            }
+
+          if (ai != ADJ_INDEX_INVALID)
+            {
+              vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+              next0 = LCP_XC_MPLS_NEXT_IO;
+            }
+
+          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->phy_sw_if_index = phy_sw_if_index;
+              t->adj_index = ai;
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                           n_left_to_next, bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Node function of "linux-cp-xc-mpls"; all work is done by the inline */
+VLIB_NODE_FN (lcp_xc_mpls)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  return lcp_xc_mpls_inline (vm, node, frame);
+}
+
+/* Graph node registration for the MPLS host-to-phy cross connect. Packets
+ * either go to "interface-output" (adjacency found) or to "error-drop". */
+VLIB_REGISTER_NODE (
+ lcp_xc_mpls) = { .name = "linux-cp-xc-mpls",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lcp_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = LCP_XC_MPLS_N_NEXT,
+ .next_nodes = {
+ [LCP_XC_MPLS_NEXT_DROP] = "error-drop",
+ [LCP_XC_MPLS_NEXT_IO] = "interface-output",
+ } };
+
+/* Make "linux-cp-xc-mpls" available as a feature on the "mpls-input" arc */
+VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "linux-cp-xc-mpls",
+};
+
+typedef enum
+{
LCP_XC_L3_NEXT_XC,
+ LCP_XC_L3_NEXT_LOOKUP,
LCP_XC_L3_N_NEXT,
} lcp_xc_l3_next_t;
/**
* X-connect all packets from the HOST to the PHY on L3 interfaces
*
- * There's only one adjacency that can be used on thises links.
+ * There's only one adjacency that can be used on these links.
*/
static_always_inline u32
lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -453,6 +552,7 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from, *from, *to_next, n_left_to_next;
lcp_xc_next_t next_index;
+ vnet_main_t *vnm = vnet_get_main ();
next_index = 0;
n_left_from = frame->n_vectors;
@@ -488,10 +588,24 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
lip = lcp_itf_pair_get (lipi);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
- next0 = LCP_XC_L3_NEXT_XC;
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
- lip->lip_phy_adjs.adj_index[af];
+ /* P2P tunnels can use generic adjacency */
+ if (PREDICT_TRUE (
+ vnet_sw_interface_is_p2p (vnm, lip->lip_phy_sw_if_index)))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ lip->lip_phy_sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ lip->lip_phy_adjs.adj_index[af];
+ next0 = LCP_XC_L3_NEXT_XC;
+ }
+ /* P2MP tunnels require a fib lookup to find the right adjacency */
+ else
+ {
+ /* lookup should use FIB table associated with phy interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ lip->lip_phy_sw_if_index;
+ next0 = LCP_XC_L3_NEXT_LOOKUP;
+ }
if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
{
@@ -534,6 +648,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip4-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip4-lookup",
},
};
@@ -556,6 +671,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip6-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip6-lookup",
},
};
@@ -671,10 +787,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
if (arp1->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
@@ -699,10 +819,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c1 = vlib_buffer_copy (vm, b1);
vlib_buffer_advance (b1, len1);
- /* Send to the host */
- vnet_buffer (c1)->sw_if_index[VLIB_TX] =
- lip1->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c1);
+ if (c1)
+ {
+ /* Send to the host */
+ vnet_buffer (c1)->sw_if_index[VLIB_TX] =
+ lip1->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c1);
+ }
}
}
@@ -771,10 +895,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
new file mode 100644
index 00000000000..0efd53e64ef
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -0,0 +1,1578 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/mpls.h>
+
+//#include <vlib/vlib.h>
+#include <vlib/unix/plugin.h>
+#include <linux-cp/lcp_nl.h>
+#include <linux-cp/lcp_interface.h>
+
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/nexthop.h>
+#include <netlink/route/addr.h>
+#include <netlink/route/link/vlan.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/ip/ip6_ll_table.h>
+#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip/ip6_link.h>
+
+/*
+ * Per routing table bookkeeping of the router.
+ * NOTE(review): field descriptions below are derived from the names and
+ * from the uses visible in this part of the file - confirm against the
+ * table add/find code.
+ */
+typedef struct lcp_router_table_t_
+{
+ /* table identifier (presumably the kernel's routing table id) */
+ uint32_t nlt_id;
+ /* protocol of the table; compared against FIB_PROTOCOL_IP4 on link down */
+ fib_protocol_t nlt_proto;
+ /* index of the FIB table; matched against an interface's FIB index */
+ u32 nlt_fib_index;
+ /* index of the corresponding multicast FIB table */
+ u32 nlt_mfib_index;
+ /* reference count */
+ u32 nlt_refs;
+} lcp_router_table_t;
+
+/* One hash per FIB protocol (presumably table id to pool index - TODO
+ * confirm), the pool holding the lcp_router_table_t entries, and the log
+ * class used by the LCP_ROUTER_* logging macros */
+static uword *lcp_router_table_db[FIB_PROTOCOL_MAX];
+static lcp_router_table_t *lcp_router_table_pool;
+static vlib_log_class_t lcp_router_logger;
+
+/* The IPv4 all-ones host prefix, 255.255.255.255/32 */
+const static fib_prefix_t pfx_all1s = {
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = 0xffffffff,
+ }
+ },
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+};
+
+/* FIB sources used for routes installed by the router; both are flushed
+ * when an interface goes admin down (see lcp_router_link_add) */
+static fib_source_t lcp_rt_fib_src;
+static fib_source_t lcp_rt_fib_src_dynamic;
+
+/*
+ * Logging helpers bound to lcp_router_logger.
+ * NOTE(review): each expansion already ends in ';', so a call written as
+ * "LCP_ROUTER_x (...);" produces an extra empty statement. That breaks an
+ * unbraced "if (...) LCP_ROUTER_x (...); else ..." - keep such calls inside
+ * braces.
+ */
+#define LCP_ROUTER_DBG(...) vlib_log_debug (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_INFO(...) vlib_log_notice (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_ERROR(...) vlib_log_err (lcp_router_logger, __VA_ARGS__);
+
+/* IPv4 multicast prefixes for which an accepting path is added on (and
+ * removed from) managed interfaces, see lcp_router_ip4_mroutes_add_del */
+static const mfib_prefix_t ip4_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,224.0.0.0)/24 - all local subnet */
+ .fp_grp_addr = {
+ /* NOTE(review): as a u32 constant this is 224.0.0.0 in network order
+ * only on a little-endian host */
+ .ip4.data_u32 = 0x000000e0,
+ },
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+};
+
+/* IPv6 multicast prefixes for which an accepting path is added on (and
+ * removed from) managed interfaces, see lcp_router_ip6_mroutes_add_del */
+static const mfib_prefix_t ip6_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,ff00::)/8 - all IPv6 multicast */
+ .fp_grp_addr = {
+ /* NOTE(review): as a u64 constant this puts 0xff in the first address
+ * byte only on a little-endian host */
+ .ip6.as_u64[0] = 0x00000000000000ff,
+ },
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ },
+};
+
+/* VIF to PHY DB of managed interfaces: hash keyed by the Linux interface
+ * index, consulted first by lcp_router_intf_h2p() */
+static uword *lcp_routing_itf_db;
+
+/*
+ * Map a host (Linux) interface index to the matching phy sw_if_index.
+ *
+ * @param host the Linux interface index
+ * @return the sw_if_index, or ~0 when the interface is not known
+ */
+static u32
+lcp_router_intf_h2p (u32 host)
+{
+  const lcp_itf_pair_t *pair;
+  index_t pair_index;
+  uword *entry;
+
+  /* interfaces created from the linux side (vlans, tunnels, ...) are kept
+   * in our own DB and take precedence
+   */
+  entry = hash_get (lcp_routing_itf_db, host);
+  if (entry)
+    return entry[0];
+
+  /* otherwise it has to be the host end of an interface pair */
+  pair_index = lcp_itf_pair_find_by_vif (host);
+  if (INDEX_INVALID != pair_index)
+    {
+      pair = lcp_itf_pair_get (pair_index);
+      return pair->lip_phy_sw_if_index;
+    }
+
+  return ~0;
+}
+
+/*
+ * Decide from timestamps whether a netlink message may be applied to an
+ * interface pair. The declaration of nl_msg_info_t explains why messages
+ * that predate the pair have to be ignored.
+ *
+ * @param msg_info message context, may be NULL (then the check passes)
+ * @param lip      the interface pair the message refers to
+ * @return 0 when the message is newer than the pair (or there is no
+ *         context), -1 when the message is too old to be applied
+ */
+static int
+lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
+{
+  if (msg_info && !(msg_info->ts > lip->lip_create_ts))
+    {
+      LCP_ROUTER_INFO ("Early message received for %U",
+                       format_vnet_sw_if_index_name, vnet_get_main (),
+                       lip->lip_phy_sw_if_index);
+      return -1;
+    }
+
+  return 0;
+}
+
+/*
+ * Handle the deletion of a Linux link. Only acts when automatic
+ * sub-interface handling is enabled and the link is the host end of a known
+ * interface pair: the pair is deleted and, for VLAN links, both
+ * sub-interfaces are removed as well. Messages older than the pair are
+ * ignored (see lcp_router_lip_ts_check).
+ *
+ * @param rl  the link being deleted
+ * @param ctx message context (nl_msg_info_t *), may be NULL
+ */
+static void
+lcp_router_link_del (struct rtnl_link *rl, void *ctx)
+{
+  u32 phy_sw_if_index, host_sw_if_index;
+  lcp_itf_pair_t *lip;
+  index_t lipi;
+
+  if (!lcp_auto_subint ())
+    return;
+
+  lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+
+  if (INDEX_INVALID == lipi)
+    {
+      LCP_ROUTER_INFO ("ignore link del: %s - %s", rtnl_link_get_type (rl),
+                       rtnl_link_get_name (rl));
+      return;
+    }
+
+  lip = lcp_itf_pair_get (lipi);
+
+  if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+    return;
+
+  /* Take copies of the interface indices now. lcp_itf_pair_delete() is
+   * expected to release the pair entry (its definition is not part of this
+   * file), so lip must not be read once it has been called
+   */
+  phy_sw_if_index = lip->lip_phy_sw_if_index;
+  host_sw_if_index = lip->lip_host_sw_if_index;
+
+  LCP_ROUTER_INFO ("delete link: %s - %U", rtnl_link_get_type (rl),
+                   format_vnet_sw_if_index_name, vnet_get_main (),
+                   phy_sw_if_index);
+  lcp_itf_pair_delete (phy_sw_if_index);
+
+  if (rtnl_link_is_vlan (rl))
+    {
+      LCP_ROUTER_INFO ("delete vlan: %s -> %U", rtnl_link_get_name (rl),
+                       format_vnet_sw_if_index_name, vnet_get_main (),
+                       phy_sw_if_index);
+      vnet_delete_sub_interface (phy_sw_if_index);
+      vnet_delete_sub_interface (host_sw_if_index);
+    }
+}
+
+/*
+ * Add or remove an accepting path over sw_if_index for every prefix in
+ * ip4_specials, in the IPv4 mfib table the interface is bound to.
+ *
+ * @param sw_if_index interface to accept the multicast traffic on
+ * @param is_add      non-zero to add the paths, zero to remove them
+ */
+static void
+lcp_router_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+  const fib_route_path_t path = {
+    .frp_proto = DPO_PROTO_IP4,
+    .frp_addr = zero_addr,
+    .frp_sw_if_index = sw_if_index,
+    .frp_fib_index = ~0,
+    .frp_weight = 1,
+    .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+  };
+  const mfib_prefix_t *pfx, *end;
+  u32 mfib_index;
+
+  mfib_index =
+    mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+  end = ip4_specials + ARRAY_LEN (ip4_specials);
+
+  for (pfx = ip4_specials; pfx < end; pfx++)
+    {
+      if (!is_add)
+        {
+          mfib_table_entry_path_remove (mfib_index, pfx,
+                                        MFIB_SOURCE_PLUGIN_LOW, &path);
+          continue;
+        }
+
+      mfib_table_entry_path_update (mfib_index, pfx, MFIB_SOURCE_PLUGIN_LOW,
+                                    MFIB_ENTRY_FLAG_NONE, &path);
+    }
+}
+
+/*
+ * Add or remove an accepting path over sw_if_index for every prefix in
+ * ip6_specials, in the IPv6 mfib table the interface is bound to.
+ *
+ * @param sw_if_index interface to accept the multicast traffic on
+ * @param is_add      non-zero to add the paths, zero to remove them
+ */
+static void
+lcp_router_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+  const fib_route_path_t path = {
+    .frp_proto = DPO_PROTO_IP6,
+    .frp_addr = zero_addr,
+    .frp_sw_if_index = sw_if_index,
+    .frp_fib_index = ~0,
+    .frp_weight = 1,
+    .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+  };
+  const mfib_prefix_t *pfx, *end;
+  u32 mfib_index;
+
+  mfib_index =
+    mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+  end = ip6_specials + ARRAY_LEN (ip6_specials);
+
+  for (pfx = ip6_specials; pfx < end; pfx++)
+    {
+      if (!is_add)
+        {
+          mfib_table_entry_path_remove (mfib_index, pfx,
+                                        MFIB_SOURCE_PLUGIN_LOW, &path);
+          continue;
+        }
+
+      mfib_table_entry_path_update (mfib_index, pfx, MFIB_SOURCE_PLUGIN_LOW,
+                                    MFIB_ENTRY_FLAG_NONE, &path);
+    }
+}
+
+/*
+ * Apply the MTU of a Linux link to the given VPP interface. A link that
+ * reports an MTU of 0 is left alone. The hardware MTU is changed when
+ * sw_if_index is a main (non-sub) interface of ethernet class; in every
+ * other case the software interface MTU is set.
+ *
+ * @param rl          link to take the MTU from
+ * @param sw_if_index interface to apply it to
+ */
+static void
+lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *swif;
+  vnet_hw_interface_t *hwif;
+  int is_eth_hw;
+  u32 mtu;
+
+  mtu = rtnl_link_get_mtu (rl);
+  if (0 == mtu)
+    return;
+
+  swif = vnet_get_sw_interface (vnm, sw_if_index);
+  hwif = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  /* a main interface is its own super interface */
+  is_eth_hw = (swif->sw_if_index == swif->sup_sw_if_index) &&
+              (hwif->hw_class_index == ethernet_hw_interface_class.index);
+
+  if (is_eth_hw)
+    vnet_hw_interface_set_mtu (vnm, hwif->hw_if_index, mtu);
+  else
+    vnet_sw_interface_set_mtu (vnm, swif->sw_if_index, mtu);
+}
+
+/*
+ * Software interface walk callback: if the interface is the phy of an
+ * interface pair, refresh the pair's IPv4 and IPv6 phy adjacencies.
+ *
+ * @return always WALK_CONTINUE
+ */
+static walk_rc_t
+lcp_router_link_addr_adj_upd_cb (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+  index_t lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+  lcp_itf_pair_t *lip = lcp_itf_pair_get (lipi);
+
+  if (lip)
+    {
+      vnet_update_adjacency_for_sw_interface (
+        vnm, lip->lip_phy_sw_if_index, lip->lip_phy_adjs.adj_index[AF_IP4]);
+      vnet_update_adjacency_for_sw_interface (
+        vnm, lip->lip_phy_sw_if_index, lip->lip_phy_adjs.adj_index[AF_IP6]);
+    }
+
+  return WALK_CONTINUE;
+}
+
+/*
+ * Bring the MAC address of the pair's phy in line with the one of the Linux
+ * link. Nothing is done when the link carries no AF_LLC address, when the
+ * phy is a sub-interface, or when the hardware interface has no address of
+ * its own. Otherwise the MAC is changed if it differs, and the adjacencies
+ * of all software interfaces on the hardware interface are refreshed.
+ *
+ * @param rl  link to take the address from
+ * @param lip interface pair whose phy is updated
+ */
+static void
+lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *swif;
+  vnet_hw_interface_t *hwif;
+  struct nl_addr *link_mac;
+  void *mac_bytes;
+
+  link_mac = rtnl_link_get_addr (rl);
+  if (NULL == link_mac)
+    return;
+  if (AF_LLC != nl_addr_get_family (link_mac))
+    return;
+
+  /* the address can only be changed on a hw (main) interface */
+  swif = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
+  if (swif->sw_if_index != swif->sup_sw_if_index)
+    return;
+
+  hwif = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
+  if (0 == vec_len (hwif->hw_address))
+    return;
+
+  mac_bytes = nl_addr_get_binary_addr (link_mac);
+  if (clib_memcmp (mac_bytes, hwif->hw_address,
+                   nl_addr_get_len (link_mac)) != 0)
+    {
+      vnet_hw_interface_change_mac_address (vnm, hwif->hw_if_index,
+                                            mac_bytes);
+    }
+
+  /* mcast adjacencies need to be updated */
+  vnet_hw_interface_walk_sw (vnm, hwif->hw_if_index,
+                             lcp_router_link_addr_adj_upd_cb, NULL);
+}
+
+/* Forward declaration, needed by lcp_router_link_add() below. There it is
+ * called with a vector indexed by sw_if_index in which the interface being
+ * flushed is set to true, once per FIB source. */
+static void lcp_router_table_flush (lcp_router_table_t *nlt,
+ u32 *sw_if_index_to_bool,
+ fib_source_t source);
+
+/*
+ * Netlink "link add/update" handler.
+ *
+ * rl  - the link object from netlink.
+ * ctx - passed through to lcp_router_lip_ts_check() as a nl_msg_info_t.
+ *
+ * Two cases are handled:
+ *  - the link's ifindex belongs to an existing interface pair: sync the
+ *    admin state, MTU and MAC address to the phy interface. On an admin
+ *    down transition the IPv4 routes resolving through the phy are
+ *    flushed (see the comment in that branch).
+ *  - the link is a VLAN whose parent belongs to a pair and auto-subint
+ *    is enabled: create matching sub-interfaces on the phy and the host
+ *    interface and pair them.
+ * Anything else is logged and ignored.
+ */
+static void
+lcp_router_link_add (struct rtnl_link *rl, void *ctx)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  index_t lipi;
+  int up;
+
+  lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+  up = IFF_UP & rtnl_link_get_flags (rl);
+
+  if (INDEX_INVALID != lipi)
+    {
+      lcp_itf_pair_t *lip;
+      u32 sw_if_flags;
+      u32 sw_if_up;
+
+      lip = lcp_itf_pair_get (lipi);
+
+      /* use the _or_null accessor so that this guard can actually fire
+       * when the phy interface no longer exists */
+      if (!vnet_get_sw_interface_or_null (vnm, lip->lip_phy_sw_if_index))
+	return;
+
+      if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+	return;
+
+      sw_if_flags =
+	vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
+      sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+      if (!sw_if_up && up)
+	{
+	  vnet_sw_interface_admin_up (vnm, lip->lip_phy_sw_if_index);
+	}
+      else if (sw_if_up && !up)
+	{
+	  vnet_sw_interface_admin_down (vnm, lip->lip_phy_sw_if_index);
+
+	  /* When an interface is brought down administratively, the kernel
+	   * removes routes which resolve through that interface. For IPv4
+	   * routes, the kernel will not send any explicit RTM_DELROUTE
+	   * messages about removing them. In order to synchronize with the
+	   * kernel, affected IPv4 routes need to be manually removed from the
+	   * FIB. The behavior is different for IPv6 routes. Explicit
+	   * RTM_DELROUTE messages are sent about IPv6 routes being removed.
+	   */
+	  u32 fib_index;
+	  lcp_router_table_t *nlt;
+
+	  fib_index = fib_table_get_index_for_sw_if_index (
+	    FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
+
+	  pool_foreach (nlt, lcp_router_table_pool)
+	    {
+	      if (fib_index == nlt->nlt_fib_index &&
+		  FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+		{
+		  u32 *sw_if_index_to_bool = NULL;
+
+		  /* one-entry "set" holding only the phy interface */
+		  vec_validate_init_empty (sw_if_index_to_bool,
+					   lip->lip_phy_sw_if_index, false);
+		  sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
+
+		  lcp_router_table_flush (nlt, sw_if_index_to_bool,
+					  lcp_rt_fib_src);
+		  lcp_router_table_flush (nlt, sw_if_index_to_bool,
+					  lcp_rt_fib_src_dynamic);
+
+		  vec_free (sw_if_index_to_bool);
+		  break;
+		}
+	    }
+	}
+
+      LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
+		      rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
+		      vnm, lip->lip_phy_sw_if_index,
+		      format_vnet_sw_if_index_name, vnm,
+		      lip->lip_host_sw_if_index, (up ? "up" : "down"));
+
+      lcp_router_link_mtu (rl, lip->lip_phy_sw_if_index);
+      lcp_router_link_addr (rl, lip);
+    }
+  else if (lcp_auto_subint () && rtnl_link_is_vlan (rl))
+    {
+      /* Find the pair based on the parent VIF */
+      lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_link (rl));
+
+      if (INDEX_INVALID != lipi)
+	{
+	  u32 sub_phy_sw_if_index, sub_host_sw_if_index;
+	  const lcp_itf_pair_t *lip;
+	  int vlan;
+	  u8 *ns = 0; /* FIXME */
+
+	  lip = lcp_itf_pair_get (lipi);
+
+	  vlan = rtnl_link_vlan_get_id (rl);
+
+	  /* create the vlan interface on the parent phy */
+	  if (vnet_create_sub_interface (lip->lip_phy_sw_if_index, vlan, 18, 0,
+					 vlan, &sub_phy_sw_if_index))
+	    {
+	      LCP_ROUTER_INFO ("failed create phy vlan: %s on %U",
+			       rtnl_link_get_name (rl),
+			       format_vnet_sw_if_index_name, vnm,
+			       lip->lip_phy_sw_if_index);
+	      return;
+	    }
+
+	  /* pool could grow during the previous operation */
+	  lip = lcp_itf_pair_get (lipi);
+
+	  /* create the vlan interface on the parent host */
+	  if (vnet_create_sub_interface (lip->lip_host_sw_if_index, vlan, 18,
+					 0, vlan, &sub_host_sw_if_index))
+	    {
+	      /* NOTE(review): the phy sub-interface created above is left
+	       * in place on this error path - confirm that is intended. */
+	      LCP_ROUTER_INFO ("failed create vlan: %s on %U",
+			       rtnl_link_get_name (rl),
+			       format_vnet_sw_if_index_name, vnm,
+			       lip->lip_host_sw_if_index);
+	      return;
+	    }
+
+	  char *if_name;
+	  u8 *if_namev = 0;
+
+	  LCP_ROUTER_INFO (
+	    "create vlan: %s -> (%U, %U) : (%U, %U)", rtnl_link_get_name (rl),
+	    format_vnet_sw_if_index_name, vnm, lip->lip_phy_sw_if_index,
+	    format_vnet_sw_if_index_name, vnm, sub_phy_sw_if_index,
+	    format_vnet_sw_if_index_name, vnm, lip->lip_host_sw_if_index,
+	    format_vnet_sw_if_index_name, vnm, sub_host_sw_if_index);
+
+	  if ((if_name = rtnl_link_get_name (rl)) != NULL)
+	    vec_validate_init_c_string (if_namev, if_name,
+					strnlen (if_name, IFNAMSIZ));
+	  lcp_itf_pair_add (sub_host_sw_if_index, sub_phy_sw_if_index,
+			    if_namev, rtnl_link_get_ifindex (rl),
+			    lip->lip_host_type, ns);
+
+	  /* the phy side follows the kernel's admin state ... */
+	  if (up)
+	    {
+	      vnet_sw_interface_admin_up (vnm, sub_phy_sw_if_index);
+	    }
+	  /* ... the host side is always brought up */
+	  vnet_sw_interface_admin_up (vnm, sub_host_sw_if_index);
+
+	  vec_free (if_namev);
+	}
+      else
+	{
+	  LCP_ROUTER_INFO ("ignore parent-link add: %s - %s",
+			   rtnl_link_get_type (rl), rtnl_link_get_name (rl));
+	}
+    }
+  else
+    LCP_ROUTER_INFO ("ignore link add: %s - %s", rtnl_link_get_type (rl),
+		     rtnl_link_get_name (rl));
+}
+
+/* Link sync-begin callback: only emits a log line. */
+static void
+lcp_router_link_sync_begin (void)
+{
+ LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
+}
+
+/* Link sync-end callback: only emits a log line. */
+static void
+lcp_router_link_sync_end (void)
+{
+ LCP_ROUTER_INFO ("End synchronization of interface configurations");
+}
+
+/*
+ * Record sw_if_index in a two-level map: IPv4 fib-index -> vector indexed
+ * by sw_if_index holding a boolean. Both levels are grown on demand.
+ */
+static void
+lcp_router_link_down_mark (u32 ***fib_index_to_sw_if_index_to_bool,
+			   u32 sw_if_index)
+{
+  u32 fib_index;
+
+  fib_index =
+    fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+  vec_validate_init_empty (*fib_index_to_sw_if_index_to_bool, fib_index,
+			   NULL);
+  vec_validate_init_empty ((*fib_index_to_sw_if_index_to_bool)[fib_index],
+			   sw_if_index, false);
+  (*fib_index_to_sw_if_index_to_bool)[fib_index][sw_if_index] = true;
+}
+
+/*
+ * Hw interface link up/down callback.
+ *
+ * Acts only on link-down of an interface that is the phy of an interface
+ * pair, and only when deletion of static and/or dynamic routes on
+ * link-down is enabled. In that case the IPv4 routes resolving through
+ * the interface or any of its sub-interfaces are flushed.
+ *
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+  vnet_hw_interface_t *hi;
+  index_t lipi;
+
+  hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+  if (!hi)
+    return 0;
+
+  lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+  if (lipi == INDEX_INVALID)
+    return 0;
+
+  /* When the link goes down on an interface, the kernel processes routes which
+   * resolve through that interface depending on how they were created:
+   *   - Legacy Route API: the kernel retains the routes and marks them as
+   *     "linkdown";
+   *   - Nexthop API: the kernel removes the next-hop objects and the routes
+   *     which reference them.
+   *
+   * For IPv4 routes created with Nexthop API, the kernel will not send any
+   * explicit RTM_DELROUTE messages about removing them. In order to
+   * synchronize with the kernel, affected routes need to be manually removed
+   * from the FIB.
+   *
+   * The behavior is different for IPv6 routes created with Nexthop API. The
+   * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
+   * removed.
+   */
+  if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
+      (lcp_get_del_static_on_link_down () ||
+       lcp_get_del_dynamic_on_link_down ()))
+    {
+      u32 fib_index;
+      u32 **fib_index_to_sw_if_index_to_bool = NULL;
+      u32 id, sw_if_index;
+      lcp_router_table_t *nlt;
+
+      /* the hw interface itself ... */
+      lcp_router_link_down_mark (&fib_index_to_sw_if_index_to_bool,
+				 hi->sw_if_index);
+
+      /* ... and every sub-interface hanging off it */
+      /* clang-format off */
+      hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+      ({
+	lcp_router_link_down_mark (&fib_index_to_sw_if_index_to_bool,
+				   sw_if_index);
+      }));
+      /* clang-format on */
+
+      vec_foreach_index (fib_index, fib_index_to_sw_if_index_to_bool)
+	{
+	  u32 *sw_if_index_to_bool;
+
+	  sw_if_index_to_bool = fib_index_to_sw_if_index_to_bool[fib_index];
+	  if (NULL == sw_if_index_to_bool)
+	    continue;
+
+	  pool_foreach (nlt, lcp_router_table_pool)
+	    {
+	      if (fib_index == nlt->nlt_fib_index &&
+		  FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+		{
+		  if (lcp_get_del_static_on_link_down ())
+		    lcp_router_table_flush (nlt, sw_if_index_to_bool,
+					    lcp_rt_fib_src);
+		  if (lcp_get_del_dynamic_on_link_down ())
+		    lcp_router_table_flush (nlt, sw_if_index_to_bool,
+					    lcp_rt_fib_src_dynamic);
+		  break;
+		}
+	    }
+
+	  vec_free (sw_if_index_to_bool);
+	}
+
+      vec_free (fib_index_to_sw_if_index_to_bool);
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
+
+/*
+ * Translate a kernel address family (AF_INET, AF_INET6, AF_MPLS) into the
+ * matching FIB protocol. Any other family trips an ASSERT and yields
+ * FIB_PROTOCOL_NONE.
+ */
+static fib_protocol_t
+lcp_router_proto_k2f (uint32_t k)
+{
+  if (AF_INET6 == k)
+    return FIB_PROTOCOL_IP6;
+
+  if (AF_INET == k)
+    return FIB_PROTOCOL_IP4;
+
+  if (AF_MPLS == k)
+    return FIB_PROTOCOL_MPLS;
+
+  /* unsupported address family */
+  ASSERT (0);
+  return FIB_PROTOCOL_NONE;
+}
+
+/*
+ * Fill the ip_address_t 'ia' from the netlink address 'rna'.
+ * The address family must map to IPv4 or IPv6; MPLS is asserted against.
+ */
+static void
+lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
+{
+  fib_protocol_t proto;
+
+  ip_address_reset (ia);
+
+  proto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+  ASSERT (FIB_PROTOCOL_MPLS != proto);
+
+  /* anything that is not IPv4 is stored as IPv6 */
+  if (FIB_PROTOCOL_IP4 == proto)
+    ip_address_set (ia, nl_addr_get_binary_addr (rna), AF_IP4);
+  else
+    ip_address_set (ia, nl_addr_get_binary_addr (rna), AF_IP6);
+}
+
+/*
+ * Fill the ip46_address_t 'ia' from the netlink address 'rna' and return
+ * the FIB protocol derived from the address family. MPLS is asserted
+ * against. The number of bytes copied is the netlink address length.
+ */
+static fib_protocol_t
+lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
+{
+  fib_protocol_t proto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+  void *dst;
+
+  ASSERT (FIB_PROTOCOL_MPLS != proto);
+
+  ip46_address_reset (ia);
+
+  /* pick the v4 or v6 member of the ip46 address as the copy target */
+  if (FIB_PROTOCOL_IP4 == proto)
+    dst = &ia->ip4;
+  else
+    dst = &ia->ip6;
+
+  memcpy (dst, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+
+  return (proto);
+}
+
+/*
+ * Add or delete an interface address reported by netlink on the VPP phy
+ * interface that corresponds to the host interface.
+ *
+ * rla    - the netlink address object.
+ * is_del - non-zero to delete, zero to add.
+ *
+ * Messages for host interfaces with no phy mapping, or carrying no local
+ * address, are ignored. IPv6 link-local addresses are handled by
+ * enabling/disabling the ip6 link rather than as ordinary interface
+ * addresses. The per-interface mroutes are added on address add and
+ * removed on address delete.
+ */
+static void
+lcp_router_link_addr_add_del (struct rtnl_addr *rla, int is_del)
+{
+  struct nl_addr *local;
+  ip_address_t nh;
+  u32 sw_if_index;
+
+  sw_if_index = lcp_router_intf_h2p (rtnl_addr_get_ifindex (rla));
+
+  if (~0 == sw_if_index)
+    return;
+
+  /* guard against a message without a local address, in the same way the
+   * neighbour handlers guard against a missing destination */
+  local = rtnl_addr_get_local (rla);
+  if (NULL == local)
+    return;
+
+  lcp_router_mk_addr (local, &nh);
+
+  if (AF_IP4 == ip_addr_version (&nh))
+    {
+      ip4_add_del_interface_address (vlib_get_main (), sw_if_index,
+				     &ip_addr_v4 (&nh),
+				     rtnl_addr_get_prefixlen (rla), is_del);
+      lcp_router_ip4_mroutes_add_del (sw_if_index, !is_del);
+    }
+  else if (AF_IP6 == ip_addr_version (&nh))
+    {
+      if (ip6_address_is_link_local_unicast (&ip_addr_v6 (&nh)))
+	{
+	  if (is_del)
+	    {
+	      ip6_link_disable (sw_if_index);
+	    }
+	  else
+	    {
+	      ip6_link_enable (sw_if_index, NULL);
+	      ip6_link_set_local_address (sw_if_index, &ip_addr_v6 (&nh));
+	    }
+	}
+      else
+	{
+	  ip6_add_del_interface_address (
+	    vlib_get_main (), sw_if_index, &ip_addr_v6 (&nh),
+	    rtnl_addr_get_prefixlen (rla), is_del);
+	}
+      lcp_router_ip6_mroutes_add_del (sw_if_index, !is_del);
+    }
+
+  LCP_ROUTER_DBG ("link-addr: %U %U/%d", format_vnet_sw_if_index_name,
+		  vnet_get_main (), sw_if_index, format_ip_address, &nh,
+		  rtnl_addr_get_prefixlen (rla));
+}
+
+/* Netlink address-delete callback: forwards with is_del = 1. */
+static void
+lcp_router_link_addr_del (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 1);
+}
+
+/* Netlink address-add callback: forwards with is_del = 0. */
+static void
+lcp_router_link_addr_add (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 0);
+}
+
+/*
+ * Interface-pair walk callback: mark the addresses of the pair's phy
+ * interface. 'ctx' is unused. The walk always continues.
+ */
+static walk_rc_t
+lcp_router_address_mark (index_t index, void *ctx)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (index);
+
+  if (NULL != pair)
+    {
+      vnet_sw_interface_t *phy =
+	vnet_get_sw_interface (vnm, pair->lip_phy_sw_if_index);
+
+      ip_interface_address_mark_one_interface (vnm, phy, 0);
+    }
+
+  return WALK_CONTINUE;
+}
+
+/* Address sync-begin callback: walks every interface pair with
+ * lcp_router_address_mark(), then logs. */
+static void
+lcp_router_link_addr_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_address_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
+}
+
+/* Address sync-end callback: runs the interface address sweep, then logs.
+ * Presumably the sweep removes addresses still marked since sync-begin -
+ * confirm against ip_interface_address_sweep(). */
+static void
+lcp_router_link_addr_sync_end (void)
+{
+ ip_interface_address_sweep ();
+
+ LCP_ROUTER_INFO ("End synchronization of interface addresses");
+}
+
+/* Build a mac_address_t from the raw bytes of a netlink address.
+ * NOTE(review): the netlink address length is not checked here; assumes
+ * the caller passes an Ethernet-sized link-layer address - confirm. */
+static void
+lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
+{
+ mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna));
+}
+
+/*
+ * Netlink neighbour-delete handler: remove the neighbour from the VPP phy
+ * interface mapped to the host interface.
+ *
+ * Messages for unmapped interfaces are logged and ignored; messages
+ * without a destination are silently ignored; multicast destinations are
+ * logged and ignored.
+ */
+static void
+lcp_router_neigh_del (struct rtnl_neigh *rn)
+{
+  struct nl_addr *dst;
+  ip_address_t ip;
+  u32 phy;
+
+  phy = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+  if (~0 == phy)
+    {
+      LCP_ROUTER_INFO ("ignore neighbour del on: %d",
+		       rtnl_neigh_get_ifindex (rn));
+      return;
+    }
+
+  dst = rtnl_neigh_get_dst (rn);
+  if (NULL == dst)
+    return;
+
+  lcp_router_mk_addr (dst, &ip);
+
+  if (ip46_address_is_multicast (&ip_addr_46 (&ip)))
+    {
+      LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &ip,
+		      format_vnet_sw_if_index_name, vnet_get_main (), phy);
+      return;
+    }
+
+  if (ip_neighbor_del (&ip, phy))
+    {
+      LCP_ROUTER_ERROR ("Failed to delete neighbor: %U %U",
+			format_ip_address, &ip,
+			format_vnet_sw_if_index_name, vnet_get_main (), phy);
+    }
+  else
+    {
+      LCP_ROUTER_DBG ("neighbor del: %U %U", format_ip_address, &ip,
+		      format_vnet_sw_if_index_name, vnet_get_main (), phy);
+    }
+}
+
+/* Fallback definition for build environments whose headers do not provide
+ * NUD_VALID: the set of neighbour states lcp_router_neigh_add() treats as
+ * usable. */
+#ifndef NUD_VALID
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE | \
+ NUD_DELAY)
+#endif
+
+/*
+ * Netlink neighbour-add/update handler.
+ *
+ * A neighbour with a link-layer address and a state in NUD_VALID is
+ * installed on the mapped VPP phy interface: static when the state is
+ * NUD_NOARP or NUD_PERMANENT, dynamic otherwise. Any other update is
+ * treated as a delete.
+ *
+ * Messages for unmapped interfaces are logged and ignored; messages
+ * without a destination are silently ignored; multicast destinations are
+ * logged and ignored.
+ */
+static void
+lcp_router_neigh_add (struct rtnl_neigh *rn)
+{
+  struct nl_addr *dst, *lladdr;
+  ip_neighbor_flags_t nbr_flags;
+  mac_address_t mac;
+  ip_address_t ip;
+  int nud_state;
+  u32 phy;
+
+  phy = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+  if (~0 == phy)
+    {
+      LCP_ROUTER_INFO ("ignore neighbour add on: %d",
+		       rtnl_neigh_get_ifindex (rn));
+      return;
+    }
+
+  dst = rtnl_neigh_get_dst (rn);
+  if (NULL == dst)
+    return;
+
+  lcp_router_mk_addr (dst, &ip);
+
+  if (ip46_address_is_multicast (&ip_addr_46 (&ip)))
+    {
+      LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &ip,
+		      format_vnet_sw_if_index_name, vnet_get_main (), phy);
+      return;
+    }
+
+  lladdr = rtnl_neigh_get_lladdr (rn);
+  nud_state = rtnl_neigh_get_state (rn);
+
+  if (NULL == lladdr || 0 == (nud_state & NUD_VALID))
+    {
+      /* It's a delete */
+      lcp_router_neigh_del (rn);
+      return;
+    }
+
+  lcp_router_mk_mac_addr (lladdr, &mac);
+
+  nbr_flags = (nud_state & (NUD_NOARP | NUD_PERMANENT)) ?
+		IP_NEIGHBOR_FLAG_STATIC :
+		IP_NEIGHBOR_FLAG_DYNAMIC;
+
+  if (ip_neighbor_add (&ip, &mac, phy, nbr_flags, NULL))
+    {
+      LCP_ROUTER_ERROR ("Failed to create neighbor: %U %U",
+			format_ip_address, &ip,
+			format_vnet_sw_if_index_name, vnet_get_main (), phy);
+    }
+  else
+    {
+      LCP_ROUTER_DBG ("neighbor add: %U %U", format_ip_address, &ip,
+		      format_vnet_sw_if_index_name, vnet_get_main (), phy);
+    }
+}
+
+/*
+ * Interface-pair walk callback: mark every IPv4 and IPv6 neighbour on the
+ * pair's phy interface. 'ctx' is unused. The walk always continues.
+ */
+static walk_rc_t
+lcp_router_neighbor_mark (index_t index, void *ctx)
+{
+  lcp_itf_pair_t *pair = lcp_itf_pair_get (index);
+
+  if (NULL != pair)
+    {
+      u32 phy = pair->lip_phy_sw_if_index;
+
+      ip_neighbor_walk (AF_IP4, phy, ip_neighbor_mark_one, 0);
+      ip_neighbor_walk (AF_IP6, phy, ip_neighbor_mark_one, 0);
+    }
+
+  return WALK_CONTINUE;
+}
+
+/* Neighbour sync-begin callback: walks every interface pair with
+ * lcp_router_neighbor_mark(), then logs. */
+static void
+lcp_router_neigh_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_neighbor_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of neighbors");
+}
+
+/* Neighbour sync-end callback: runs the IPv4 and IPv6 neighbour sweeps,
+ * then logs. Presumably the sweep removes neighbours still marked since
+ * sync-begin - confirm against ip_neighbor_sweep(). */
+static void
+lcp_router_neigh_sync_end (void)
+{
+ ip_neighbor_sweep (AF_IP4);
+ ip_neighbor_sweep (AF_IP6);
+
+ LCP_ROUTER_INFO ("End synchronization of neighbors");
+}
+
+/*
+ * Look up the router table with the given (already translated) table id
+ * for the given protocol. Returns NULL when there is none.
+ */
+static lcp_router_table_t *
+lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
+{
+  uword *slot = hash_get (lcp_router_table_db[fproto], id);
+
+  if (NULL == slot)
+    return (NULL);
+
+  /* the hash stores the table's index in the pool */
+  return pool_elt_at_index (lcp_router_table_pool, slot[0]);
+}
+
+/*
+ * Translate a kernel routing table id into the table id used on the VPP
+ * side: kernel tables 254 and 255 both map to table 0, every other id is
+ * used unchanged.
+ */
+static uint32_t
+lcp_router_table_k2f (uint32_t k)
+{
+  switch (k)
+    {
+    case 254:
+    case 255:
+      return 0;
+    default:
+      return k;
+    }
+}
+
+/*
+ * Find the router table for kernel table 'id' and protocol 'fproto',
+ * creating it on first use, and take a reference on it.
+ *
+ * On creation the matching FIB and MFIB tables are found-or-created and
+ * locked, the table is entered in lcp_router_table_db, and the special
+ * entries are installed: the all-ones prefix (IPv4 only) and a local
+ * forward path for each entry of ip4_specials / ip6_specials.
+ *
+ * Returns the table with nlt_refs incremented; callers release it with
+ * lcp_router_table_unlock().
+ */
+static lcp_router_table_t *
+lcp_router_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
+{
+ lcp_router_table_t *nlt;
+
+ /* translate the kernel table id before looking it up */
+ id = lcp_router_table_k2f (id);
+ nlt = lcp_router_table_find (id, fproto);
+
+ if (NULL == nlt)
+ {
+ pool_get_zero (lcp_router_table_pool, nlt);
+
+ nlt->nlt_id = id;
+ nlt->nlt_proto = fproto;
+
+ nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, lcp_rt_fib_src);
+ nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
+
+ /* the db maps table id -> index of the table in the pool */
+ hash_set (lcp_router_table_db[fproto], nlt->nlt_id,
+ nlt - lcp_router_table_pool);
+
+ if (FIB_PROTOCOL_IP4 == fproto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src, FIB_ENTRY_FLAG_LOCAL);
+
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ /* local forward path for every IPv4 special mfib prefix */
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip4_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ else if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ /* local forward path for every IPv6 special mfib prefix */
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip6_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ }
+
+ /* one reference per caller, whether the table was found or created */
+ nlt->nlt_refs++;
+
+ return (nlt);
+}
+
+/*
+ * Drop one reference on a router table. When the last reference goes the
+ * all-ones special entry is removed (IPv4 only), the FIB table lock is
+ * released, and the table is removed from the db and returned to the
+ * pool.
+ *
+ * NOTE(review): the MFIB lock and special mfib paths taken in
+ * lcp_router_table_add_or_lock() are not released here - confirm whether
+ * that is intentional.
+ */
+static void
+lcp_router_table_unlock (lcp_router_table_t *nlt)
+{
+  if (0 != --nlt->nlt_refs)
+    return;
+
+  if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+    {
+      /* Undo the all 1s punt entry added when the table was created */
+      fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
+				      lcp_rt_fib_src);
+    }
+
+  fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+
+  hash_unset (lcp_router_table_db[nlt->nlt_proto], nlt->nlt_id);
+  pool_put (lcp_router_table_pool, nlt);
+}
+
+/*
+ * Build the FIB prefix 'p' from the destination of the netlink route 'r'.
+ *
+ * For MPLS the label is extracted from the first label stack entry, the
+ * prefix length is fixed at 21 and the prefix is non-EOS. For IPv4/IPv6
+ * the address bytes are copied and the prefix length is taken from the
+ * netlink address.
+ */
+static void
+lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
+{
+  const struct nl_addr *dst = rtnl_route_get_dst (r);
+  u32 *dst_bytes = nl_addr_get_binary_addr (dst);
+  u32 dst_len = nl_addr_get_len (dst);
+
+  ip46_address_reset (&p->fp_addr);
+  p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (dst));
+
+  if (FIB_PROTOCOL_MPLS == p->fp_proto)
+    {
+      u32 lse = ntohl (*dst_bytes);
+
+      p->fp_label = (lse & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+      p->fp_len = 21;
+      p->fp_eos = MPLS_NON_EOS;
+      return;
+    }
+
+  if (FIB_PROTOCOL_IP4 == p->fp_proto)
+    memcpy (&p->fp_addr.ip4, dst_bytes, dst_len);
+  else if (FIB_PROTOCOL_IP6 == p->fp_proto)
+    memcpy (&p->fp_addr.ip6, dst_bytes, dst_len);
+
+  p->fp_len = nl_addr_get_prefixlen (dst);
+}
+
+/*
+ * Build the multicast FIB prefix 'p' from the netlink route 'r': the
+ * group address and length come from the route's destination, the source
+ * address from the route's source when one is present.
+ */
+static void
+lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
+{
+  const struct nl_addr *grp = rtnl_route_get_dst (r);
+  const struct nl_addr *src;
+
+  p->fp_len = nl_addr_get_prefixlen (grp);
+  p->fp_proto = lcp_router_mk_addr46 (grp, &p->fp_grp_addr);
+
+  src = rtnl_route_get_src (r);
+  if (NULL != src)
+    p->fp_proto = lcp_router_mk_addr46 (src, &p->fp_src_addr);
+}
+
+/*
+ * Append the MPLS label stack carried in the netlink address 'addr' to
+ * path->frp_label_stack.
+ *
+ * Labels are consumed until the bottom-of-stack bit is seen or the end
+ * of the address payload is reached, whichever comes first, so a stack
+ * that lacks the bottom-of-stack bit cannot cause a read past the
+ * address buffer.
+ *
+ * Returns the number of labels appended; 0 when 'addr' is NULL or holds
+ * no complete label entry.
+ */
+static int
+lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr)
+{
+  struct mpls_label *stack;
+  u32 entry, label, n_entries;
+  u8 exp, ttl;
+  int label_count = 0;
+
+  if (!addr)
+    return 0;
+
+  stack = nl_addr_get_binary_addr (addr);
+
+  /* number of whole label stack entries in the address payload */
+  n_entries = nl_addr_get_len (addr) / sizeof (stack[0]);
+
+  while ((u32) label_count < n_entries)
+    {
+      entry = ntohl (stack[label_count++].entry);
+      label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+      exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+      ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+
+      fib_mpls_label_t fml = {
+	.fml_value = label,
+	.fml_exp = exp,
+	.fml_ttl = ttl,
+      };
+      vec_add1 (path->frp_label_stack, fml);
+
+      /* bottom of stack: this was the last label */
+      if (entry & MPLS_LS_S_MASK)
+	break;
+    }
+
+  return label_count;
+}
+
+/* Context handed to lcp_router_route_path_parse() for each nexthop of a
+ * netlink route, and to lcp_router_route_path_add_special(). */
+typedef struct lcp_router_route_path_parse_t_
+{
+ /* vector of paths built so far; freed by the caller with vec_free */
+ fib_route_path_t *paths;
+ /* protocol of the route's prefix; used when a nexthop has no address */
+ fib_protocol_t route_proto;
+ /* when set, each path gets MFIB_ITF_FLAG_FORWARD */
+ bool is_mcast;
+ /* path flags derived from the route type, OR-ed into every path */
+ fib_route_path_flags_t type_flags;
+ /* preference copied into every path */
+ u8 preference;
+} lcp_router_route_path_parse_t;
+
+/*
+ * Per-nexthop callback for rtnl_route_foreach_nexthop().
+ *
+ * rnh - the netlink nexthop.
+ * arg - a lcp_router_route_path_parse_t.
+ *
+ * When the nexthop's host ifindex maps to a VPP interface (i.e.
+ * lcp_router_intf_h2p() does not return ~0), one fib_route_path_t is
+ * appended to ctx->paths and filled from the nexthop. Nexthops on
+ * unmapped interfaces add nothing.
+ */
+static void
+lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
+{
+ lcp_router_route_path_parse_t *ctx = arg;
+ fib_route_path_t *path;
+ u32 sw_if_index;
+ int label_count = 0;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
+
+ if (~0 != sw_if_index)
+ {
+ fib_protocol_t fproto;
+ struct nl_addr *addr;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = sw_if_index;
+ path->frp_preference = ctx->preference;
+
+ /*
+ * FIB Path Weight of 0 is meaningless and replaced with 1 further along.
+ * See fib_path_create. fib_path_cmp_w_route_path would fail to match
+ * such a fib_route_path_t with any fib_path_t, because a fib_path_t's
+ * fp_weight can never be 0.
+ */
+ path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh));
+
+ /* prefer the gateway address; fall back to the "via" address */
+ addr = rtnl_route_nh_get_gateway (rnh);
+ if (!addr)
+ addr = rtnl_route_nh_get_via (rnh);
+
+ /* without any nexthop address the path takes the route's protocol */
+ if (addr)
+ fproto = lcp_router_mk_addr46 (addr, &path->frp_addr);
+ else
+ fproto = ctx->route_proto;
+
+ path->frp_proto = fib_proto_to_dpo (fproto);
+
+ if (ctx->route_proto == FIB_PROTOCOL_MPLS)
+ {
+ /* a new destination label stack means swap, none means pop */
+ addr = rtnl_route_nh_get_newdst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ {
+ LCP_ROUTER_DBG (" is label swap to %u",
+ path->frp_label_stack[0].fml_value);
+ }
+ else
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_LABEL_POP,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+ LCP_ROUTER_DBG (" is label pop");
+ }
+ }
+
+ /* MPLS encapsulation on the nexthop is only read when the libnl in use
+ * defines NL_CAPABILITY_VERSION_3_6_0 */
+#ifdef NL_CAPABILITY_VERSION_3_6_0
+ addr = rtnl_route_nh_get_encap_mpls_dst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count);
+#endif
+
+ if (ctx->is_mcast)
+ path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+ }
+}
+
+/*
+ * Routes of type blackhole, unreachable and prohibit arrive in an
+ * RTM_NEWROUTE without a next hop. If the route type is RTN_BLACKHOLE or
+ * above and no path was parsed from the message, add a single
+ * interface-less path carrying the type flags and preference from 'ctx'.
+ */
+static void
+lcp_router_route_path_add_special (struct rtnl_route *rr,
+				   lcp_router_route_path_parse_t *ctx)
+{
+  fib_route_path_t *special;
+
+  /* only for the special route types, and only when no path exists yet */
+  if (rtnl_route_get_type (rr) < RTN_BLACKHOLE || vec_len (ctx->paths) > 0)
+    return;
+
+  vec_add2 (ctx->paths, special, 1);
+
+  special->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+  special->frp_sw_if_index = ~0;
+  special->frp_proto = fib_proto_to_dpo (ctx->route_proto);
+  special->frp_preference = ctx->preference;
+
+  LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, special);
+}
+
+/*
+ * Map of supported route types, indexed by the kernel route type (RTN_*).
+ * A non-zero entry means the type is handled by lcp_router_route_add()
+ * and lcp_router_route_del(). Some types are omitted:
+ * RTN_LOCAL - interface address addition creates these automatically
+ * RTN_BROADCAST - same as RTN_LOCAL
+ * RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
+ * There's not a VPP equivalent for these currently.
+ */
+static const u8 lcp_router_route_type_valid[__RTN_MAX] = {
+ [RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
+ [RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
+};
+
+/* Map of fib entry flags by route type. Types without an entry get no
+ * extra flags. Note that RTN_LOCAL and RTN_BROADCAST are not marked valid
+ * in lcp_router_route_type_valid above. */
+static const fib_entry_flag_t lcp_router_route_type_feflags[__RTN_MAX] = {
+ [RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
+ [RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ [RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
+};
+
+/* Map of fib route path flags by route type. Types without an entry get
+ * no extra path flags. */
+static const fib_route_path_flags_t
+ lcp_router_route_type_frpflags[__RTN_MAX] = {
+ [RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
+ [RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
+ [RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
+ };
+
+/*
+ * Choose the FIB source for a route from its kernel routing protocol:
+ * protocols up to and including RTPROT_STATIC use lcp_rt_fib_src,
+ * everything above uses lcp_rt_fib_src_dynamic.
+ */
+static inline fib_source_t
+lcp_router_proto_fib_source (u8 rt_proto)
+{
+  if (rt_proto > RTPROT_STATIC)
+    return lcp_rt_fib_src_dynamic;
+
+  return lcp_rt_fib_src;
+}
+
+/*
+ * Compute the FIB entry flags for a route from its type, kernel table id
+ * and routing protocol. 'rtype' indexes lcp_router_route_type_feflags, so
+ * it must be below __RTN_MAX.
+ */
+static fib_entry_flag_t
+lcp_router_route_mk_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
+{
+  fib_entry_flag_t flags;
+
+  flags = FIB_ENTRY_FLAG_NONE | lcp_router_route_type_feflags[rtype];
+
+  /* kernel proto is interface prefixes, 255 is linux's 'local' table */
+  if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
+    {
+      flags |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
+    }
+
+  return (flags);
+}
+
+/*
+ * Netlink route-delete handler.
+ *
+ * Routes of an unknown or unsupported type, and routes in kernel table
+ * 255, are ignored, as are routes whose table is not known. Otherwise the
+ * paths are parsed from the message and removed from the FIB:
+ *   - IPv6: the whole entry is deleted for the source;
+ *   - MPLS: the paths are removed from both the non-EOS and EOS entries;
+ *   - otherwise: the paths are removed from the entry.
+ * Finally one reference on the table is dropped.
+ */
+static void
+lcp_router_route_del (struct rtnl_route *rr)
+{
+  fib_entry_flag_t entry_flags;
+  uint32_t table_id;
+  fib_prefix_t pfx;
+  lcp_router_table_t *nlt;
+  uint8_t rtype, rproto;
+
+  rtype = rtnl_route_get_type (rr);
+  table_id = rtnl_route_get_table (rr);
+  rproto = rtnl_route_get_protocol (rr);
+
+  /* skip unknown and unsupported route types and the local table; the
+   * bound check keeps rtype a valid index into the per-type tables */
+  if (rtype >= ARRAY_LEN (lcp_router_route_type_valid) ||
+      !lcp_router_route_type_valid[rtype] || (table_id == 255))
+    return;
+
+  lcp_router_route_mk_prefix (rr, &pfx);
+  entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+  nlt = lcp_router_table_find (lcp_router_table_k2f (table_id), pfx.fp_proto);
+
+  LCP_ROUTER_DBG ("route del: %d:%U %U", rtnl_route_get_table (rr),
+		  format_fib_prefix, &pfx, format_fib_entry_flags,
+		  entry_flags);
+
+  if (NULL == nlt)
+    return;
+
+  lcp_router_route_path_parse_t np = {
+    .route_proto = pfx.fp_proto,
+    .type_flags = lcp_router_route_type_frpflags[rtype],
+  };
+
+  rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+  lcp_router_route_path_add_special (rr, &np);
+
+  if (0 != vec_len (np.paths))
+    {
+      fib_source_t fib_src;
+
+      fib_src = lcp_router_proto_fib_source (rproto);
+
+      switch (pfx.fp_proto)
+	{
+	case FIB_PROTOCOL_IP6:
+	  fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
+	  break;
+	case FIB_PROTOCOL_MPLS:
+	  fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+					np.paths);
+	  /* delete the EOS route in addition to NEOS */
+	  pfx.fp_eos = MPLS_EOS;
+	  /* fallthrough */
+	default:
+	  fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+					np.paths);
+	  break;
+	}
+    }
+
+  vec_free (np.paths);
+
+  lcp_router_table_unlock (nlt);
+}
+
+/*
+ * Duplicate a vector of route paths, including each path's label stack,
+ * so that the copy shares no vectors with the original. Returns NULL when
+ * the vector duplication yields NULL.
+ */
+static fib_route_path_t *
+lcp_router_fib_route_path_dup (fib_route_path_t *old)
+{
+  fib_route_path_t *copy = vec_dup (old);
+  fib_route_path_t *path;
+
+  if (NULL == copy)
+    return NULL;
+
+  vec_foreach (path, copy)
+    {
+      /* the shallow copy still points at the original's label stack */
+      if (path->frp_label_stack)
+	path->frp_label_stack = vec_dup (path->frp_label_stack);
+    }
+
+  return copy;
+}
+
+/*
+ * Netlink route-add/replace handler.
+ *
+ * rr         - the netlink route.
+ * is_replace - non-zero to replace the entry's paths, zero to add to them.
+ *
+ * Routes of an unknown or unsupported type, and routes in kernel table
+ * 255, are ignored. The router table is found-or-created and locked, then
+ * kernel-protocol routes and IPv6 multicast/link-local prefixes are
+ * skipped. The paths are parsed from the message and installed:
+ *   - multicast routes go into the MFIB table;
+ *   - MPLS routes are installed as both a non-EOS and an EOS entry;
+ *   - everything else is installed as a single FIB entry.
+ */
+static void
+lcp_router_route_add (struct rtnl_route *rr, int is_replace)
+{
+  fib_entry_flag_t entry_flags;
+  uint32_t table_id;
+  fib_prefix_t pfx;
+  lcp_router_table_t *nlt;
+  uint8_t rtype, rproto;
+
+  rtype = rtnl_route_get_type (rr);
+  table_id = rtnl_route_get_table (rr);
+  rproto = rtnl_route_get_protocol (rr);
+
+  /* skip unknown and unsupported route types and the local table; the
+   * bound check keeps rtype a valid index into the per-type tables */
+  if (rtype >= ARRAY_LEN (lcp_router_route_type_valid) ||
+      !lcp_router_route_type_valid[rtype] || (table_id == 255))
+    return;
+
+  lcp_router_route_mk_prefix (rr, &pfx);
+  entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+
+  /* the table reference is taken before the skip check below */
+  nlt = lcp_router_table_add_or_lock (table_id, pfx.fp_proto);
+  /* Skip any kernel routes and IPv6 LL or multicast routes */
+  if (rproto == RTPROT_KERNEL ||
+      (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
+       (ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
+	ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6))))
+    {
+      LCP_ROUTER_DBG ("route skip: %d:%U %U", rtnl_route_get_table (rr),
+		      format_fib_prefix, &pfx, format_fib_entry_flags,
+		      entry_flags);
+      return;
+    }
+  LCP_ROUTER_DBG ("route %s: %d:%U %U", is_replace ? "replace" : "add",
+		  rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+		  format_fib_entry_flags, entry_flags);
+
+  lcp_router_route_path_parse_t np = {
+    .route_proto = pfx.fp_proto,
+    .is_mcast = (rtype == RTN_MULTICAST),
+    .type_flags = lcp_router_route_type_frpflags[rtype],
+    .preference = (u8) rtnl_route_get_priority (rr),
+  };
+
+  rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+  lcp_router_route_path_add_special (rr, &np);
+
+  if (0 != vec_len (np.paths))
+    {
+      if (rtype == RTN_MULTICAST)
+	{
+	  /* it's not clear to me how linux expresses the RPF parameters
+	   * so we'll allow from all interfaces and hope for the best */
+	  mfib_prefix_t mpfx = {};
+
+	  lcp_router_route_mk_mprefix (rr, &mpfx);
+
+	  mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
+				   MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
+				   MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+
+	  mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
+					 MFIB_SOURCE_PLUGIN_LOW,
+					 MFIB_ENTRY_FLAG_NONE, np.paths);
+	}
+      else
+	{
+	  fib_source_t fib_src;
+	  const fib_route_path_t *rpath;
+
+	  /* one attached path is enough to mark the entry attached */
+	  vec_foreach (rpath, np.paths)
+	    {
+	      if (fib_route_path_is_attached (rpath))
+		{
+		  entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+		  break;
+		}
+	    }
+
+	  fib_src = lcp_router_proto_fib_source (rproto);
+
+	  if (pfx.fp_proto == FIB_PROTOCOL_MPLS)
+	    {
+	      /* in order to avoid double-frees, we duplicate the paths. */
+	      fib_route_path_t *pathdup =
+		lcp_router_fib_route_path_dup (np.paths);
+	      if (is_replace)
+		fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+					entry_flags, pathdup);
+	      else
+		fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+					   entry_flags, pathdup);
+	      vec_free (pathdup);
+
+	      /* install EOS route in addition to NEOS */
+	      pfx.fp_eos = MPLS_EOS;
+	      pfx.fp_payload_proto = np.paths[0].frp_proto;
+	    }
+
+	  if (is_replace)
+	    fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+				    entry_flags, np.paths);
+	  else
+	    fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+				       entry_flags, np.paths);
+	}
+    }
+  else
+    {
+      LCP_ROUTER_DBG ("no paths for route: %d:%U %U",
+		      rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+		      format_fib_entry_flags, entry_flags);
+    }
+  vec_free (np.paths);
+}
+
+/*
+ * Route sync-begin callback: for every router table, mark the FIB entries
+ * of both LCP sources (static and dynamic) and log the table.
+ */
+static void
+lcp_router_route_sync_begin (void)
+{
+  lcp_router_table_t *table;
+
+  pool_foreach (table, lcp_router_table_pool)
+    {
+      u32 fib_index = table->nlt_fib_index;
+
+      fib_table_mark (fib_index, table->nlt_proto, lcp_rt_fib_src);
+      fib_table_mark (fib_index, table->nlt_proto, lcp_rt_fib_src_dynamic);
+
+      LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
+		       format_fib_protocol, table->nlt_proto, fib_index);
+    }
+}
+
+/*
+ * Route sync-end callback: for every router table, sweep the FIB entries
+ * of both LCP sources (static and dynamic) and log the table.
+ */
+static void
+lcp_router_route_sync_end (void)
+{
+  lcp_router_table_t *table;
+
+  pool_foreach (table, lcp_router_table_pool)
+    {
+      u32 fib_index = table->nlt_fib_index;
+
+      fib_table_sweep (fib_index, table->nlt_proto, lcp_rt_fib_src);
+      fib_table_sweep (fib_index, table->nlt_proto, lcp_rt_fib_src_dynamic);
+
+      LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
+		       format_fib_protocol, table->nlt_proto, fib_index);
+    }
+}
+
+/* Context for the FIB table walk done by lcp_router_table_flush(). */
+typedef struct lcp_router_table_flush_ctx_t_
+{
+ /* output: vector of FIB entry indices selected for deletion */
+ fib_node_index_t *lrtf_entries;
+ /* input: vector indexed by sw_if_index; non-zero selects the interface */
+ u32 *lrtf_sw_if_index_to_bool;
+ /* input: FIB source whose resolving interface is examined */
+ fib_source_t lrtf_source;
+} lcp_router_table_flush_ctx_t;
+
+/*
+ * FIB table walk callback: collect the entry in ctx->lrtf_entries when
+ * its resolving interface for ctx->lrtf_source is one of the interfaces
+ * selected in ctx->lrtf_sw_if_index_to_bool. The walk always continues.
+ */
+static fib_table_walk_rc_t
+lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+  lcp_router_table_flush_ctx_t *flush = arg;
+  u32 resolving_sw_if_index;
+
+  resolving_sw_if_index = fib_entry_get_resolving_interface_for_source (
+    fib_entry_index, flush->lrtf_source);
+
+  /* indices past the end of the vector are not selected */
+  if (resolving_sw_if_index >= vec_len (flush->lrtf_sw_if_index_to_bool))
+    return (FIB_TABLE_WALK_CONTINUE);
+
+  if (flush->lrtf_sw_if_index_to_bool[resolving_sw_if_index])
+    vec_add1 (flush->lrtf_entries, fib_entry_index);
+
+  return (FIB_TABLE_WALK_CONTINUE);
+}
+
+/*
+ * Delete from the table 'nlt' every FIB entry of 'source' that resolves
+ * through one of the interfaces selected in 'sw_if_index_to_bool' (a
+ * vector indexed by sw_if_index).
+ *
+ * Entries are first collected by walking the table and then deleted, so
+ * the table is not modified during the walk. One table reference is
+ * dropped per deleted entry.
+ *
+ * NOTE(review): if the table's last reference is dropped part-way through
+ * the delete loop, 'nlt' is returned to the pool while still in use here
+ * - confirm the reference accounting rules this out.
+ */
+static void
+lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
+			fib_source_t source)
+{
+  lcp_router_table_flush_ctx_t flush = {
+    .lrtf_entries = NULL,
+    .lrtf_sw_if_index_to_bool = sw_if_index_to_bool,
+    .lrtf_source = source,
+  };
+  fib_node_index_t *victim;
+
+  LCP_ROUTER_DBG (
+    "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
+    format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
+    vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
+
+  /* pass 1: collect */
+  fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
+		  lcp_router_table_flush_cb, &flush);
+
+  LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
+		  vec_len (flush.lrtf_entries));
+
+  /* pass 2: delete */
+  vec_foreach (victim, flush.lrtf_entries)
+    {
+      fib_table_entry_delete_index (*victim, source);
+      lcp_router_table_unlock (nlt);
+    }
+
+  vec_free (flush.lrtf_entries);
+}
+
+/*
+ * Callback table registered with the netlink layer by lcp_router_init().
+ * It wires the link, address, neighbour and route add/del handlers and
+ * their sync begin/end hooks. Only the route add/del callbacks are
+ * flagged is_mp_safe = 1; all others are 0.
+ * NOTE(review): the exact meaning of is_mp_safe is defined by the netlink
+ * layer, not visible here - confirm before changing a flag.
+ */
+const nl_vft_t lcp_router_vft = {
+ .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
+ .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
+ .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_sync_begin },
+ .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
+ .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
+ .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
+ .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_begin },
+ .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_end },
+ .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
+ .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
+ .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_begin },
+ .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_end },
+ .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add },
+ .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
+ .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_begin },
+ .nvl_rt_route_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_end },
+};
+
+/*
+ * Plugin init: register the "linux-cp"/"router" log class, register the
+ * router callback table with the netlink layer, and allocate the two FIB
+ * sources used for routes learned from the kernel.
+ *
+ * Always returns NULL (no error).
+ */
+static clib_error_t *
+lcp_router_init (vlib_main_t *vm)
+{
+ lcp_router_logger = vlib_log_register_class ("linux-cp", "router");
+
+ nl_register_vft (&lcp_router_vft);
+
+ /*
+ * allocate 2 route sources. The low priority source will be for
+ * dynamic routes. If a dynamic route daemon (FRR) tries to remove its
+ * route, it will use the low priority source to ensure it will not
+ * remove static routes which were added with the higher priority source.
+ */
+ lcp_rt_fib_src =
+ fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
+
+ /* FIB_SOURCE_PRIORITY_HI + 1 is the "low priority" source referred to
+ * above */
+ lcp_rt_fib_src_dynamic = fib_source_allocate (
+ "lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
+
+ return (NULL);
+}
+
+/* this init function is ordered before "lcp_nl_init" */
+VLIB_INIT_FUNCTION (lcp_router_init) = {
+ .runs_before = VLIB_INITS ("lcp_nl_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */