aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
authorOle Troan <ot@cisco.com>2018-06-07 10:17:57 +0200
committerDamjan Marion <dmarion@me.com>2018-06-11 10:25:59 +0000
commitd723161e038d00e59766aa67a6a0dcc350227e4b (patch)
tree24f5a1d87ebdc0012f28d42aa80a9f5d760073bf /src/vnet
parentf4fd0d4217ab6c41fe6b093871bd40ac130e6486 (diff)
MTU: Software interface / Per-protocol MTU support
This patch separates setting of hardware interface and software interface MTU. Software MTU is L2 payload MTU (i.e. not including L2 header). Per-protocol MTU for IPv4, IPv6 and MPLS can also be set. Currently only IP4, IP6 are enabled in adjacency / rewrite code. Documentation in src/vnet/MTU.md Change-Id: Iee2fd6f0bbc8210748dd8e073ab9fab87d323690 Signed-off-by: Ole Troan <ot@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/MTU.md72
-rw-r--r--src/vnet/adj/adj.c27
-rw-r--r--src/vnet/adj/adj.h6
-rw-r--r--src/vnet/adj/adj_glean.c4
-rw-r--r--src/vnet/adj/adj_mcast.c2
-rw-r--r--src/vnet/adj/adj_nbr.c2
-rw-r--r--src/vnet/adj/rewrite.c11
-rw-r--r--src/vnet/adj/rewrite.h3
-rw-r--r--src/vnet/devices/virtio/vhost-user.c3
-rw-r--r--src/vnet/ethernet/interface.c4
-rw-r--r--src/vnet/gre/interface.c5
-rw-r--r--src/vnet/interface.api20
-rw-r--r--src/vnet/interface.c101
-rw-r--r--src/vnet/interface.h27
-rw-r--r--src/vnet/interface_api.c31
-rw-r--r--src/vnet/interface_cli.c50
-rw-r--r--src/vnet/interface_format.c20
-rw-r--r--src/vnet/interface_funcs.h25
-rw-r--r--src/vnet/ip/ip6_neighbor.c7
-rw-r--r--src/vnet/ipip/ipip.c4
-rw-r--r--src/vnet/ipip/sixrd.c2
-rw-r--r--src/vnet/ipsec-gre/interface.c6
-rw-r--r--src/vnet/mpls/mpls_tunnel.c3
-rw-r--r--src/vnet/sctp/sctp.c16
-rw-r--r--src/vnet/srp/interface.c2
-rw-r--r--src/vnet/unix/tapcli.c3
-rw-r--r--src/vnet/vnet.h2
27 files changed, 332 insertions, 126 deletions
diff --git a/src/vnet/MTU.md b/src/vnet/MTU.md
new file mode 100644
index 00000000000..32b8c39edea
--- /dev/null
+++ b/src/vnet/MTU.md
@@ -0,0 +1,72 @@
+# Introduction
+Maximum Transmission Unit is a term used to describe the maximum sized "thingy" that can be sent out an interface. It can refer to the maximum frame size that a NIC can send. On Ethernet that would include the Ethernet header but typically not the IFG (inter-frame gap). It can also refer to the maximum packet size, that is, on Ethernet an MTU of 1500 would allow an IPv4 packet of 1500 bytes, which would result in an Ethernet frame of 1518 bytes.
+
+# MTU in VPP
+VPP allows setting of the physical payload MTU, i.e. not including L2 overhead. Setting the hardware MTU will program the NIC.
+This MTU will be inherited by all software interfaces.
+
+VPP also allows setting of the payload MTU for software interfaces, independently of the MTU set on the hardware. If the software payload MTU is set higher than the capability of the NIC, the packet will be dropped.
+
+In addition VPP supports setting the MTU of individual network layer protocols: IPv4, IPv6 or MPLS. For example an IPv4 MTU of 1500 (includes the IPv4 header) will fit in a hardware payload MTU of 1500.
+
+_Note we might consider changing the hardware payload MTU to hardware MTU_. That is, the MTU includes all L2 framing. Then the payload MTU can be calculated based on the interface's configuration. E.g. 802.1q tags etc.
+
+There are currently no checks or warnings if e.g. the user configures a per-protocol MTU larger than the underlying payload MTU. If that happens packets will be fragmented or dropped.
+
+## Data structures
+The hardware payload MTU is stored in the max_packet_bytes variable in the vnet_hw_interface_t structure.
+
+The software MTU (previously max_l3_packet_bytes) is stored in the mtu[VNET_N_MTU] array in the vnet_sw_interface_t structure.
+
+# API
+
+## Set physical MTU
+
+This API message is used to set the physical MTU. It is currently limited to Ethernet interfaces. Note, this programs the NIC.
+
+```
+autoreply define hw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u16 mtu;
+};
+```
+
+## Set the L2 payload MTU (not including the L2 header) and per-protocol MTUs
+
+This API message sets the L2 payload MTU, i.e. the maximum L3 packet size, and the per-protocol MTUs. E.g. on Ethernet it is the maximum size of the Ethernet payload. If a per-protocol value is left as 0, then the default is picked from the VNET_MTU_L3 entry (mtu[0]).
+
+```
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+```
+
+## Get interface MTU
+
+The various MTUs on an interface can be queried with the sw_interface_dump/sw_interface_details calls.
+
+```
+define sw_interface_details
+{
+ /* MTU */
+ u16 link_mtu;
+
+ /* Per protocol MTUs */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+```
+
+# CLI
+
+```
+set interface mtu [packet|ip4|ip6|mpls] <value> <interface>
+```
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index abfe8729b39..0de3fc8b775 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -347,36 +347,19 @@ adj_mtu_update_walk_cb (adj_index_t ai,
adj = adj_get(ai);
- vnet_rewrite_update_mtu (vnet_get_main(),
+ vnet_rewrite_update_mtu (vnet_get_main(), adj->ia_link,
&adj->rewrite_header);
return (ADJ_WALK_RC_CONTINUE);
}
-static walk_rc_t
-adj_sw_mtu_update (vnet_main_t * vnm,
- u32 sw_if_index,
- void *ctx)
+static void
+adj_mtu_update (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
{
- /*
- * Walk all the adjacencies on the interface to update the cached MTU
- */
- adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
-
- return (WALK_CONTINUE);
+ adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
}
-void
-adj_mtu_update (u32 hw_if_index)
-{
- /*
- * Walk all the SW interfaces on the HW interface to update the cached MTU
- */
- vnet_hw_interface_walk_sw(vnet_get_main(),
- hw_if_index,
- adj_sw_mtu_update,
- NULL);
-}
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(adj_mtu_update);
/**
* @brief Walk the Adjacencies on a given interface
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index bcf6c041209..fe77d1634e0 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -345,12 +345,6 @@ extern const u8* adj_get_rewrite (adj_index_t ai);
extern void adj_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable);
/**
- * @brief Notify the adjacency subsystem that the MTU settings for
- * an HW interface have changed
- */
-extern void adj_mtu_update (u32 hw_if_index);
-
-/**
* @brief
* The global adjacnecy pool. Exposed for fast/inline data-plane access
*/
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index 74881d7f67c..92369116532 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -77,8 +77,8 @@ adj_glean_add_or_lock (fib_protocol_t proto,
adj->rewrite_header.sw_if_index = sw_if_index;
adj->rewrite_header.data_bytes = 0;
adj->rewrite_header.max_l3_packet_bytes =
- vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index, VLIB_TX);
-
+ vnet_sw_interface_get_mtu(vnet_get_main(), sw_if_index,
+ vnet_link_to_mtu(linkt));
adj_lock(adj_get_index(adj));
vnet_update_adjacency_for_sw_interface(vnet_get_main(),
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index 593c1b66a8a..9fbad482600 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -69,7 +69,7 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
adj_mcasts[proto][sw_if_index] = adj_get_index(adj);
adj_lock(adj_get_index(adj));
- vnet_rewrite_init(vnm, sw_if_index,
+ vnet_rewrite_init(vnm, sw_if_index, link_type,
adj_get_mcast_node(proto),
vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
&adj->rewrite_header);
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 6fd9b40efc3..3f66acbb5c5 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -230,7 +230,7 @@ adj_nbr_add_or_lock (fib_protocol_t nh_proto,
adj_index = adj_get_index(adj);
adj_lock(adj_index);
- vnet_rewrite_init(vnm, sw_if_index,
+ vnet_rewrite_init(vnm, sw_if_index, link_type,
adj_get_nd_node(nh_proto),
vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
&adj->rewrite_header);
diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c
index f4b26a9d0dd..1a87793eb1c 100644
--- a/src/vnet/adj/rewrite.c
+++ b/src/vnet/adj/rewrite.c
@@ -103,19 +103,22 @@ vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
void
vnet_rewrite_init (vnet_main_t * vnm,
u32 sw_if_index,
+ vnet_link_t linkt,
u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
{
rw->sw_if_index = sw_if_index;
rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
rw->max_l3_packet_bytes =
- vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, vnet_link_to_mtu (linkt));
}
void
-vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_rewrite_header_t * rw)
+vnet_rewrite_update_mtu (vnet_main_t * vnm, vnet_link_t linkt,
+ vnet_rewrite_header_t * rw)
{
rw->max_l3_packet_bytes =
- vnet_sw_interface_get_mtu (vnm, rw->sw_if_index, VLIB_TX);
+ vnet_sw_interface_get_mtu (vnm, rw->sw_if_index,
+ vnet_link_to_mtu (linkt));
}
void
@@ -133,7 +136,7 @@ vnet_rewrite_for_sw_interface (vnet_main_t * vnm,
vnet_get_hw_interface_class (vnm, hw->hw_class_index);
u8 *rewrite = NULL;
- vnet_rewrite_init (vnm, sw_if_index, node_index,
+ vnet_rewrite_init (vnm, sw_if_index, link_type, node_index,
vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
rw);
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
index 712f686f4ae..0d4b0b9fd28 100644
--- a/src/vnet/adj/rewrite.h
+++ b/src/vnet/adj/rewrite.h
@@ -314,11 +314,12 @@ u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm,
void vnet_rewrite_init (struct vnet_main_t *vnm,
u32 sw_if_index,
+ vnet_link_t linkt,
u32 this_node,
u32 next_node, vnet_rewrite_header_t * rw);
void vnet_rewrite_update_mtu (struct vnet_main_t *vnm,
- vnet_rewrite_header_t * rw);
+ vnet_link_t linkt, vnet_rewrite_header_t * rw);
u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
u32 sw_if_index,
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 4e745d662fb..92447765726 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -2867,8 +2867,7 @@ vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
if (error)
clib_error_report (error);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
}
/*
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 174b3639f41..b3ea983e50d 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -300,11 +300,9 @@ ethernet_register_interface (vnet_main_t * vnm,
ETHERNET_MIN_PACKET_BYTES;
hi->max_packet_bytes = hi->max_supported_packet_bytes =
ETHERNET_MAX_PACKET_BYTES;
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default ethernet MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
clib_memcpy (ei->address, address, sizeof (ei->address));
vec_add (hi->hw_address, address, sizeof (ei->address));
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
index 5b165c858d3..0822cd74b52 100644
--- a/src/vnet/gre/interface.c
+++ b/src/vnet/gre/interface.c
@@ -348,11 +348,8 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a,
64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
-
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
/*
* source the FIB entry for the tunnel's destination
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index 25ba70342ee..2cbf4f68dbc 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -1,4 +1,4 @@
-option version = "1.1.0";
+option version = "2.0.0";
service {
rpc want_interface_events returns want_interface_events_reply
@@ -21,13 +21,13 @@ autoreply define sw_interface_set_flags
u8 admin_up_down;
};
-/** \brief Set interface MTU
+/** \brief Set interface physical MTU
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param sw_if_index - index of the interface to set MTU on
@param mtu - MTU
*/
-autoreply define sw_interface_set_mtu
+autoreply define hw_interface_set_mtu
{
u32 client_index;
u32 context;
@@ -35,6 +35,17 @@ autoreply define sw_interface_set_mtu
u16 mtu;
};
+/** \brief Set interface L3 MTU */
+autoreply define sw_interface_set_mtu
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+};
+
+
/** \brief Interface Event generated by want_interface_events
@param client_index - opaque cookie to identify the sender
@param pid - client pid registered to receive notification
@@ -125,6 +136,9 @@ define sw_interface_details
/* MTU */
u16 link_mtu;
+ /* Per protocol MTUs */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+
/* Subinterface ID. A number 0-N to uniquely identify this subinterface under the super interface */
u32 sub_id;
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 797fe44419a..7a2c3abd2d4 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -122,22 +122,6 @@ unserialize_vnet_sw_interface_set_flags (serialize_main_t * m, va_list * va)
/* helper_flags no redistribution */ 0);
}
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
-{
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (hi->max_packet_bytes != mtu)
- {
- u16 l3_pad = hi->max_packet_bytes - hi->max_l3_packet_bytes[VLIB_TX];
- hi->max_packet_bytes = mtu;
- hi->max_l3_packet_bytes[VLIB_TX] =
- hi->max_l3_packet_bytes[VLIB_RX] = mtu - l3_pad;
- ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
- adj_mtu_update (hw_if_index);
- }
-}
-
static void
unserialize_vnet_hw_interface_set_flags (serialize_main_t * m, va_list * va)
{
@@ -681,6 +665,71 @@ vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
pool_put (im->sw_interfaces, sw);
}
+static clib_error_t *
+call_sw_interface_mtu_change_callbacks (vnet_main_t * vnm, u32 sw_if_index)
+{
+ return call_elf_section_interface_callbacks
+ (vnm, sw_if_index, 0, vnm->sw_interface_mtu_change_functions);
+}
+
+void
+vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (si->mtu[VNET_MTU_L3] != mtu)
+ {
+ si->mtu[VNET_MTU_L3] = mtu;
+ call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+ }
+}
+
+void
+vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ u32 mtu[])
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ bool changed = false;
+ int i;
+
+ for (i = 0; i < VNET_N_MTU; i++)
+ {
+ if (si->mtu[i] != mtu[i])
+ {
+ si->mtu[i] = mtu[i];
+ changed = true;
+ }
+ }
+ /* Notify interested parties */
+ if (changed)
+ call_sw_interface_mtu_change_callbacks (vnm, sw_if_index);
+}
+
+/*
+ * Reflect a change in hardware MTU on protocol MTUs
+ */
+static walk_rc_t
+sw_interface_walk_callback (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
+{
+ u32 *link_mtu = ctx;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, *link_mtu);
+ return WALK_CONTINUE;
+}
+
+void
+vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (hi->max_packet_bytes != mtu)
+ {
+ hi->max_packet_bytes = mtu;
+ ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
+ vnet_hw_interface_walk_sw (vnm, hw_if_index, sw_interface_walk_callback,
+ &mtu);
+ }
+}
+
static void
setup_tx_node (vlib_main_t * vm,
u32 node_index, vnet_device_class_t * dev_class)
@@ -762,9 +811,7 @@ vnet_register_interface (vnet_main_t * vnm,
hw->max_rate_bits_per_sec = 0;
hw->min_packet_bytes = 0;
- hw->per_packet_overhead_bytes = 0;
- hw->max_l3_packet_bytes[VLIB_RX] = ~0;
- hw->max_l3_packet_bytes[VLIB_TX] = ~0;
+ vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
if (dev_class->tx_function == 0)
goto no_output_nodes; /* No output/tx nodes to create */
@@ -1523,6 +1570,22 @@ vnet_link_to_l3_proto (vnet_link_t link)
return (0);
}
+vnet_mtu_t
+vnet_link_to_mtu (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (VNET_MTU_IP4);
+ case VNET_LINK_IP6:
+ return (VNET_MTU_IP6);
+ case VNET_LINK_MPLS:
+ return (VNET_MTU_MPLS);
+ default:
+ return (VNET_MTU_L3);
+ }
+}
+
u8 *
default_build_rewrite (vnet_main_t * vnm,
u32 sw_if_index,
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 87addbe67be..b582dba7c8d 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -154,8 +154,10 @@ static void __vnet_interface_function_deinit_##tag##_##f (void) \
_VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_add_del)
#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION(f) \
_VNET_INTERFACE_FUNCTION_DECL(f,hw_interface_link_up_down)
-#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p) \
+#define VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(f,p) \
_VNET_INTERFACE_FUNCTION_DECL_PRIO(f,hw_interface_link_up_down,p)
+#define VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION(f) \
+ _VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_mtu_change)
#define VNET_SW_INTERFACE_ADD_DEL_FUNCTION(f) \
_VNET_INTERFACE_FUNCTION_DECL(f,sw_interface_add_del)
#define VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(f) \
@@ -537,14 +539,6 @@ typedef struct vnet_hw_interface_t
/* Largest packet size for this interface. */
u32 max_packet_bytes;
- /* Number of extra bytes that go on the wire.
- Packet length on wire
- = max (length + per_packet_overhead_bytes, min_packet_bytes). */
- u32 per_packet_overhead_bytes;
-
- /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
- u32 max_l3_packet_bytes[VLIB_N_RX_TX];
-
/* Hash table mapping sub interface id to sw_if_index. */
uword *sub_interface_sw_if_index_by_id;
@@ -641,6 +635,18 @@ typedef enum
VNET_FLOOD_CLASS_NO_FLOOD,
} vnet_flood_class_t;
+/* Per protocol MTU */
+typedef enum
+{
+ VNET_MTU_L3, /* Default payload MTU (without L2 headers) */
+ VNET_MTU_IP4, /* Per-protocol MTUs overriding default */
+ VNET_MTU_IP6,
+ VNET_MTU_MPLS,
+ VNET_N_MTU
+} vnet_mtu_t;
+
+extern vnet_mtu_t vnet_link_to_mtu (vnet_link_t link);
+
/* Software-interface. This corresponds to a Ethernet VLAN, ATM vc, a
tunnel, etc. Configuration (e.g. IP address) gets attached to
software interface. */
@@ -684,6 +690,9 @@ typedef struct
/* VNET_SW_INTERFACE_TYPE_HARDWARE. */
u32 hw_if_index;
+ /* MTU for network layer (not including L2 headers) */
+ u32 mtu[VNET_N_MTU];
+
/* VNET_SW_INTERFACE_TYPE_SUB. */
vnet_sub_interface_t sub;
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 9a1838fa321..e2f4d8fd130 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -50,6 +50,7 @@ vpe_api_main_t vpe_api_main;
#define foreach_vpe_api_msg \
_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
+_(HW_INTERFACE_SET_MTU, hw_interface_set_mtu) \
_(SW_INTERFACE_SET_MTU, sw_interface_set_mtu) \
_(WANT_INTERFACE_EVENTS, want_interface_events) \
_(SW_INTERFACE_DUMP, sw_interface_dump) \
@@ -96,9 +97,9 @@ vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp)
}
static void
-vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
{
- vl_api_sw_interface_set_mtu_reply_t *rmp;
+ vl_api_hw_interface_set_mtu_reply_t *rmp;
vnet_main_t *vnm = vnet_get_main ();
u32 sw_if_index = ntohl (mp->sw_if_index);
u16 mtu = ntohs (mp->mtu);
@@ -138,6 +139,27 @@ vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_HW_INTERFACE_SET_MTU_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
+{
+ vl_api_sw_interface_set_mtu_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ int rv = 0;
+ int i;
+ u32 per_protocol_mtu[VNET_N_MTU];
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ for (i = 0; i < VNET_N_MTU; i++)
+ per_protocol_mtu[i] = ntohl (mp->mtu[i]);
+
+ vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, per_protocol_mtu);
+
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY);
}
@@ -162,6 +184,11 @@ send_sw_interface_details (vpe_api_main_t * am,
mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >>
VNET_HW_INTERFACE_FLAG_SPEED_SHIFT);
mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->mtu[VNET_MTU_L3] = ntohl (swif->mtu[VNET_MTU_L3]);
+ mp->mtu[VNET_MTU_IP4] = ntohl (swif->mtu[VNET_MTU_IP4]);
+ mp->mtu[VNET_MTU_IP6] = ntohl (swif->mtu[VNET_MTU_IP6]);
+ mp->mtu[VNET_MTU_MPLS] = ntohl (swif->mtu[VNET_MTU_MPLS]);
+
mp->context = context;
strncpy ((char *) mp->interface_name,
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index b803a31c05f..264c1f34e18 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -362,7 +362,7 @@ show_sw_interfaces (vlib_main_t * vm,
if (visible)
vec_add1 (sorted_sis, si[0]);}
));
- /* *INDENT-OFF* */
+ /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -449,20 +449,19 @@ show_sw_interfaces (vlib_main_t * vm,
format_ip6_address, r6, ia->address_length);
}));
/* *INDENT-ON* */
+ }
+ }
+ else
+ {
+ vec_foreach (si, sorted_sis)
+ {
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
+ }
}
-}
-
-else
-{
- vec_foreach (si, sorted_sis)
- {
- vlib_cli_output (vm, "%U\n", format_vnet_sw_interface, vnm, si);
- }
-}
done:
-vec_free (sorted_sis);
-return error;
+ vec_free (sorted_sis);
+ return error;
}
/* *INDENT-OFF* */
@@ -1115,12 +1114,17 @@ static clib_error_t *
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_main_t *vnm = vnet_get_main ();
- u32 hw_if_index, mtu;
+ u32 hw_if_index, sw_if_index, mtu;
ethernet_main_t *em = &ethernet_main;
+ u32 mtus[VNET_N_MTU] = { 0, 0, 0, 0 };
if (unformat (input, "%d %U", &mtu,
unformat_vnet_hw_interface, vnm, &hw_if_index))
{
+ /*
+ * Change physical MTU on interface. Only supported for Ethernet
+ * interfaces
+ */
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
@@ -1137,17 +1141,35 @@ mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
hi->max_supported_packet_bytes);
vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ goto done;
}
+ else if (unformat (input, "packet %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ /* Set default packet MTU (including L3 header) */
+ mtus[VNET_MTU_L3] = mtu;
+ else if (unformat (input, "ip4 %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_IP4] = mtu;
+ else if (unformat (input, "ip6 %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_IP6] = mtu;
+ else if (unformat (input, "mpls %d %U", &mtu,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ mtus[VNET_MTU_MPLS] = mtu;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
+
+ vnet_sw_interface_set_protocol_mtu (vnm, sw_if_index, mtus);
+
+done:
return 0;
}
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
.path = "set interface mtu",
- .short_help = "set interface mtu <value> <interface>",
+ .short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
.function = mtu_cmd,
};
/* *INDENT-ON* */
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index 7e94092b52e..631120b96ac 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -291,6 +291,16 @@ format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
return s;
}
+static u8 *
+format_vnet_sw_interface_mtu (u8 * s, va_list * args)
+{
+ vnet_sw_interface_t *si = va_arg (*args, vnet_sw_interface_t *);
+
+ return format (s, "%d/%d/%d/%d", si->mtu[VNET_MTU_L3],
+ si->mtu[VNET_MTU_IP4],
+ si->mtu[VNET_MTU_IP6], si->mtu[VNET_MTU_MPLS]);
+}
+
u8 *
format_vnet_sw_interface (u8 * s, va_list * args)
{
@@ -299,12 +309,14 @@ format_vnet_sw_interface (u8 * s, va_list * args)
vnet_interface_main_t *im = &vnm->interface_main;
if (!si)
- return format (s, "%=32s%=5s%=16s%=16s%=16s",
- "Name", "Idx", "State", "Counter", "Count");
+ return format (s, "%=32s%=5s%=10s%=21s%=16s%=16s",
+ "Name", "Idx", "State", "MTU (L3/IP4/IP6/MPLS)", "Counter",
+ "Count");
- s = format (s, "%-32U%=5d%=16U",
+ s = format (s, "%-32U%=5d%=10U%=21U",
format_vnet_sw_interface_name, vnm, si, si->sw_if_index,
- format_vnet_sw_interface_flags, si->flags);
+ format_vnet_sw_interface_flags, si->flags,
+ format_vnet_sw_interface_mtu, si);
s = format_vnet_sw_interface_cntrs (s, im, si);
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index e1568e33b5d..c4dd3b72e51 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -262,20 +262,22 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index)
return hw->flags;
}
-always_inline uword
-vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
- vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index)
{
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- return hw->max_l3_packet_bytes[dir];
+ return hw->max_packet_bytes;
}
-always_inline uword
-vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
- vlib_rx_or_tx_t dir)
+always_inline u32
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index, vnet_mtu_t af)
{
- vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- return (hw->max_l3_packet_bytes[dir]);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ u32 mtu;
+ mtu = sw->mtu[af] > 0 ? sw->mtu[af] : sw->mtu[VNET_MTU_L3];
+ if (mtu == 0)
+ return 9000; /* $$$ Deal with interface-types not setting MTU */
+ return mtu;
}
always_inline uword
@@ -339,6 +341,11 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm,
/* Set the MTU on the HW interface */
void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
+/* Set the MTU on the SW interface */
+void vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
+void vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ u32 mtu[]);
+
/* update the unnumbered state of an interface */
void vnet_sw_interface_update_unnumbered (u32 sw_if_index,
u32 ip_sw_if_index, u8 enable);
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index fe78eae8e5b..e281d7e1d2f 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -2633,10 +2633,6 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
{
if (is_add)
{
- vnet_hw_interface_t *hw_if0;
-
- hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
pool_get (nm->if_radv_pool, a);
ri = a - nm->if_radv_pool;
@@ -2670,7 +2666,8 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
a->send_radv = 1;
/* fill in radv_info for this interface that will be needed later */
- a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+ a->adv_link_mtu =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VNET_MTU_IP6);
clib_memcpy (a->link_layer_address, eth_if0->address, 6);
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index c39d27d5e4f..a47704a62e6 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -526,10 +526,8 @@ ipip_add_tunnel (ipip_transport_t transport,
hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12;
-
/* Standard default ipip MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
t->tunnel_src = *src;
t->tunnel_dst = *dst;
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index cfdd0f87e3a..38ca899f274 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -337,7 +337,7 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
t->dev_instance = t_idx;
t->user_instance = t_idx;
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1480;
+ vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
ipip_tunnel_db_add (t, &key);
diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c
index 0772ce73df2..8903df01c04 100644
--- a/src/vnet/ipsec-gre/interface.c
+++ b/src/vnet/ipsec-gre/interface.c
@@ -174,12 +174,10 @@ vnet_ipsec_gre_add_del_tunnel (vnet_ipsec_gre_add_del_tunnel_args_t * a,
hi->min_packet_bytes = 64 + sizeof (gre_header_t) +
sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t);
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
- 9000;
+ /* TODO: Should take tunnel overhead into consideration */
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index 7ca2c12cbcf..a142edf3f0a 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -643,6 +643,9 @@ vnet_mpls_tunnel_create (u8 l2_only,
hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
}
+ /* Standard default MPLS tunnel MTU. */
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
+
/*
* Add the new tunnel to the tunnel DB - key:SW if index
*/
diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c
index 6e2dccc552e..ffa86343b47 100644
--- a/src/vnet/sctp/sctp.c
+++ b/src/vnet/sctp/sctp.c
@@ -43,8 +43,12 @@ sctp_connection_bind (u32 session_index, transport_endpoint_t * tep)
ip_copy (&listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_ip,
&tep->ip, tep->is_ip4);
- listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU =
- vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index, VLIB_TX);
+ u32 mtu = tep->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+ tep->sw_if_index,
+ VNET_MTU_IP4) :
+ vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index,
+ VNET_MTU_IP6);
+ listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = mtu;
listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.is_ip4 = tep->is_ip4;
listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto =
TRANSPORT_PROTO_SCTP;
@@ -480,8 +484,12 @@ sctp_connection_open (transport_endpoint_t * rmt)
clib_spinlock_lock_if_init (&tm->half_open_lock);
sctp_conn = sctp_half_open_connection_new (thread_id);
- sctp_conn->sub_conn[idx].PMTU =
- vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index, VLIB_TX);
+ u32 mtu = rmt->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (),
+ rmt->sw_if_index,
+ VNET_MTU_IP4) :
+ vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index,
+ VNET_MTU_IP6);
+ sctp_conn->sub_conn[idx].PMTU = mtu;
transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection;
ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
diff --git a/src/vnet/srp/interface.c b/src/vnet/srp/interface.c
index 44e2b0d6460..735f960cbd3 100644
--- a/src/vnet/srp/interface.c
+++ b/src/vnet/srp/interface.c
@@ -432,7 +432,7 @@ create_simulated_srp_interfaces (vlib_main_t * vm,
hi->min_packet_bytes = 40 + 16;
/* Standard default ethernet MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1500;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 1500);
vec_free (hi->hw_address);
vec_add (hi->hw_address, address, sizeof (address));
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 10a86a457e8..e61c91a45a2 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -1068,8 +1068,7 @@ vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t * ap)
hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
hw->min_supported_packet_bytes = TAP_MTU_MIN;
hw->max_supported_packet_bytes = TAP_MTU_MAX;
- hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] =
- hw->max_supported_packet_bytes - sizeof (ethernet_header_t);
+ vnet_sw_interface_set_mtu (tm->vnet_main, hw->sw_if_index, 9000);
ti->sw_if_index = hw->sw_if_index;
if (ap->sw_if_indexp)
*(ap->sw_if_indexp) = hw->sw_if_index;
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
index 153fdcca785..5098aa6ecec 100644
--- a/src/vnet/vnet.h
+++ b/src/vnet/vnet.h
@@ -66,6 +66,8 @@ typedef struct vnet_main_t
* sw_interface_add_del_functions[VNET_ITF_FUNC_N_PRIO];
_vnet_interface_function_list_elt_t
* sw_interface_admin_up_down_functions[VNET_ITF_FUNC_N_PRIO];
+ _vnet_interface_function_list_elt_t
+ * sw_interface_mtu_change_functions[VNET_ITF_FUNC_N_PRIO];
uword *interface_tag_by_sw_if_index;