summary | refs | log | tree | commit | diff | stats
path: root/src/vnet
diff options
context:
space:
mode:
author Ole Troan <ot@cisco.com> 2018-03-27 14:48:39 +0200
committer Neale Ranns <nranns@cisco.com> 2018-04-13 13:27:44 +0000
commit 70083ee74c3141bbefb185525315f1b34497dcaa (patch)
tree 2b93f23b853239152864ad5ee0db073d37c341ea /src/vnet
parent 25b049484fcf9161edb2c19250066b893c38c264 (diff)
MTU: Setting of MTU on software interface (instead of hardware interface)
Change-Id: I98bd454a761a1032738a21edeb0fe847e801f901
Signed-off-by: Ole Troan <ot@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- src/vnet/adj/adj.c | 18
-rw-r--r-- src/vnet/devices/virtio/vhost-user.c | 4
-rw-r--r-- src/vnet/ethernet/interface.c | 5
-rw-r--r-- src/vnet/gre/interface.c | 6
-rw-r--r-- src/vnet/interface.api | 4
-rw-r--r-- src/vnet/interface.c | 62
-rw-r--r-- src/vnet/interface.h | 11
-rw-r--r-- src/vnet/interface_api.c | 33
-rw-r--r-- src/vnet/interface_cli.c | 43
-rw-r--r-- src/vnet/interface_funcs.h | 25
-rw-r--r-- src/vnet/ip/icmp4.c | 6
-rw-r--r-- src/vnet/ip/icmp6.c | 7
-rw-r--r-- src/vnet/ip/ip4_forward.c | 98
-rw-r--r-- src/vnet/ip/ip6_forward.c | 104
-rw-r--r-- src/vnet/ip/ip6_neighbor.c | 7
-rw-r--r-- src/vnet/ipip/ipip.c | 7
-rw-r--r-- src/vnet/ipip/sixrd.c | 3
-rw-r--r-- src/vnet/ipsec-gre/interface.c | 5
-rw-r--r-- src/vnet/unix/tapcli.c | 5
19 files changed, 232 insertions, 221 deletions
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index ed4bada6f24..0c9f7468b11 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -353,10 +353,8 @@ adj_mtu_update_walk_cb (adj_index_t ai,
return (ADJ_WALK_RC_CONTINUE);
}
-static void
-adj_sw_mtu_update (vnet_main_t * vnm,
- u32 sw_if_index,
- void *ctx)
+void
+adj_mtu_update (u32 sw_if_index)
{
/*
* Walk all the adjacencies on the interface to update the cached MTU
@@ -364,18 +362,6 @@ adj_sw_mtu_update (vnet_main_t * vnm,
adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL);
}
-void
-adj_mtu_update (u32 hw_if_index)
-{
- /*
- * Walk all the SW interfaces on the HW interface to update the cached MTU
- */
- vnet_hw_interface_walk_sw(vnet_get_main(),
- hw_if_index,
- adj_sw_mtu_update,
- NULL);
-}
-
/**
* @brief Walk the Adjacencies on a given interface
*/
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 5460f10b74e..34b131c556f 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -2869,8 +2869,8 @@ vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm,
if (error)
clib_error_report (error);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index);
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
+ vnet_sw_interface_set_mtu (vnm, si->sw_if_index, 9000);
}
/*
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index 2ed20e15c24..4e1d081635b 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -300,12 +300,9 @@ ethernet_register_interface (vnet_main_t * vnm,
ETHERNET_MIN_PACKET_BYTES;
hi->max_packet_bytes = hi->max_supported_packet_bytes =
ETHERNET_MAX_PACKET_BYTES;
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default ethernet MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
-
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
clib_memcpy (ei->address, address, sizeof (ei->address));
vec_free (hi->hw_address);
vec_add (hi->hw_address, address, sizeof (ei->address));
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c
index 5b165c858d3..013dde60af9 100644
--- a/src/vnet/gre/interface.c
+++ b/src/vnet/gre/interface.c
@@ -348,12 +348,8 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a,
64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
-
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
-
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
/*
* source the FIB entry for the tunnel's destination
* and become a child thereof. The tunnel will then get poked
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index 25ba70342ee..0f88863dff3 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -75,7 +75,7 @@ autoreply define want_interface_events
@param interface_name - name of the interface
@param link_duplex - 1 if half duplex, 2 if full duplex
@param link_speed - 1 = 10M, 2 = 100M, 4 = 1G, 8 = 10G, 16 = 40G, 32 = 100G
- @param link_MTU - max. transmittion unit
+ @param mtu - max. transmission unit
@param sub_if_id - A number 0-N to uniquely identify this subif on super if
@param sub_dot1ad - 0 = dot1q, 1 = dot1ad
@param sub_dot1ah - 1 = dot1ah, 0 = otherwise
@@ -123,7 +123,7 @@ define sw_interface_details
u8 link_speed;
/* MTU */
- u16 link_mtu;
+ u16 mtu;
/* Subinterface ID. A number 0-N to uniquely identify this subinterface under the super interface */
u32 sub_id;
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index b07a9ba7553..28c46110851 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -122,20 +122,55 @@ unserialize_vnet_sw_interface_set_flags (serialize_main_t * m, va_list * va)
/* helper_flags no redistribution */ 0);
}
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+static void
+vnet_sw_interface_set_mtu_cb (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
{
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ u32 *mtu = ctx;
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ ASSERT (si);
+
+ si->max_l3_packet_bytes[VLIB_TX] = si->max_l3_packet_bytes[VLIB_RX] = *mtu;
+ adj_mtu_update (sw_if_index);
+}
+
+/*
+ * MTU is set per software interface. Setting MTU on a parent
+ * interface will override the MTU setting on sub-interfaces.
+ * TODO: If sub-interface MTU is ~0 inherit from parent?
+ */
+int
+vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu)
+{
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_hw_interface_t *hi = vnet_get_sw_hw_interface (vnm, sw_if_index);
+
+ if (mtu < hi->min_packet_bytes)
+ return VNET_API_ERROR_INVALID_VALUE;
+ if (mtu > hi->max_packet_bytes)
+ return VNET_API_ERROR_INVALID_VALUE;
- if (hi->max_packet_bytes != mtu)
+ /* If done on a parent interface */
+ if (si->sw_if_index == si->sup_sw_if_index)
{
- u16 l3_pad = hi->max_packet_bytes - hi->max_l3_packet_bytes[VLIB_TX];
- hi->max_packet_bytes = mtu;
- hi->max_l3_packet_bytes[VLIB_TX] =
- hi->max_l3_packet_bytes[VLIB_RX] = mtu - l3_pad;
- ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
- adj_mtu_update (hw_if_index);
+ if (hi->hw_class_index == ethernet_hw_interface_class.index)
+ {
+ ethernet_set_flags (vnm, hi->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_MTU);
+ }
+
+ /* Override MTU on any sub-interface */
+ vnet_hw_interface_walk_sw (vnm,
+ hi->hw_if_index,
+ vnet_sw_interface_set_mtu_cb, &mtu);
}
+ else
+ {
+ si->max_l3_packet_bytes[VLIB_TX] = si->max_l3_packet_bytes[VLIB_RX] =
+ mtu;
+ adj_mtu_update (sw_if_index);
+ }
+
+ return 0;
}
static void
@@ -584,6 +619,9 @@ vnet_create_sw_interface_no_callbacks (vnet_main_t * vnm,
if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
sw->sup_sw_if_index = sw->sw_if_index;
+ sw->max_l3_packet_bytes[VLIB_RX] = ~0;
+ sw->max_l3_packet_bytes[VLIB_TX] = ~0;
+
/* Allocate counters for this interface. */
{
u32 i;
@@ -758,9 +796,7 @@ vnet_register_interface (vnet_main_t * vnm,
hw->max_rate_bits_per_sec = 0;
hw->min_packet_bytes = 0;
- hw->per_packet_overhead_bytes = 0;
- hw->max_l3_packet_bytes[VLIB_RX] = ~0;
- hw->max_l3_packet_bytes[VLIB_TX] = ~0;
+ hw->max_packet_bytes = 9000; /* default */
if (dev_class->tx_function == 0)
goto no_output_nodes; /* No output/tx nodes to create */
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index 7556bc5544e..d462e1e8448 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -512,14 +512,6 @@ typedef struct vnet_hw_interface_t
/* Largest packet size for this interface. */
u32 max_packet_bytes;
- /* Number of extra bytes that go on the wire.
- Packet length on wire
- = max (length + per_packet_overhead_bytes, min_packet_bytes). */
- u32 per_packet_overhead_bytes;
-
- /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
- u32 max_l3_packet_bytes[VLIB_N_RX_TX];
-
/* Hash table mapping sub interface id to sw_if_index. */
uword *sub_interface_sw_if_index_by_id;
@@ -656,6 +648,9 @@ typedef struct
u32 link_speed;
+ /* Receive and transmit layer 3 packet size limits (MRU/MTU). */
+ u32 max_l3_packet_bytes[VLIB_N_RX_TX];
+
union
{
/* VNET_SW_INTERFACE_TYPE_HARDWARE. */
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 8d982e36b73..116ee63bdba 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -102,40 +102,11 @@ vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp)
vnet_main_t *vnm = vnet_get_main ();
u32 sw_if_index = ntohl (mp->sw_if_index);
u16 mtu = ntohs (mp->mtu);
- ethernet_main_t *em = &ethernet_main;
int rv = 0;
VALIDATE_SW_IF_INDEX (mp);
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto bad_sw_if_index;
- }
-
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, si->hw_if_index);
- ethernet_interface_t *eif = ethernet_get_interface (em, si->hw_if_index);
-
- if (!eif)
- {
- rv = VNET_API_ERROR_FEATURE_DISABLED;
- goto bad_sw_if_index;
- }
-
- if (mtu < hi->min_supported_packet_bytes)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto bad_sw_if_index;
- }
-
- if (mtu > hi->max_supported_packet_bytes)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto bad_sw_if_index;
- }
-
- vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
+ rv = vnet_sw_interface_set_mtu (vnm, sw_if_index, mtu);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY);
@@ -161,7 +132,7 @@ send_sw_interface_details (vpe_api_main_t * am,
VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT);
mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >>
VNET_HW_INTERFACE_FLAG_SPEED_SHIFT);
- mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->mtu = ntohs (swif->max_l3_packet_bytes[VLIB_TX]);
mp->context = context;
strncpy ((char *) mp->interface_name,
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index d151335aa1f..fe17c823b36 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -1103,32 +1103,29 @@ static clib_error_t *
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_main_t *vnm = vnet_get_main ();
- u32 hw_if_index, mtu;
- ethernet_main_t *em = &ethernet_main;
+ u32 sw_if_index, mtu;
- if (unformat (input, "%d %U", &mtu,
- unformat_vnet_hw_interface, vnm, &hw_if_index))
+ if (unformat (input, "%d %U", &mtu, unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
{
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
- ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
-
- if (!eif)
- return clib_error_return (0, "not supported");
-
- if (mtu < hi->min_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): "
- "must be >= min pkt bytes (%d)", mtu,
- hi->min_supported_packet_bytes);
-
- if (mtu > hi->max_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): must be <= (%d)", mtu,
- hi->max_supported_packet_bytes);
-
- vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ ;
}
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ int rv = vnet_sw_interface_set_mtu (vnm, sw_if_index, mtu);
+ if (rv < 0)
+ {
+ vnet_hw_interface_t *hi = vnet_get_sw_hw_interface (vnm, sw_if_index);
+ ASSERT (hi);
+ return clib_error_return (0, "Invalid mtu (%d): "
+ "must be between min pkt bytes (%d) and max pkt bytes (%d)",
+ mtu, hi->min_packet_bytes,
+ hi->max_packet_bytes);
+ }
return 0;
}
@@ -1406,7 +1403,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input,
* @cliexend
?*/
/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = {
+VLIB_CLI_COMMAND (cmd_set_if_rx_mode, static) = {
.path = "set interface rx-mode",
.short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]",
.function = set_interface_rx_mode,
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 6e188f7f543..42b69bdf789 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -87,6 +87,15 @@ vnet_get_sup_hw_interface (vnet_main_t * vnm, u32 sw_if_index)
return vnet_get_hw_interface (vnm, sw->hw_if_index);
}
+always_inline vnet_hw_interface_t *
+vnet_get_sw_hw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return vnet_get_hw_interface (vnm, sw->hw_if_index);
+ return vnet_get_sup_hw_interface (vnm, sw_if_index);
+}
+
always_inline vnet_hw_interface_class_t *
vnet_get_hw_interface_class (vnet_main_t * vnm, u32 hw_class_index)
{
@@ -219,19 +228,11 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index)
}
always_inline uword
-vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
- vlib_rx_or_tx_t dir)
-{
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- return hw->max_l3_packet_bytes[dir];
-}
-
-always_inline uword
vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
vlib_rx_or_tx_t dir)
{
- vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- return (hw->max_l3_packet_bytes[dir]);
+ vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index);
+ return (sw->max_l3_packet_bytes[dir]);
}
always_inline uword
@@ -292,8 +293,8 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm,
vnet_hw_interface_rx_mode
mode);
-/* Set the MTU on the HW interface */
-void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
+/* Set the MTU on the SW interface */
+int vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
/* update the unnumbered state of an interface */
void vnet_sw_interface_update_unnumbered (u32 sw_if_index,
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index 1fe01e48500..a4808f23271 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -513,13 +513,15 @@ ip4_icmp_error (vlib_main_t * vm,
b->current_length = 0;
}
}
- p0->current_length =
- p0->current_length > 576 ? 576 : p0->current_length;
/* Add IP header and ICMPv4 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip4_header_t) -
sizeof (icmp46_header_t) - 4);
+
+ p0->current_length =
+ p0->current_length > 576 ? 576 : p0->current_length;
+
out_ip0 = vlib_buffer_get_current (p0);
icmp0 = (icmp46_header_t *) & out_ip0[1];
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index fd5d0ecba38..6beec28c2cf 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -526,13 +526,14 @@ ip6_icmp_error (vlib_main_t * vm,
b->current_length = 0;
}
}
- p0->current_length =
- p0->current_length > 1280 ? 1280 : p0->current_length;
-
/* Add IP header and ICMPv6 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip6_header_t) -
sizeof (icmp46_header_t) - 4);
+
+ p0->current_length =
+ p0->current_length > 1280 ? 1280 : p0->current_length;
+
out_ip0 = vlib_buffer_get_current (p0);
icmp0 = (icmp46_header_t *) & out_ip0[1];
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 7c56a294436..3dce590c807 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -1940,6 +1940,29 @@ typedef enum
IP4_REWRITE_NEXT_ICMP_ERROR,
} ip4_rewrite_next_t;
+always_inline void
+ip4_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes,
+ u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
+{
+ if (buffer_packet_bytes > adj_packet_bytes)
+ {
+ *error = IP4_ERROR_MTU_EXCEEDED;
+ if (df)
+ {
+ icmp4_error_set_vnet_buffer
+ (b, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ adj_packet_bytes);
+ *next = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ /* Add support for fragmentation here */
+ *next = IP4_REWRITE_NEXT_DROP;
+ }
+ }
+}
+
always_inline uword
ip4_rewrite_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -2100,26 +2123,20 @@ ip4_rewrite_inline (vlib_main_t * vm,
vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
/* Check MTU of outgoing interface. */
- if (vlib_buffer_length_in_chain (vm, p0) >
- adj0[0].rewrite_header.max_l3_packet_bytes)
- {
- error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
- icmp4_error_set_vnet_buffer
- (p0, ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
- 0);
- }
- if (vlib_buffer_length_in_chain (vm, p1) >
- adj1[0].rewrite_header.max_l3_packet_bytes)
- {
- error1 = IP4_ERROR_MTU_EXCEEDED;
- next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
- icmp4_error_set_vnet_buffer
- (p1, ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
- 0);
- }
+ ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+ adj0[0].rewrite_header.max_l3_packet_bytes,
+ ip0->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ &next0, &error0);
+ ip4_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1),
+ adj1[0].rewrite_header.max_l3_packet_bytes,
+ ip1->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ &next1, &error1);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
if (is_mcast)
{
@@ -2143,10 +2160,17 @@ ip4_rewrite_inline (vlib_main_t * vm,
tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+ }
+
if (PREDICT_FALSE
(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next0, p0);
+
}
if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
{
@@ -2157,16 +2181,18 @@ ip4_rewrite_inline (vlib_main_t * vm,
tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+ if (is_midchain)
+ {
+ adj1->sub_type.midchain.fixup_func
+ (vm, adj1, p1, adj1->sub_type.midchain.fixup_data); /* p1 takes adj1's fixup data, not adj0's */
+ }
+
if (PREDICT_FALSE
(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index1, &next1, p1);
}
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_two_headers (adj0[0], adj1[0],
- ip0, ip1, sizeof (ethernet_header_t));
-
/*
* Bump the per-adjacency counters
*/
@@ -2185,13 +2211,6 @@ ip4_rewrite_inline (vlib_main_t * vm,
vlib_buffer_length_in_chain (vm, p1) + rw_len1);
}
- if (is_midchain)
- {
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- adj1->sub_type.midchain.fixup_func
- (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
- }
if (is_mcast)
{
/*
@@ -2272,6 +2291,7 @@ ip4_rewrite_inline (vlib_main_t * vm,
/* Guess we are only writing on simple Ethernet header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
if (is_mcast)
{
/*
@@ -2291,16 +2311,12 @@ ip4_rewrite_inline (vlib_main_t * vm,
vlib_buffer_length_in_chain (vm, p0) + rw_len0);
/* Check MTU of outgoing interface. */
- if (vlib_buffer_length_in_chain (vm, p0) >
- adj0[0].rewrite_header.max_l3_packet_bytes)
- {
- error0 = IP4_ERROR_MTU_EXCEEDED;
- next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
- icmp4_error_set_vnet_buffer
- (p0, ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
- 0);
- }
+ ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+ adj0[0].rewrite_header.max_l3_packet_bytes,
+ ip0->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
+ &next0, &error0);
+
if (is_mcast)
{
error0 = ((adj0[0].rewrite_header.sw_if_index ==
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 588cd0675a4..7599733fcb5 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1774,6 +1774,19 @@ typedef enum
IP6_REWRITE_NEXT_ICMP_ERROR,
} ip6_rewrite_next_t;
+always_inline void
+ip6_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes,
+ u16 adj_packet_bytes, u32 * next, u32 * error)
+{
+ if (adj_packet_bytes >= 1280 && buffer_packet_bytes > adj_packet_bytes)
+ {
+ *error = IP6_ERROR_MTU_EXCEEDED;
+ icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0,
+ adj_packet_bytes);
+ *next = IP6_REWRITE_NEXT_ICMP_ERROR;
+ }
+}
+
always_inline uword
ip6_rewrite_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1898,9 +1911,14 @@ ip6_rewrite_inline (vlib_main_t * vm,
{
p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
}
+
adj0 = adj_get (adj_index0);
adj1 = adj_get (adj_index1);
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
+
rw_len0 = adj0[0].rewrite_header.data_bytes;
rw_len1 = adj1[0].rewrite_header.data_bytes;
vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
@@ -1919,16 +1937,12 @@ ip6_rewrite_inline (vlib_main_t * vm,
}
/* Check MTU of outgoing interface. */
- error0 =
- (vlib_buffer_length_in_chain (vm, p0) >
- adj0[0].
- rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
- error0);
- error1 =
- (vlib_buffer_length_in_chain (vm, p1) >
- adj1[0].
- rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
- error1);
+ ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+ adj0[0].rewrite_header.max_l3_packet_bytes, &next0,
+ &error0);
+ ip6_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1),
+ adj1[0].rewrite_header.max_l3_packet_bytes, &next1,
+ &error1);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP headerr */
@@ -1945,6 +1959,19 @@ ip6_rewrite_inline (vlib_main_t * vm,
(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next0, p0);
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+ }
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ }
}
if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
{
@@ -1959,26 +1986,19 @@ ip6_rewrite_inline (vlib_main_t * vm,
(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index1, &next1, p1);
- }
- /* Guess we are only writing on simple Ethernet header. */
- vnet_rewrite_two_headers (adj0[0], adj1[0],
- ip0, ip1, sizeof (ethernet_header_t));
-
- if (is_midchain)
- {
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- adj1->sub_type.midchain.fixup_func
- (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
- {
- /*
- * copy bytes from the IP address into the MAC rewrite
- */
- vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
- vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+ if (is_midchain)
+ {
+ adj1->sub_type.midchain.fixup_func
+ (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
+ }
+ if (is_mcast)
+ {
+ /*
+ * copy bytes from the IP address into the MAC rewrite
+ */
+ vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+ }
}
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -2054,11 +2074,9 @@ ip6_rewrite_inline (vlib_main_t * vm,
}
/* Check MTU of outgoing interface. */
- error0 =
- (vlib_buffer_length_in_chain (vm, p0) >
- adj0[0].
- rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
- error0);
+ ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0),
+ adj0[0].rewrite_header.max_l3_packet_bytes, &next0,
+ &error0);
/* Don't adjust the buffer for hop count issue; icmp-error node
* wants to see the IP headerr */
@@ -2076,16 +2094,16 @@ ip6_rewrite_inline (vlib_main_t * vm,
(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (lm->output_feature_arc_index,
tx_sw_if_index0, &next0, p0);
- }
- if (is_midchain)
- {
- adj0->sub_type.midchain.fixup_func
- (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
- }
- if (is_mcast)
- {
- vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func
+ (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
+ }
+ if (is_mcast)
+ {
+ vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+ }
}
p0->error = error_node->errors[error0];
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
index fee4356f5e0..a1439faa154 100644
--- a/src/vnet/ip/ip6_neighbor.c
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -2575,10 +2575,6 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
{
if (is_add)
{
- vnet_hw_interface_t *hw_if0;
-
- hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
pool_get (nm->if_radv_pool, a);
ri = a - nm->if_radv_pool;
@@ -2612,7 +2608,8 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
a->send_radv = 1;
/* fill in radv_info for this interface that will be needed later */
- a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+ a->adv_link_mtu =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
clib_memcpy (a->link_layer_address, eth_if0->address, 6);
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index 82c961cdddd..9b808d4ac0f 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -476,17 +476,16 @@ ipip_add_tunnel (ipip_transport_t transport,
{
vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
hi->min_packet_bytes = 64 + sizeof (ip4_header_t);
+ hi->max_packet_bytes = 65536 - sizeof (ip4_header_t);
}
else
{
vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
+ hi->max_packet_bytes = 65536 - sizeof (ip6_header_t);
}
- hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12;
-
- /* Standard default ipip MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, hi->max_packet_bytes);
t->tunnel_src = *src;
t->tunnel_dst = *dst;
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index cfdd0f87e3a..998025782fb 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -337,7 +337,8 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
t->dev_instance = t_idx;
t->user_instance = t_idx;
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1480;
+ /* Set default MTU to 1480 */
+ vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
ipip_tunnel_db_add (t, &key);
diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c
index 0772ce73df2..fa33684c50d 100644
--- a/src/vnet/ipsec-gre/interface.c
+++ b/src/vnet/ipsec-gre/interface.c
@@ -174,12 +174,9 @@ vnet_ipsec_gre_add_del_tunnel (vnet_ipsec_gre_add_del_tunnel_args_t * a,
hi->min_packet_bytes = 64 + sizeof (gre_header_t) +
sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t);
- hi->per_packet_overhead_bytes =
- /* preamble */ 8 + /* inter frame gap */ 12;
/* Standard default gre MTU. */
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
- 9000;
+ vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 10a86a457e8..5c550bc3320 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -1068,8 +1068,9 @@ vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t * ap)
hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
hw->min_supported_packet_bytes = TAP_MTU_MIN;
hw->max_supported_packet_bytes = TAP_MTU_MAX;
- hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] =
- hw->max_supported_packet_bytes - sizeof (ethernet_header_t);
+ vnet_sw_interface_set_mtu (tm->vnet_main, hw->sw_if_index,
+ hw->max_supported_packet_bytes -
+ sizeof (ethernet_header_t));
ti->sw_if_index = hw->sw_if_index;
if (ap->sw_if_indexp)
*(ap->sw_if_indexp) = hw->sw_if_index;