From 70083ee74c3141bbefb185525315f1b34497dcaa Mon Sep 17 00:00:00 2001 From: Ole Troan Date: Tue, 27 Mar 2018 14:48:39 +0200 Subject: MTU: Setting of MTU on software interface (instead of hardware interface) Change-Id: I98bd454a761a1032738a21edeb0fe847e801f901 Signed-off-by: Ole Troan --- src/plugins/dpdk/device/init.c | 28 +++--- src/vat/api_format.c | 2 +- src/vnet/adj/adj.c | 18 +--- src/vnet/devices/virtio/vhost-user.c | 4 +- src/vnet/ethernet/interface.c | 5 +- src/vnet/gre/interface.c | 6 +- src/vnet/interface.api | 4 +- src/vnet/interface.c | 62 +++++++++--- src/vnet/interface.h | 11 +-- src/vnet/interface_api.c | 33 +------ src/vnet/interface_cli.c | 43 ++++----- src/vnet/interface_funcs.h | 25 ++--- src/vnet/ip/icmp4.c | 6 +- src/vnet/ip/icmp6.c | 7 +- src/vnet/ip/ip4_forward.c | 98 +++++++++++-------- src/vnet/ip/ip6_forward.c | 104 ++++++++++++--------- src/vnet/ip/ip6_neighbor.c | 7 +- src/vnet/ipip/ipip.c | 7 +- src/vnet/ipip/sixrd.c | 3 +- src/vnet/ipsec-gre/interface.c | 5 +- src/vnet/unix/tapcli.c | 5 +- .../vpp/jvpp/core/examples/CallbackApiExample.java | 4 +- .../vpp/jvpp/core/examples/FutureApiExample.java | 2 +- 23 files changed, 250 insertions(+), 239 deletions(-) (limited to 'src') diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 76d122d37d5..ac20edca303 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -696,12 +696,9 @@ dpdk_lib_init (dpdk_main_t * dm) clib_warning ("VLAN strip cannot be supported by interface\n"); } - if (hi) - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - else - clib_warning ("hi NULL"); - + vnet_sw_interface_set_mtu (dm->vnet_main, sw->sw_if_index, + xd->port_conf.rxmode.max_rx_pkt_len - + sizeof (ethernet_header_t)); rte_eth_dev_set_mtu (xd->device_index, mtu); } @@ -1530,9 +1527,10 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* 
Init l3 packet size allowed on bonded interface */ bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + vnet_sw_interface_set_mtu (vnm, bhi->sw_if_index, + ETHERNET_MAX_PACKET_BYTES - + sizeof (ethernet_header_t)); + while (nlink >= 1) { /* for all slave links */ int slave = slink[--nlink]; @@ -1570,11 +1568,13 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) clib_memcpy (shi->hw_address, addr, 6); clib_memcpy (sei->address, addr, 6); /* Set l3 packet size allowed as the lowest of slave */ - if (bhi->max_l3_packet_bytes[VLIB_RX] > - shi->max_l3_packet_bytes[VLIB_RX]) - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - shi->max_l3_packet_bytes[VLIB_RX]; + vnet_sw_interface_t *bsi = + vnet_get_sw_interface (vnm, bhi->sw_if_index); + if (bsi->max_l3_packet_bytes[VLIB_RX] > + ssi->max_l3_packet_bytes[VLIB_RX]) + bsi->max_l3_packet_bytes[VLIB_RX] = + bsi->max_l3_packet_bytes[VLIB_TX] = + ssi->max_l3_packet_bytes[VLIB_RX]; /* Set max packet size allowed as the lowest of slave */ if (bhi->max_packet_bytes > shi->max_packet_bytes) bhi->max_packet_bytes = shi->max_packet_bytes; diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 019d095c40a..db964feeefc 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -981,7 +981,7 @@ static void vl_api_sw_interface_details_t_handler_json vat_json_object_add_uint (node, "link_up_down", mp->link_up_down); vat_json_object_add_uint (node, "link_duplex", mp->link_duplex); vat_json_object_add_uint (node, "link_speed", mp->link_speed); - vat_json_object_add_uint (node, "mtu", ntohs (mp->link_mtu)); + vat_json_object_add_uint (node, "mtu", ntohs (mp->mtu)); vat_json_object_add_uint (node, "sub_id", ntohl (mp->sub_id)); vat_json_object_add_uint (node, "sub_dot1ad", mp->sub_dot1ad); vat_json_object_add_uint (node, "sub_number_of_tags", 
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index ed4bada6f24..0c9f7468b11 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -353,10 +353,8 @@ adj_mtu_update_walk_cb (adj_index_t ai, return (ADJ_WALK_RC_CONTINUE); } -static void -adj_sw_mtu_update (vnet_main_t * vnm, - u32 sw_if_index, - void *ctx) +void +adj_mtu_update (u32 sw_if_index) { /* * Walk all the adjacencies on the interface to update the cached MTU @@ -364,18 +362,6 @@ adj_sw_mtu_update (vnet_main_t * vnm, adj_walk (sw_if_index, adj_mtu_update_walk_cb, NULL); } -void -adj_mtu_update (u32 hw_if_index) -{ - /* - * Walk all the SW interfaces on the HW interface to update the cached MTU - */ - vnet_hw_interface_walk_sw(vnet_get_main(), - hw_if_index, - adj_sw_mtu_update, - NULL); -} - /** * @brief Walk the Adjacencies on a given interface */ diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 5460f10b74e..34b131c556f 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -2869,8 +2869,8 @@ vhost_user_create_ethernet (vnet_main_t * vnm, vlib_main_t * vm, if (error) clib_error_report (error); - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, vui->hw_if_index); - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; + vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); + vnet_sw_interface_set_mtu (vnm, si->sw_if_index, 9000); } /* diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 2ed20e15c24..4e1d081635b 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -300,12 +300,9 @@ ethernet_register_interface (vnet_main_t * vnm, ETHERNET_MIN_PACKET_BYTES; hi->max_packet_bytes = hi->max_supported_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - hi->per_packet_overhead_bytes = - /* preamble */ 8 + /* inter frame gap */ 12; /* Standard default ethernet MTU. 
*/ - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; - + vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000); clib_memcpy (ei->address, address, sizeof (ei->address)); vec_free (hi->hw_address); vec_add (hi->hw_address, address, sizeof (ei->address)); diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index 5b165c858d3..013dde60af9 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -348,12 +348,8 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, 64 + sizeof (gre_header_t) + sizeof (ip6_header_t); } - hi->per_packet_overhead_bytes = - /* preamble */ 8 + /* inter frame gap */ 12; - /* Standard default gre MTU. */ - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; - + vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000); /* * source the FIB entry for the tunnel's destination * and become a child thereof. The tunnel will then get poked diff --git a/src/vnet/interface.api b/src/vnet/interface.api index 25ba70342ee..0f88863dff3 100644 --- a/src/vnet/interface.api +++ b/src/vnet/interface.api @@ -75,7 +75,7 @@ autoreply define want_interface_events @param interface_name - name of the interface @param link_duplex - 1 if half duplex, 2 if full duplex @param link_speed - 1 = 10M, 2 = 100M, 4 = 1G, 8 = 10G, 16 = 40G, 32 = 100G - @param link_MTU - max. transmittion unit + @param MTU - max. transmittion unit @param sub_if_id - A number 0-N to uniquely identify this subif on super if @param sub_dot1ad - 0 = dot1q, 1 = dot1ad @param sub_dot1ah - 1 = dot1ah, 0 = otherwise @@ -123,7 +123,7 @@ define sw_interface_details u8 link_speed; /* MTU */ - u16 link_mtu; + u16 mtu; /* Subinterface ID. 
A number 0-N to uniquely identify this subinterface under the super interface */ u32 sub_id; diff --git a/src/vnet/interface.c b/src/vnet/interface.c index b07a9ba7553..28c46110851 100644 --- a/src/vnet/interface.c +++ b/src/vnet/interface.c @@ -122,20 +122,55 @@ unserialize_vnet_sw_interface_set_flags (serialize_main_t * m, va_list * va) /* helper_flags no redistribution */ 0); } -void -vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu) +static void +vnet_sw_interface_set_mtu_cb (vnet_main_t * vnm, u32 sw_if_index, void *ctx) { - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); + u32 *mtu = ctx; + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + ASSERT (si); + + si->max_l3_packet_bytes[VLIB_TX] = si->max_l3_packet_bytes[VLIB_RX] = *mtu; + adj_mtu_update (sw_if_index); +} + +/* + * MTU is set per software interface. Setting MTU on a parent + * interface will override the MTU setting on sub-interfaces. + * TODO: If sub-interface MTU is ~0 inherit from parent? 
+ */ +int +vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu) +{ + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + vnet_hw_interface_t *hi = vnet_get_sw_hw_interface (vnm, sw_if_index); + + if (mtu < hi->min_packet_bytes) + return VNET_API_ERROR_INVALID_VALUE; + if (mtu > hi->max_packet_bytes) + return VNET_API_ERROR_INVALID_VALUE; - if (hi->max_packet_bytes != mtu) + /* If done on a parent interface */ + if (si->sw_if_index == si->sup_sw_if_index) { - u16 l3_pad = hi->max_packet_bytes - hi->max_l3_packet_bytes[VLIB_TX]; - hi->max_packet_bytes = mtu; - hi->max_l3_packet_bytes[VLIB_TX] = - hi->max_l3_packet_bytes[VLIB_RX] = mtu - l3_pad; - ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU); - adj_mtu_update (hw_if_index); + if (hi->hw_class_index == ethernet_hw_interface_class.index) + { + ethernet_set_flags (vnm, hi->hw_if_index, + ETHERNET_INTERFACE_FLAG_MTU); + } + + /* Override MTU on any sub-interface */ + vnet_hw_interface_walk_sw (vnm, + hi->hw_if_index, + vnet_sw_interface_set_mtu_cb, &mtu); } + else + { + si->max_l3_packet_bytes[VLIB_TX] = si->max_l3_packet_bytes[VLIB_RX] = + mtu; + adj_mtu_update (sw_if_index); + } + + return 0; } static void @@ -584,6 +619,9 @@ vnet_create_sw_interface_no_callbacks (vnet_main_t * vnm, if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE) sw->sup_sw_if_index = sw->sw_if_index; + sw->max_l3_packet_bytes[VLIB_RX] = ~0; + sw->max_l3_packet_bytes[VLIB_TX] = ~0; + /* Allocate counters for this interface. 
*/ { u32 i; @@ -758,9 +796,7 @@ vnet_register_interface (vnet_main_t * vnm, hw->max_rate_bits_per_sec = 0; hw->min_packet_bytes = 0; - hw->per_packet_overhead_bytes = 0; - hw->max_l3_packet_bytes[VLIB_RX] = ~0; - hw->max_l3_packet_bytes[VLIB_TX] = ~0; + hw->max_packet_bytes = 9000; /* default */ if (dev_class->tx_function == 0) goto no_output_nodes; /* No output/tx nodes to create */ diff --git a/src/vnet/interface.h b/src/vnet/interface.h index 7556bc5544e..d462e1e8448 100644 --- a/src/vnet/interface.h +++ b/src/vnet/interface.h @@ -512,14 +512,6 @@ typedef struct vnet_hw_interface_t /* Largest packet size for this interface. */ u32 max_packet_bytes; - /* Number of extra bytes that go on the wire. - Packet length on wire - = max (length + per_packet_overhead_bytes, min_packet_bytes). */ - u32 per_packet_overhead_bytes; - - /* Receive and transmit layer 3 packet size limits (MRU/MTU). */ - u32 max_l3_packet_bytes[VLIB_N_RX_TX]; - /* Hash table mapping sub interface id to sw_if_index. */ uword *sub_interface_sw_if_index_by_id; @@ -656,6 +648,9 @@ typedef struct u32 link_speed; + /* Receive and transmit layer 3 packet size limits (MRU/MTU). */ + u32 max_l3_packet_bytes[VLIB_N_RX_TX]; + union { /* VNET_SW_INTERFACE_TYPE_HARDWARE. 
*/ diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 8d982e36b73..116ee63bdba 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -102,40 +102,11 @@ vl_api_sw_interface_set_mtu_t_handler (vl_api_sw_interface_set_mtu_t * mp) vnet_main_t *vnm = vnet_get_main (); u32 sw_if_index = ntohl (mp->sw_if_index); u16 mtu = ntohs (mp->mtu); - ethernet_main_t *em = &ethernet_main; int rv = 0; VALIDATE_SW_IF_INDEX (mp); - vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); - if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto bad_sw_if_index; - } - - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, si->hw_if_index); - ethernet_interface_t *eif = ethernet_get_interface (em, si->hw_if_index); - - if (!eif) - { - rv = VNET_API_ERROR_FEATURE_DISABLED; - goto bad_sw_if_index; - } - - if (mtu < hi->min_supported_packet_bytes) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto bad_sw_if_index; - } - - if (mtu > hi->max_supported_packet_bytes) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto bad_sw_if_index; - } - - vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu); + rv = vnet_sw_interface_set_mtu (vnm, sw_if_index, mtu); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_MTU_REPLY); @@ -161,7 +132,7 @@ send_sw_interface_details (vpe_api_main_t * am, VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT); mp->link_speed = ((hi->flags & VNET_HW_INTERFACE_FLAG_SPEED_MASK) >> VNET_HW_INTERFACE_FLAG_SPEED_SHIFT); - mp->link_mtu = ntohs (hi->max_packet_bytes); + mp->mtu = ntohs (swif->max_l3_packet_bytes[VLIB_TX]); mp->context = context; strncpy ((char *) mp->interface_name, diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c index d151335aa1f..fe17c823b36 100644 --- a/src/vnet/interface_cli.c +++ b/src/vnet/interface_cli.c @@ -1103,32 +1103,29 @@ static clib_error_t * mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { vnet_main_t *vnm = vnet_get_main (); 
- u32 hw_if_index, mtu; - ethernet_main_t *em = &ethernet_main; + u32 sw_if_index, mtu; - if (unformat (input, "%d %U", &mtu, - unformat_vnet_hw_interface, vnm, &hw_if_index)) + if (unformat (input, "%d %U", &mtu, unformat_vnet_sw_interface, vnm, + &sw_if_index)) { - vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index); - ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index); - - if (!eif) - return clib_error_return (0, "not supported"); - - if (mtu < hi->min_supported_packet_bytes) - return clib_error_return (0, "Invalid mtu (%d): " - "must be >= min pkt bytes (%d)", mtu, - hi->min_supported_packet_bytes); - - if (mtu > hi->max_supported_packet_bytes) - return clib_error_return (0, "Invalid mtu (%d): must be <= (%d)", mtu, - hi->max_supported_packet_bytes); - - vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu); + ; } else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + int rv = vnet_sw_interface_set_mtu (vnm, sw_if_index, mtu); + if (rv < 0) + { + vnet_hw_interface_t *hi = vnet_get_sw_hw_interface (vnm, sw_if_index); + ASSERT (hi); + return clib_error_return (0, "Invalid mtu (%d): " + "must be between min pkt bytes (%d) and max pkt bytes (%d)", + mtu, hi->min_packet_bytes, + hi->max_packet_bytes); + } return 0; } @@ -1406,7 +1403,7 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input, * @cliexend ?*/ /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = { +VLIB_CLI_COMMAND (cmd_set_if_rx_mode, static) = { .path = "set interface rx-mode", .short_help = "set interface rx-mode [queue ] [polling | interrupt | adaptive]", .function = set_interface_rx_mode, diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h index 6e188f7f543..42b69bdf789 100644 --- a/src/vnet/interface_funcs.h +++ b/src/vnet/interface_funcs.h @@ -87,6 +87,15 @@ vnet_get_sup_hw_interface 
(vnet_main_t * vnm, u32 sw_if_index) return vnet_get_hw_interface (vnm, sw->hw_if_index); } +always_inline vnet_hw_interface_t * +vnet_get_sw_hw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); + if (sw->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + return vnet_get_hw_interface (vnm, sw->hw_if_index); + return vnet_get_sup_hw_interface (vnm, sw_if_index); +} + always_inline vnet_hw_interface_class_t * vnet_get_hw_interface_class (vnet_main_t * vnm, u32 hw_class_index) { @@ -218,20 +227,12 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index) return hw->flags; } -always_inline uword -vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index, - vlib_rx_or_tx_t dir) -{ - vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); - return hw->max_l3_packet_bytes[dir]; -} - always_inline uword vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index, vlib_rx_or_tx_t dir) { - vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - return (hw->max_l3_packet_bytes[dir]); + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); + return (sw->max_l3_packet_bytes[dir]); } always_inline uword @@ -292,8 +293,8 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm, vnet_hw_interface_rx_mode mode); -/* Set the MTU on the HW interface */ -void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu); +/* Set the MTU on the SW interface */ +int vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu); /* update the unnumbered state of an interface */ void vnet_sw_interface_update_unnumbered (u32 sw_if_index, diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c index 1fe01e48500..a4808f23271 100644 --- a/src/vnet/ip/icmp4.c +++ b/src/vnet/ip/icmp4.c @@ -513,13 +513,15 @@ ip4_icmp_error (vlib_main_t * vm, b->current_length = 0; } } - p0->current_length = - p0->current_length > 576 ? 
576 : p0->current_length; /* Add IP header and ICMPv4 header including a 4 byte data field */ vlib_buffer_advance (p0, -sizeof (ip4_header_t) - sizeof (icmp46_header_t) - 4); + + p0->current_length = + p0->current_length > 576 ? 576 : p0->current_length; + out_ip0 = vlib_buffer_get_current (p0); icmp0 = (icmp46_header_t *) & out_ip0[1]; diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index fd5d0ecba38..6beec28c2cf 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -526,13 +526,14 @@ ip6_icmp_error (vlib_main_t * vm, b->current_length = 0; } } - p0->current_length = - p0->current_length > 1280 ? 1280 : p0->current_length; - /* Add IP header and ICMPv6 header including a 4 byte data field */ vlib_buffer_advance (p0, -sizeof (ip6_header_t) - sizeof (icmp46_header_t) - 4); + + p0->current_length = + p0->current_length > 1280 ? 1280 : p0->current_length; + out_ip0 = vlib_buffer_get_current (p0); icmp0 = (icmp46_header_t *) & out_ip0[1]; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 7c56a294436..3dce590c807 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1940,6 +1940,29 @@ typedef enum IP4_REWRITE_NEXT_ICMP_ERROR, } ip4_rewrite_next_t; +always_inline void +ip4_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes, + u16 adj_packet_bytes, bool df, u32 * next, u32 * error) +{ + if (buffer_packet_bytes > adj_packet_bytes) + { + *error = IP4_ERROR_MTU_EXCEEDED; + if (df) + { + icmp4_error_set_vnet_buffer + (b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + adj_packet_bytes); + *next = IP4_REWRITE_NEXT_ICMP_ERROR; + } + else + { + /* Add support for fragmentation here */ + *next = IP4_REWRITE_NEXT_DROP; + } + } +} + always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2100,26 +2123,20 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; /* Check MTU of outgoing 
interface. */ - if (vlib_buffer_length_in_chain (vm, p0) > - adj0[0].rewrite_header.max_l3_packet_bytes) - { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = IP4_REWRITE_NEXT_ICMP_ERROR; - icmp4_error_set_vnet_buffer - (p0, ICMP4_destination_unreachable, - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, - 0); - } - if (vlib_buffer_length_in_chain (vm, p1) > - adj1[0].rewrite_header.max_l3_packet_bytes) - { - error1 = IP4_ERROR_MTU_EXCEEDED; - next1 = IP4_REWRITE_NEXT_ICMP_ERROR; - icmp4_error_set_vnet_buffer - (p1, ICMP4_destination_unreachable, - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, - 0); - } + ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + ip4_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1), + adj1[0].rewrite_header.max_l3_packet_bytes, + ip1->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next1, &error1); + + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); if (is_mcast) { @@ -2143,10 +2160,17 @@ ip4_rewrite_inline (vlib_main_t * vm, tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + } + if (PREDICT_FALSE (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0); + } if (PREDICT_TRUE (error1 == IP4_ERROR_NONE)) { @@ -2157,16 +2181,18 @@ ip4_rewrite_inline (vlib_main_t * vm, tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index; vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1; + if (is_midchain) + { + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj0->sub_type.midchain.fixup_data); + } + if (PREDICT_FALSE (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1); } - /* Guess we are only writing on simple Ethernet header. */ - vnet_rewrite_two_headers (adj0[0], adj1[0], - ip0, ip1, sizeof (ethernet_header_t)); - /* * Bump the per-adjacency counters */ @@ -2185,13 +2211,6 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_length_in_chain (vm, p1) + rw_len1); } - if (is_midchain) - { - adj0->sub_type.midchain.fixup_func - (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); - adj1->sub_type.midchain.fixup_func - (vm, adj1, p1, adj0->sub_type.midchain.fixup_data); - } if (is_mcast) { /* @@ -2272,6 +2291,7 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + if (is_mcast) { /* @@ -2291,16 +2311,12 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. 
*/ - if (vlib_buffer_length_in_chain (vm, p0) > - adj0[0].rewrite_header.max_l3_packet_bytes) - { - error0 = IP4_ERROR_MTU_EXCEEDED; - next0 = IP4_REWRITE_NEXT_ICMP_ERROR; - icmp4_error_set_vnet_buffer - (p0, ICMP4_destination_unreachable, - ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, - 0); - } + ip4_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + if (is_mcast) { error0 = ((adj0[0].rewrite_header.sw_if_index == diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 588cd0675a4..7599733fcb5 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1774,6 +1774,19 @@ typedef enum IP6_REWRITE_NEXT_ICMP_ERROR, } ip6_rewrite_next_t; +always_inline void +ip6_mtu_check (vlib_buffer_t * b, u16 buffer_packet_bytes, + u16 adj_packet_bytes, u32 * next, u32 * error) +{ + if (adj_packet_bytes >= 1280 && buffer_packet_bytes > adj_packet_bytes) + { + *error = IP6_ERROR_MTU_EXCEEDED; + icmp6_error_set_vnet_buffer (b, ICMP6_packet_too_big, 0, + adj_packet_bytes); + *next = IP6_REWRITE_NEXT_ICMP_ERROR; + } +} + always_inline uword ip6_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1898,9 +1911,14 @@ ip6_rewrite_inline (vlib_main_t * vm, { p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } + adj0 = adj_get (adj_index0); adj1 = adj_get (adj_index1); + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); + rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; @@ -1919,16 +1937,12 @@ ip6_rewrite_inline (vlib_main_t * vm, } /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? 
IP6_ERROR_MTU_EXCEEDED : - error0); - error1 = - (vlib_buffer_length_in_chain (vm, p1) > - adj1[0]. - rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : - error1); + ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0), + adj0[0].rewrite_header.max_l3_packet_bytes, &next0, + &error0); + ip6_mtu_check (p1, vlib_buffer_length_in_chain (vm, p1), + adj1[0].rewrite_header.max_l3_packet_bytes, &next1, + &error1); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP headerr */ @@ -1945,6 +1959,19 @@ ip6_rewrite_inline (vlib_main_t * vm, (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0); + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + } } if (PREDICT_TRUE (error1 == IP6_ERROR_NONE)) { @@ -1959,26 +1986,19 @@ ip6_rewrite_inline (vlib_main_t * vm, (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index1, &next1, p1); - } - /* Guess we are only writing on simple Ethernet header. 
*/ - vnet_rewrite_two_headers (adj0[0], adj1[0], - ip0, ip1, sizeof (ethernet_header_t)); - - if (is_midchain) - { - adj0->sub_type.midchain.fixup_func - (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); - adj1->sub_type.midchain.fixup_func - (vm, adj1, p1, adj1->sub_type.midchain.fixup_data); - } - if (is_mcast) - { - /* - * copy bytes from the IP address into the MAC rewrite - */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); - vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + if (is_midchain) + { + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj1->sub_type.midchain.fixup_data); + } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + } } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -2054,11 +2074,9 @@ ip6_rewrite_inline (vlib_main_t * vm, } /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? 
IP6_ERROR_MTU_EXCEEDED : - error0); + ip6_mtu_check (p0, vlib_buffer_length_in_chain (vm, p0), + adj0[0].rewrite_header.max_l3_packet_bytes, &next0, + &error0); /* Don't adjust the buffer for hop count issue; icmp-error node * wants to see the IP headerr */ @@ -2076,16 +2094,16 @@ ip6_rewrite_inline (vlib_main_t * vm, (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) vnet_feature_arc_start (lm->output_feature_arc_index, tx_sw_if_index0, &next0, p0); - } - if (is_midchain) - { - adj0->sub_type.midchain.fixup_func - (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); - } - if (is_mcast) - { - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + } + if (is_mcast) + { + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + } } p0->error = error_node->errors[error0]; diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index fee4356f5e0..a1439faa154 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -2575,10 +2575,6 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, { if (is_add) { - vnet_hw_interface_t *hw_if0; - - hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index); - pool_get (nm->if_radv_pool, a); ri = a - nm->if_radv_pool; @@ -2612,7 +2608,8 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, a->send_radv = 1; /* fill in radv_info for this interface that will be needed later */ - a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX]; + a->adv_link_mtu = + vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX); clib_memcpy (a->link_layer_address, eth_if0->address, 6); diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c index 82c961cdddd..9b808d4ac0f 100644 --- a/src/vnet/ipip/ipip.c +++ b/src/vnet/ipip/ipip.c @@ -476,17 +476,16 @@ ipip_add_tunnel (ipip_transport_t transport, { vec_validate (im4->fib_index_by_sw_if_index, sw_if_index); hi->min_packet_bytes = 64 + sizeof 
(ip4_header_t); + hi->max_packet_bytes = 65536 - sizeof (ip4_header_t); } else { vec_validate (im6->fib_index_by_sw_if_index, sw_if_index); hi->min_packet_bytes = 64 + sizeof (ip6_header_t); + hi->max_packet_bytes = 65536 - sizeof (ip6_header_t); } - hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12; - - /* Standard default ipip MTU. */ - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; + vnet_sw_interface_set_mtu (vnm, sw_if_index, hi->max_packet_bytes); t->tunnel_src = *src; t->tunnel_dst = *dst; diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c index cfdd0f87e3a..998025782fb 100644 --- a/src/vnet/ipip/sixrd.c +++ b/src/vnet/ipip/sixrd.c @@ -337,7 +337,8 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len, t->dev_instance = t_idx; t->user_instance = t_idx; - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 1480; + /* Set default MTU to 1480 */ + vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480); ipip_tunnel_db_add (t, &key); diff --git a/src/vnet/ipsec-gre/interface.c b/src/vnet/ipsec-gre/interface.c index 0772ce73df2..fa33684c50d 100644 --- a/src/vnet/ipsec-gre/interface.c +++ b/src/vnet/ipsec-gre/interface.c @@ -174,12 +174,9 @@ vnet_ipsec_gre_add_del_tunnel (vnet_ipsec_gre_add_del_tunnel_args_t * a, hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t) + sizeof (esp_header_t) + sizeof (esp_footer_t); - hi->per_packet_overhead_bytes = - /* preamble */ 8 + /* inter frame gap */ 12; /* Standard default gre MTU. 
*/ - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - 9000; + vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000); clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 10a86a457e8..5c550bc3320 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -1068,8 +1068,9 @@ vnet_tap_connect (vlib_main_t * vm, vnet_tap_connect_args_t * ap) hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index); hw->min_supported_packet_bytes = TAP_MTU_MIN; hw->max_supported_packet_bytes = TAP_MTU_MAX; - hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = - hw->max_supported_packet_bytes - sizeof (ethernet_header_t); + vnet_sw_interface_set_mtu (tm->vnet_main, hw->sw_if_index, + hw->max_supported_packet_bytes - + sizeof (ethernet_header_t)); ti->sw_if_index = hw->sw_if_index; if (ap->sw_if_indexp) *(ap->sw_if_indexp) = hw->sw_if_index; diff --git a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java index b99979cf301..e15f70ad8f8 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/CallbackApiExample.java @@ -86,9 +86,9 @@ public class CallbackApiExample { @Override public void onSwInterfaceDetails(final SwInterfaceDetails msg) { System.out.printf("Received SwInterfaceDetails: interfaceName=%s, l2AddressLength=%d, adminUpDown=%d, " - + "linkUpDown=%d, linkSpeed=%d, linkMtu=%d%n", + + "linkUpDown=%d, linkSpeed=%d, mtu=%d%n", new String(msg.interfaceName, StandardCharsets.UTF_8), msg.l2AddressLength, msg.adminUpDown, - msg.linkUpDown, msg.linkSpeed, (int) msg.linkMtu); + msg.linkUpDown, msg.linkSpeed, (int) msg.mtu); } @Override diff --git 
a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java index 931c9b337aa..9546611aee1 100644 --- a/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java +++ b/src/vpp-api/java/jvpp-core/io/fd/vpp/jvpp/core/examples/FutureApiExample.java @@ -102,7 +102,7 @@ public class FutureApiExample { + "linkUpDown=%d, linkSpeed=%d, linkMtu=%d%n", new String(details.interfaceName, StandardCharsets.UTF_8), details.l2AddressLength, details.adminUpDown, - details.linkUpDown, details.linkSpeed, (int) details.linkMtu)); + details.linkUpDown, details.linkSpeed, (int) details.mtu)); } } -- cgit 1.2.3-korg