diff options
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | src/vat/api_format.c | 43 | ||||
-rw-r--r-- | src/vnet/CMakeLists.txt | 18 | ||||
-rw-r--r-- | src/vnet/gso/cli.c | 93 | ||||
-rw-r--r-- | src/vnet/gso/gso.api | 39 | ||||
-rw-r--r-- | src/vnet/gso/gso.c | 88 | ||||
-rw-r--r-- | src/vnet/gso/gso.h | 40 | ||||
-rw-r--r-- | src/vnet/gso/gso_api.c | 106 | ||||
-rw-r--r-- | src/vnet/gso/node.c | 652 | ||||
-rw-r--r-- | src/vnet/interface_output.c | 391 | ||||
-rw-r--r-- | src/vnet/vnet_all_api_h.h | 1 | ||||
-rw-r--r-- | src/vpp/api/custom_dump.c | 16 | ||||
-rw-r--r-- | test/test_gso.py | 2 | ||||
-rw-r--r-- | test/vpp_papi_provider.py | 7 |
14 files changed, 1120 insertions, 377 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 99e792d7e36..652d76a2e1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -194,6 +194,7 @@ F: src/vnet/lisp-gpe/ VNET GSO I: gso M: Andrew Yourtchenko <ayourtch@gmail.com> +M: Mohsin Kazmi <sykazmi@cisco.com> F: src/vnet/interface_output.c Plugin - MAP diff --git a/src/vat/api_format.c b/src/vat/api_format.c index 369dcc44641..cbd7f901f98 100644 --- a/src/vat/api_format.c +++ b/src/vat/api_format.c @@ -5174,6 +5174,7 @@ _(delete_subif_reply) \ _(l2_interface_pbb_tag_rewrite_reply) \ _(set_punt_reply) \ _(feature_enable_disable_reply) \ +_(feature_gso_enable_disable_reply) \ _(sw_interface_tag_add_del_reply) \ _(sw_interface_add_del_mac_address_reply) \ _(hw_interface_set_mtu_reply) \ @@ -5483,6 +5484,7 @@ _(SET_PUNT_REPLY, set_punt_reply) \ _(IP_TABLE_DETAILS, ip_table_details) \ _(IP_ROUTE_DETAILS, ip_route_details) \ _(FEATURE_ENABLE_DISABLE_REPLY, feature_enable_disable_reply) \ +_(FEATURE_GSO_ENABLE_DISABLE_REPLY, feature_gso_enable_disable_reply) \ _(SW_INTERFACE_TAG_ADD_DEL_REPLY, sw_interface_tag_add_del_reply) \ _(SW_INTERFACE_ADD_DEL_MAC_ADDRESS_REPLY, sw_interface_add_del_mac_address_reply) \ _(L2_XCONNECT_DETAILS, l2_xconnect_details) \ @@ -19966,6 +19968,45 @@ api_feature_enable_disable (vat_main_t * vam) } static int +api_feature_gso_enable_disable (vat_main_t * vam) +{ + unformat_input_t *i = vam->input; + vl_api_feature_gso_enable_disable_t *mp; + u32 sw_if_index = ~0; + u8 enable = 1; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "enable")) + enable = 1; + else if (unformat (i, "disable")) + enable = 0; + else + break; + } + + if (sw_if_index == ~0) + { + errmsg ("missing interface name or sw_if_index"); + return -99; + } + + /* Construct the API message */ + M (FEATURE_GSO_ENABLE_DISABLE, mp); + mp->sw_if_index = ntohl 
(sw_if_index); + mp->enable_disable = enable; + + S (mp); + W (ret); + return ret; +} + +static int api_sw_interface_tag_add_del (vat_main_t * vam) { unformat_input_t *i = vam->input; @@ -21814,6 +21855,8 @@ _(ip_mtable_dump, "") \ _(ip_mroute_dump, "table-id [ip4|ip6]") \ _(feature_enable_disable, "arc_name <arc_name> " \ "feature_name <feature_name> <intfc> | sw_if_index <nn> [disable]") \ +_(feature_gso_enable_disable, "<intfc> | sw_if_index <nn> " \ + "[enable | disable] ") \ _(sw_interface_tag_add_del, "<intfc> | sw_if_index <nn> tag <text>" \ "[disable]") \ _(sw_interface_add_del_mac_address, "<intfc> | sw_if_index <nn> " \ diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 9c1bceb5753..7120809636e 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -941,6 +941,24 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES ipfix-export/ipfix_export.api) ############################################################################## +# GSO +############################################################################## +list(APPEND VNET_SOURCES + gso/cli.c + gso/gso.c + gso/gso_api.c + gso/node.c +) + +list(APPEND VNET_HEADERS + gso/gso.h +) + +list(APPEND VNET_API_FILES + gso/gso.api +) + +############################################################################## # IPFIX classify code ############################################################################## diff --git a/src/vnet/gso/cli.c b/src/vnet/gso/cli.c new file mode 100644 index 00000000000..060ce812fad --- /dev/null +++ b/src/vnet/gso/cli.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/feature/feature.h> +#include <vnet/gso/gso.h> + +static clib_error_t * +set_interface_feature_gso_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = 0; + + u32 sw_if_index = ~0; + u8 enable = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (line_input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "enable")) + enable = 1; + else if (unformat (line_input, "disable")) + enable = 0; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (sw_if_index == ~0) + { + error = clib_error_return (0, "Interface not specified..."); + goto done; + } + int rv = vnet_sw_interface_gso_enable_disable (sw_if_index, enable); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "interface type is not hardware"); + break; + case VNET_API_ERROR_FEATURE_DISABLED: + error = clib_error_return (0, "interface should be ethernet interface"); + break; + default: + ; + } + +done: + unformat_free (line_input); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_feature_gso_command, static) = { + .path = 
"set interface feature gso", + .short_help = "set interface feature gso <intfc> [enable | disable]", + .function = set_interface_feature_gso_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/gso/gso.api b/src/vnet/gso/gso.api new file mode 100644 index 00000000000..8b1da88f040 --- /dev/null +++ b/src/vnet/gso/gso.api @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +import "vnet/interface_types.api"; + +/** \brief Enable or disable interface feature gso arc + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - The interface to enable/disable gso feature arc. + @param enable_disable - set to 1 to enable, 0 to disable gso feature arc +*/ +autoreply define feature_gso_enable_disable +{ + u32 client_index; + u32 context; + vl_api_interface_index_t sw_if_index; + bool enable_disable; + option vat_help = "<intfc> | sw_if_index <nn> [enable | disable]"; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/gso/gso.c b/src/vnet/gso/gso.c new file mode 100644 index 00000000000..c741b17bdd4 --- /dev/null +++ b/src/vnet/gso/gso.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/feature/feature.h> +#include <vnet/l2/l2_in_out_feat_arc.h> +#include <vnet/gso/gso.h> + +gso_main_t gso_main; + +int +vnet_sw_interface_gso_enable_disable (u32 sw_if_index, u8 enable) +{ + ethernet_interface_t *eif; + vnet_sw_interface_t *si; + ethernet_main_t *em; + vnet_main_t *vnm; + + vnm = vnet_get_main (); + em = &ethernet_main; + si = vnet_get_sw_interface (vnm, sw_if_index); + + /* + * only ethernet HW interfaces are supported at this time + */ + if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + { + return (VNET_API_ERROR_INVALID_VALUE); + } + + eif = ethernet_get_interface (em, si->hw_if_index); + + if (!eif) + { + return (VNET_API_ERROR_FEATURE_DISABLED); + } + + vnet_feature_enable_disable ("ip4-output", "gso-ip4", sw_if_index, enable, + 0, 0); + vnet_feature_enable_disable ("ip6-output", "gso-ip6", sw_if_index, enable, + 0, 0); + + vnet_l2_feature_enable_disable ("l2-output-nonip", "gso-l2-nonip", + sw_if_index, enable, 0, 0); + vnet_l2_feature_enable_disable ("l2-output-ip4", "gso-l2-ip4", + sw_if_index, enable, 0, 0); + vnet_l2_feature_enable_disable ("l2-output-ip6", "gso-l2-ip6", + sw_if_index, enable, 0, 0); + + return (0); +} + +static clib_error_t * +gso_init (vlib_main_t * vm) +{ + gso_main_t *gm = &gso_main; + + clib_memset (gm, 0, sizeof (gm[0])); + 
gm->vlib_main = vm; + gm->vnet_main = vnet_get_main (); + + return 0; +} + +VLIB_INIT_FUNCTION (gso_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h new file mode 100644 index 00000000000..8e174dfd1f6 --- /dev/null +++ b/src/vnet/gso/gso.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_gso_h +#define included_gso_h + +#include <vnet/vnet.h> + +typedef struct +{ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + u16 msg_id_base; +} gso_main_t; + +extern gso_main_t gso_main; + +int vnet_sw_interface_gso_enable_disable (u32 sw_if_index, u8 enable); + +#endif /* included_gso_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/gso/gso_api.c b/src/vnet/gso/gso_api.c new file mode 100644 index 00000000000..9991c5fd289 --- /dev/null +++ b/src/vnet/gso/gso_api.c @@ -0,0 +1,106 @@ +/* + *------------------------------------------------------------------ + * gso_api.c - Generic Segmentation Offload api + * + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> +#include <vnet/gso/gso.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_feature_gso_api_msg \ +_(FEATURE_GSO_ENABLE_DISABLE, feature_gso_enable_disable) + +static void + vl_api_feature_gso_enable_disable_t_handler + (vl_api_feature_gso_enable_disable_t * mp) +{ + vl_api_feature_gso_enable_disable_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = + vnet_sw_interface_gso_enable_disable (ntohl (mp->sw_if_index), + mp->enable_disable); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_FEATURE_GSO_ENABLE_DISABLE_REPLY); +} + +#define vl_msg_name_crc_list +#include <vnet/gso/gso.api.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_gso; +#undef _ +} + +static clib_error_t * +feature_gso_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, 
#n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_feature_gso_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (feature_gso_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c new file mode 100644 index 00000000000..5998a4f8387 --- /dev/null +++ b/src/vnet/gso/node.c @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/feature/feature.h> +#include <vnet/gso/gso.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip6.h> +#include <vnet/udp/udp_packet.h> + +typedef struct +{ + u32 flags; + u16 gso_size; + u8 gso_l4_hdr_sz; +} gso_trace_t; + +static u8 * +format_gso_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + gso_trace_t *t = va_arg (*args, gso_trace_t *); + + if (t->flags & VNET_BUFFER_F_GSO) + { + s = format (s, "gso_sz %d gso_l4_hdr_sz %d", + t->gso_size, t->gso_l4_hdr_sz); + } + + return s; +} + +static_always_inline u16 +tso_alloc_tx_bufs (vlib_main_t * vm, + vnet_interface_per_thread_data_t * ptd, + vlib_buffer_t * b0, u32 n_bytes_b0, u16 l234_sz, + u16 gso_size) +{ + u16 size = + clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - l234_sz); + + /* rounded-up division */ + u16 n_bufs = (n_bytes_b0 - l234_sz + (size - 1)) / size; + u16 n_alloc; + + ASSERT (n_bufs > 0); + vec_validate (ptd->split_buffers, n_bufs - 1); + + n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs); + if (n_alloc < n_bufs) + { + vlib_buffer_free (vm, ptd->split_buffers, n_alloc); + return 0; + } + return n_alloc; +} + +static_always_inline void +tso_init_buf_from_template_base (vlib_buffer_t * nb0, vlib_buffer_t * b0, + u32 flags, u16 length) +{ + nb0->current_data = b0->current_data; + nb0->total_length_not_including_first_buffer = 0; + nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags; + clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque)); + clib_memcpy_fast (vlib_buffer_get_current (nb0), + vlib_buffer_get_current (b0), length); + nb0->current_length = length; +} + +static_always_inline void +tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0, + vlib_buffer_t * 
b0, u16 template_data_sz, + u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left, + u32 next_tcp_seq, u32 flags) +{ + tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz); + + *p_dst_left = + clib_min (gso_size, + vlib_buffer_get_default_data_size (vm) - (template_data_sz + + nb0->current_data)); + *p_dst_ptr = vlib_buffer_get_current (nb0) + template_data_sz; + + tcp_header_t *tcp = + (tcp_header_t *) (nb0->data + vnet_buffer (nb0)->l4_hdr_offset); + tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq); +} + +static_always_inline void +tso_fixup_segmented_buf (vlib_buffer_t * b0, u8 tcp_flags, int is_ip6) +{ + u16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; + u16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset; + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset); + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset); + tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset); + + tcp->flags = tcp_flags; + + if (is_ip6) + ip6->payload_length = + clib_host_to_net_u16 (b0->current_length - + (l4_hdr_offset - b0->current_data)); + else + ip4->length = + clib_host_to_net_u16 (b0->current_length - + (l3_hdr_offset - b0->current_data)); +} + +/** + * Allocate the necessary number of ptd->split_buffers, + * and segment the possibly chained buffer(s) from b0 into + * there. + * + * Return the cumulative number of bytes sent or zero + * if allocation failed. 
+ */ + +static_always_inline u32 +tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd, + u32 sbi0, vlib_buffer_t * sb0, u32 n_bytes_b0, int is_ip6) +{ + u32 n_tx_bytes = 0; + ASSERT (sb0->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID); + ASSERT (sb0->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID); + ASSERT (sb0->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID); + u16 gso_size = vnet_buffer2 (sb0)->gso_size; + + int l4_hdr_sz = vnet_buffer2 (sb0)->gso_l4_hdr_sz; + u8 save_tcp_flags = 0; + u8 tcp_flags_no_fin_psh = 0; + u32 next_tcp_seq = 0; + + tcp_header_t *tcp = + (tcp_header_t *) (sb0->data + vnet_buffer (sb0)->l4_hdr_offset); + next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number); + /* store original flags for last packet and reset FIN and PSH */ + save_tcp_flags = tcp->flags; + tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH); + tcp->checksum = 0; + + u32 default_bflags = + sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT); + u16 l234_sz = vnet_buffer (sb0)->l4_hdr_offset + l4_hdr_sz + - sb0->current_data; + int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz); + next_tcp_seq += first_data_size; + + if (PREDICT_FALSE + (!tso_alloc_tx_bufs (vm, ptd, sb0, n_bytes_b0, l234_sz, gso_size))) + return 0; + + vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]); + tso_init_buf_from_template_base (b0, sb0, default_bflags, + l234_sz + first_data_size); + + u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size; + if (total_src_left) + { + /* Need to copy more segments */ + u8 *src_ptr, *dst_ptr; + u16 src_left, dst_left; + /* current source buffer */ + vlib_buffer_t *csb0 = sb0; + u32 csbi0 = sbi0; + /* current dest buffer */ + vlib_buffer_t *cdb0; + u16 dbi = 1; /* the buffer [0] is b0 */ + + src_ptr = vlib_buffer_get_current (sb0) + l234_sz + first_data_size; + src_left = sb0->current_length - l234_sz - first_data_size; + + tso_fixup_segmented_buf (b0, tcp_flags_no_fin_psh, is_ip6); + + /* 
grab a second buffer and prepare the loop */ + ASSERT (dbi < vec_len (ptd->split_buffers)); + cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]); + tso_init_buf_from_template (vm, cdb0, b0, l234_sz, gso_size, &dst_ptr, + &dst_left, next_tcp_seq, default_bflags); + + /* an arbitrary large number to catch the runaway loops */ + int nloops = 2000; + while (total_src_left) + { + if (nloops-- <= 0) + clib_panic ("infinite loop detected"); + u16 bytes_to_copy = clib_min (src_left, dst_left); + + clib_memcpy_fast (dst_ptr, src_ptr, bytes_to_copy); + + src_left -= bytes_to_copy; + src_ptr += bytes_to_copy; + total_src_left -= bytes_to_copy; + dst_left -= bytes_to_copy; + dst_ptr += bytes_to_copy; + next_tcp_seq += bytes_to_copy; + cdb0->current_length += bytes_to_copy; + + if (0 == src_left) + { + int has_next = (csb0->flags & VLIB_BUFFER_NEXT_PRESENT); + u32 next_bi = csb0->next_buffer; + + /* init src to the next buffer in chain */ + if (has_next) + { + csbi0 = next_bi; + csb0 = vlib_get_buffer (vm, csbi0); + src_left = csb0->current_length; + src_ptr = vlib_buffer_get_current (csb0); + } + else + { + ASSERT (total_src_left == 0); + break; + } + } + if (0 == dst_left && total_src_left) + { + n_tx_bytes += cdb0->current_length; + ASSERT (dbi < vec_len (ptd->split_buffers)); + cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]); + tso_init_buf_from_template (vm, cdb0, b0, l234_sz, + gso_size, &dst_ptr, &dst_left, + next_tcp_seq, default_bflags); + } + } + + tso_fixup_segmented_buf (cdb0, save_tcp_flags, is_ip6); + + n_tx_bytes += cdb0->current_length; + } + n_tx_bytes += b0->current_length; + return n_tx_bytes; +} + +static_always_inline void +drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm, + vlib_node_runtime_t * node, u32 * pbi0, + u32 sw_if_index, u32 drop_error_code) +{ + u32 thread_index = vm->thread_index; + + vlib_simple_counter_main_t *cm; + cm = + vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_TX_ERROR); + 
vlib_increment_simple_counter (cm, thread_index, sw_if_index, 1); + + vlib_error_drop_buffers (vm, node, pbi0, + /* buffer stride */ 1, + /* n_buffers */ 1, + VNET_INTERFACE_OUTPUT_NEXT_DROP, + node->node_index, drop_error_code); +} + +static_always_inline uword +vnet_gso_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vnet_main_t * vnm, + vnet_hw_interface_t * hi, + int is_ip6, int do_segmentation) +{ + u32 *to_next; + u32 next_index = node->cached_next_index; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left_from = frame->n_vectors; + u32 *from_end = from + n_left_from; + u32 thread_index = vm->thread_index; + vnet_interface_main_t *im = &vnm->interface_main; + vnet_interface_per_thread_data_t *ptd = + vec_elt_at_index (im->per_thread_data, thread_index); + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + + vlib_get_buffers (vm, from, b, n_left_from); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (from + 8 <= from_end && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + u32 next0, next1, next2, next3; + u32 swif0, swif1, swif2, swif3; + gso_trace_t *t0, *t1, *t2, *t3; + vnet_hw_interface_t *hi0, *hi1, *hi2, *hi3; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); + vlib_prefetch_buffer_header (b[6], LOAD); + vlib_prefetch_buffer_header (b[7], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + bi2 = from[2]; + bi3 = from[3]; + to_next[0] = bi0; + to_next[1] = bi1; + to_next[2] = bi2; + to_next[3] = bi3; + + swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; + swif1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX]; + swif2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX]; + swif3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX]; + + if (PREDICT_FALSE (hi->sw_if_index != swif0)) + { + hi0 = vnet_get_sup_hw_interface (vnm, swif0); + if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 && + (b[0]->flags & VNET_BUFFER_F_GSO)) + break; + } + if (PREDICT_FALSE (hi->sw_if_index != swif1)) + { + hi1 = vnet_get_sup_hw_interface (vnm, swif1); + if (!(hi1->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) && + (b[1]->flags & VNET_BUFFER_F_GSO)) + break; + } + if (PREDICT_FALSE (hi->sw_if_index != swif2)) + { + hi2 = vnet_get_sup_hw_interface (vnm, swif2); + if ((hi2->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 && + (b[2]->flags & VNET_BUFFER_F_GSO)) + break; + } + if (PREDICT_FALSE (hi->sw_if_index != swif3)) + { + hi3 = vnet_get_sup_hw_interface (vnm, swif3); + if (!(hi3->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) && + (b[3]->flags & VNET_BUFFER_F_GSO)) + break; + } + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0])); + t0->flags = b[0]->flags & VNET_BUFFER_F_GSO; + t0->gso_size = vnet_buffer2 (b[0])->gso_size; + t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz; + } + if (b[1]->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b[1], sizeof (t1[0])); + t1->flags = b[1]->flags & VNET_BUFFER_F_GSO; + t1->gso_size = vnet_buffer2 (b[1])->gso_size; + t1->gso_l4_hdr_sz = vnet_buffer2 (b[1])->gso_l4_hdr_sz; + } + if (b[2]->flags & VLIB_BUFFER_IS_TRACED) + { + t2 = vlib_add_trace (vm, node, 
b[2], sizeof (t2[0])); + t2->flags = b[2]->flags & VNET_BUFFER_F_GSO; + t2->gso_size = vnet_buffer2 (b[2])->gso_size; + t2->gso_l4_hdr_sz = vnet_buffer2 (b[2])->gso_l4_hdr_sz; + } + if (b[3]->flags & VLIB_BUFFER_IS_TRACED) + { + t3 = vlib_add_trace (vm, node, b[3], sizeof (t3[0])); + t3->flags = b[3]->flags & VNET_BUFFER_F_GSO; + t3->gso_size = vnet_buffer2 (b[3])->gso_size; + t3->gso_l4_hdr_sz = vnet_buffer2 (b[3])->gso_l4_hdr_sz; + } + + from += 4; + to_next += 4; + n_left_to_next -= 4; + n_left_from -= 4; + + next0 = next1 = 0; + next2 = next3 = 0; + vnet_feature_next (&next0, b[0]); + vnet_feature_next (&next1, b[1]); + vnet_feature_next (&next2, b[2]); + vnet_feature_next (&next3, b[3]); + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + b += 4; + } + + while (from + 1 <= from_end && n_left_to_next > 0) + { + u32 bi0, swif0; + gso_trace_t *t0; + vnet_hw_interface_t *hi0; + u32 next0 = 0; + + swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX]; + if (PREDICT_FALSE (hi->sw_if_index != swif0)) + { + hi0 = vnet_get_sup_hw_interface (vnm, swif0); + if ((hi0->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0 && + (b[0]->flags & VNET_BUFFER_F_GSO)) + do_segmentation = 1; + } + + /* speculatively enqueue b0 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next += 1; + n_left_to_next -= 1; + from += 1; + n_left_from -= 1; + + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0])); + t0->flags = b[0]->flags & VNET_BUFFER_F_GSO; + t0->gso_size = vnet_buffer2 (b[0])->gso_size; + t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz; + } + + if (do_segmentation) + { + if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_GSO)) + { + /* + * Undo the enqueue of the b0 - it is not going anywhere, + * and will be freed either after it's segmented or + * when dropped, if there is no buffers to segment into. 
+ */ + to_next -= 1; + n_left_to_next += 1; + /* undo the counting. */ + u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]); + u32 n_tx_bytes = 0; + + n_tx_bytes = + tso_segment_buffer (vm, ptd, bi0, b[0], n_bytes_b0, + is_ip6); + + if (PREDICT_FALSE (n_tx_bytes == 0)) + { + drop_one_buffer_and_count (vm, vnm, node, from - 1, + hi->sw_if_index, + VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO); + b += 1; + continue; + } + + u16 n_tx_bufs = vec_len (ptd->split_buffers); + u32 *from_seg = ptd->split_buffers; + + while (n_tx_bufs > 0) + { + u32 sbi0; + vlib_buffer_t *sb0; + if (n_tx_bufs >= n_left_to_next) + { + while (n_left_to_next > 0) + { + sbi0 = to_next[0] = from_seg[0]; + sb0 = vlib_get_buffer (vm, sbi0); + to_next += 1; + from_seg += 1; + n_left_to_next -= 1; + n_tx_bufs -= 1; + vnet_feature_next (&next0, sb0); + vlib_validate_buffer_enqueue_x1 (vm, node, + next_index, + to_next, + n_left_to_next, + sbi0, next0); + } + vlib_put_next_frame (vm, node, next_index, + n_left_to_next); + vlib_get_new_next_frame (vm, node, next_index, + to_next, n_left_to_next); + } + while (n_tx_bufs > 0) + { + sbi0 = to_next[0] = from_seg[0]; + sb0 = vlib_get_buffer (vm, sbi0); + to_next += 1; + from_seg += 1; + n_left_to_next -= 1; + n_tx_bufs -= 1; + vnet_feature_next (&next0, sb0); + vlib_validate_buffer_enqueue_x1 (vm, node, + next_index, + to_next, + n_left_to_next, + sbi0, next0); + } + } + /* The buffers were enqueued. 
Reset the length */ + _vec_len (ptd->split_buffers) = 0; + /* Free the now segmented buffer */ + vlib_buffer_free_one (vm, bi0); + b += 1; + continue; + } + } + + vnet_feature_next (&next0, b[0]); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + b += 1; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static_always_inline uword +vnet_gso_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_hw_interface_t *hi; + + if (frame->n_vectors > 0) + { + u32 *from = vlib_frame_vector_args (frame); + vlib_buffer_t *b = vlib_get_buffer (vm, from[0]); + hi = vnet_get_sup_hw_interface (vnm, + vnet_buffer (b)->sw_if_index[VLIB_TX]); + /* + * The 3-headed "if" is here because we want to err on the side + * of not impacting the non-GSO performance - so for the more + * common case of no GSO interfaces we want to prevent the + * segmentation codepath from being there altogether. 
+ */ + if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) + return vnet_gso_node_inline (vm, node, frame, vnm, hi, + is_ip6, /* do_segmentation */ 0); + else + return vnet_gso_node_inline (vm, node, frame, vnm, hi, + is_ip6, /* do_segmentation */ 1); + } + return 0; +} + +VLIB_NODE_FN (gso_l2_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_gso_inline (vm, node, frame, 0 /* ip6 */ ); +} + +VLIB_NODE_FN (gso_l2_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_gso_inline (vm, node, frame, 1 /* ip6 */ ); +} + +VLIB_NODE_FN (gso_ip4_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_gso_inline (vm, node, frame, 0 /* ip6 */ ); +} + +VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vnet_gso_inline (vm, node, frame, 1 /* ip6 */ ); +} + +/* *INDENT-OFF* */ + +VLIB_REGISTER_NODE (gso_l2_ip4_node) = { + .vector_size = sizeof (u32), + .format_trace = format_gso_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "gso-l2-ip4", +}; + +VLIB_REGISTER_NODE (gso_l2_ip6_node) = { + .vector_size = sizeof (u32), + .format_trace = format_gso_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "gso-l2-ip6", +}; + +VLIB_REGISTER_NODE (gso_ip4_node) = { + .vector_size = sizeof (u32), + .format_trace = format_gso_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "gso-ip4", +}; + +VLIB_REGISTER_NODE (gso_ip6_node) = { + .vector_size = sizeof (u32), + .format_trace = format_gso_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = 0, + .n_next_nodes = 0, + .name = "gso-ip6", +}; + +VNET_FEATURE_INIT (gso_l2_ip4_node, static) = { + .arc_name = "l2-output-ip4", + .node_name = "gso-l2-ip4", + .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"), +}; + +VNET_FEATURE_INIT 
(gso_l2_ip6_node, static) = { + .arc_name = "l2-output-ip6", + .node_name = "gso-l2-ip6", + .runs_before = VNET_FEATURES ("l2-output-feat-arc-end"), +}; + +VNET_FEATURE_INIT (gso_ip4_node, static) = { + .arc_name = "ip4-output", + .node_name = "gso-ip4", + .runs_after = VNET_FEATURES ("ipsec4-output-feature"), + .runs_before = VNET_FEATURES ("interface-output"), +}; + +VNET_FEATURE_INIT (gso_ip6_node, static) = { + .arc_name = "ip6-output", + .node_name = "gso-ip6", + .runs_after = VNET_FEATURES ("ipsec6-output-feature"), + .runs_before = VNET_FEATURES ("interface-output"), +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c index 707b1167a48..5e702e3b96c 100644 --- a/src/vnet/interface_output.c +++ b/src/vnet/interface_output.c @@ -49,9 +49,7 @@ typedef struct { u32 sw_if_index; u32 flags; - u16 gso_size; - u8 gso_l4_hdr_sz; - u8 data[128 - 3 * sizeof (u32)]; + u8 data[128 - 2 * sizeof (u32)]; } interface_output_trace_t; @@ -83,17 +81,6 @@ format_vnet_interface_output_trace (u8 * s, va_list * va) format (s, "%U ", format_vnet_sw_interface_name, vnm, si, t->flags); } -#define _(bit, name, v, x) \ - if (v && (t->flags & VNET_BUFFER_F_##name)) \ - s = format (s, "%s ", v); - foreach_vnet_buffer_flag -#undef _ - if (t->flags & VNET_BUFFER_F_GSO) - { - s = format (s, "\n%Ugso_sz %d gso_l4_hdr_sz %d", - format_white_space, indent + 2, t->gso_size, - t->gso_l4_hdr_sz); - } s = format (s, "\n%U%U", format_white_space, indent, node->format_buffer ? 
node->format_buffer : format_hex_bytes, @@ -133,8 +120,6 @@ vnet_interface_output_trace (vlib_main_t * vm, t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; t0->flags = b0->flags; - t0->gso_size = vnet_buffer2 (b0)->gso_size; - t0->gso_l4_hdr_sz = vnet_buffer2 (b0)->gso_l4_hdr_sz; clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), sizeof (t0->data)); } @@ -143,8 +128,6 @@ vnet_interface_output_trace (vlib_main_t * vm, t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); t1->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_TX]; t1->flags = b1->flags; - t1->gso_size = vnet_buffer2 (b1)->gso_size; - t1->gso_l4_hdr_sz = vnet_buffer2 (b1)->gso_l4_hdr_sz; clib_memcpy_fast (t1->data, vlib_buffer_get_current (b1), sizeof (t1->data)); } @@ -167,8 +150,6 @@ vnet_interface_output_trace (vlib_main_t * vm, t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX]; t0->flags = b0->flags; - t0->gso_size = vnet_buffer2 (b0)->gso_size; - t0->gso_l4_hdr_sz = vnet_buffer2 (b0)->gso_l4_hdr_sz; clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), sizeof (t0->data)); } @@ -230,250 +211,13 @@ calc_checksums (vlib_main_t * vm, vlib_buffer_t * b) b->flags &= ~VNET_BUFFER_F_OFFLOAD_IP_CKSUM; } -static_always_inline u16 -tso_alloc_tx_bufs (vlib_main_t * vm, - vnet_interface_per_thread_data_t * ptd, - vlib_buffer_t * b0, u32 n_bytes_b0, u16 l234_sz, - u16 gso_size) -{ - u16 size = - clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - l234_sz); - - /* rounded-up division */ - u16 n_bufs = (n_bytes_b0 - l234_sz + (size - 1)) / size; - u16 n_alloc; - - ASSERT (n_bufs > 0); - vec_validate (ptd->split_buffers, n_bufs - 1); - - n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs); - if (n_alloc < n_bufs) - { - vlib_buffer_free (vm, ptd->split_buffers, n_alloc); - return 0; - } - return n_alloc; -} - -static_always_inline void -tso_init_buf_from_template_base 
(vlib_buffer_t * nb0, vlib_buffer_t * b0, - u32 flags, u16 length) -{ - nb0->current_data = b0->current_data; - nb0->total_length_not_including_first_buffer = 0; - nb0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags; - clib_memcpy_fast (&nb0->opaque, &b0->opaque, sizeof (nb0->opaque)); - clib_memcpy_fast (vlib_buffer_get_current (nb0), - vlib_buffer_get_current (b0), length); - nb0->current_length = length; -} - -static_always_inline void -tso_init_buf_from_template (vlib_main_t * vm, vlib_buffer_t * nb0, - vlib_buffer_t * b0, u16 template_data_sz, - u16 gso_size, u8 ** p_dst_ptr, u16 * p_dst_left, - u32 next_tcp_seq, u32 flags) -{ - tso_init_buf_from_template_base (nb0, b0, flags, template_data_sz); - - *p_dst_left = - clib_min (gso_size, - vlib_buffer_get_default_data_size (vm) - (template_data_sz + - nb0->current_data)); - *p_dst_ptr = vlib_buffer_get_current (nb0) + template_data_sz; - - tcp_header_t *tcp = - (tcp_header_t *) (nb0->data + vnet_buffer (nb0)->l4_hdr_offset); - tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq); -} - -static_always_inline void -tso_fixup_segmented_buf (vlib_buffer_t * b0, u8 tcp_flags, int is_ip6) -{ - u16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; - u16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset; - ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset); - ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset); - tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset); - - tcp->flags = tcp_flags; - - if (is_ip6) - ip6->payload_length = - clib_host_to_net_u16 (b0->current_length - - (l4_hdr_offset - b0->current_data)); - else - ip4->length = - clib_host_to_net_u16 (b0->current_length - - (l3_hdr_offset - b0->current_data)); -} - -/** - * Allocate the necessary number of ptd->split_buffers, - * and segment the possibly chained buffer(s) from b0 into - * there. - * - * Return the cumulative number of bytes sent or zero - * if allocation failed. 
- */ - -static_always_inline u32 -tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd, - int do_tx_offloads, u32 sbi0, vlib_buffer_t * sb0, - u32 n_bytes_b0) -{ - u32 n_tx_bytes = 0; - int is_ip4 = sb0->flags & VNET_BUFFER_F_IS_IP4; - int is_ip6 = sb0->flags & VNET_BUFFER_F_IS_IP6; - ASSERT (is_ip4 || is_ip6); - ASSERT (sb0->flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID); - ASSERT (sb0->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID); - ASSERT (sb0->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - u16 gso_size = vnet_buffer2 (sb0)->gso_size; - - int l4_hdr_sz = vnet_buffer2 (sb0)->gso_l4_hdr_sz; - u8 save_tcp_flags = 0; - u8 tcp_flags_no_fin_psh = 0; - u32 next_tcp_seq = 0; - - tcp_header_t *tcp = - (tcp_header_t *) (sb0->data + vnet_buffer (sb0)->l4_hdr_offset); - next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number); - /* store original flags for last packet and reset FIN and PSH */ - save_tcp_flags = tcp->flags; - tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH); - tcp->checksum = 0; - - u32 default_bflags = - sb0->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT); - u16 l234_sz = vnet_buffer (sb0)->l4_hdr_offset + l4_hdr_sz - - sb0->current_data; - int first_data_size = clib_min (gso_size, sb0->current_length - l234_sz); - next_tcp_seq += first_data_size; - - if (PREDICT_FALSE - (!tso_alloc_tx_bufs (vm, ptd, sb0, n_bytes_b0, l234_sz, gso_size))) - return 0; - - vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[0]); - tso_init_buf_from_template_base (b0, sb0, default_bflags, - l234_sz + first_data_size); - - u32 total_src_left = n_bytes_b0 - l234_sz - first_data_size; - if (total_src_left) - { - /* Need to copy more segments */ - u8 *src_ptr, *dst_ptr; - u16 src_left, dst_left; - /* current source buffer */ - vlib_buffer_t *csb0 = sb0; - u32 csbi0 = sbi0; - /* current dest buffer */ - vlib_buffer_t *cdb0; - u16 dbi = 1; /* the buffer [0] is b0 */ - - src_ptr = vlib_buffer_get_current (sb0) + l234_sz + 
first_data_size; - src_left = sb0->current_length - l234_sz - first_data_size; - - tso_fixup_segmented_buf (b0, tcp_flags_no_fin_psh, is_ip6); - if (do_tx_offloads) - calc_checksums (vm, b0); - - /* grab a second buffer and prepare the loop */ - ASSERT (dbi < vec_len (ptd->split_buffers)); - cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]); - tso_init_buf_from_template (vm, cdb0, b0, l234_sz, gso_size, &dst_ptr, - &dst_left, next_tcp_seq, default_bflags); - - /* an arbitrary large number to catch the runaway loops */ - int nloops = 2000; - while (total_src_left) - { - if (nloops-- <= 0) - clib_panic ("infinite loop detected"); - u16 bytes_to_copy = clib_min (src_left, dst_left); - - clib_memcpy_fast (dst_ptr, src_ptr, bytes_to_copy); - - src_left -= bytes_to_copy; - src_ptr += bytes_to_copy; - total_src_left -= bytes_to_copy; - dst_left -= bytes_to_copy; - dst_ptr += bytes_to_copy; - next_tcp_seq += bytes_to_copy; - cdb0->current_length += bytes_to_copy; - - if (0 == src_left) - { - int has_next = (csb0->flags & VLIB_BUFFER_NEXT_PRESENT); - u32 next_bi = csb0->next_buffer; - - /* init src to the next buffer in chain */ - if (has_next) - { - csbi0 = next_bi; - csb0 = vlib_get_buffer (vm, csbi0); - src_left = csb0->current_length; - src_ptr = vlib_buffer_get_current (csb0); - } - else - { - ASSERT (total_src_left == 0); - break; - } - } - if (0 == dst_left && total_src_left) - { - if (do_tx_offloads) - calc_checksums (vm, cdb0); - n_tx_bytes += cdb0->current_length; - ASSERT (dbi < vec_len (ptd->split_buffers)); - cdb0 = vlib_get_buffer (vm, ptd->split_buffers[dbi++]); - tso_init_buf_from_template (vm, cdb0, b0, l234_sz, - gso_size, &dst_ptr, &dst_left, - next_tcp_seq, default_bflags); - } - } - - tso_fixup_segmented_buf (cdb0, save_tcp_flags, is_ip6); - if (do_tx_offloads) - calc_checksums (vm, cdb0); - - n_tx_bytes += cdb0->current_length; - } - n_tx_bytes += b0->current_length; - return n_tx_bytes; -} - -static_always_inline void -drop_one_buffer_and_count 
(vlib_main_t * vm, vnet_main_t * vnm, - vlib_node_runtime_t * node, u32 * pbi0, - u32 drop_error_code) -{ - u32 thread_index = vm->thread_index; - vnet_interface_output_runtime_t *rt = (void *) node->runtime_data; - - vlib_simple_counter_main_t *cm; - cm = - vec_elt_at_index (vnm->interface_main.sw_if_counters, - VNET_INTERFACE_COUNTER_TX_ERROR); - vlib_increment_simple_counter (cm, thread_index, rt->sw_if_index, 1); - - vlib_error_drop_buffers (vm, node, pbi0, - /* buffer stride */ 1, - /* n_buffers */ 1, - VNET_INTERFACE_OUTPUT_NEXT_DROP, - node->node_index, drop_error_code); -} - static_always_inline uword -vnet_interface_output_node_inline_gso (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - vnet_main_t * vnm, - vnet_hw_interface_t * hi, - int do_tx_offloads, - int do_segmentation) +vnet_interface_output_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vnet_main_t * vnm, + vnet_hw_interface_t * hi, + int do_tx_offloads) { vnet_interface_output_runtime_t *rt = (void *) node->runtime_data; vnet_sw_interface_t *si; @@ -485,8 +229,6 @@ vnet_interface_output_node_inline_gso (vlib_main_t * vm, u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX; u32 current_config_index = ~0; u8 arc = im->output_feature_arc_index; - vnet_interface_per_thread_data_t *ptd = - vec_elt_at_index (im->per_thread_data, thread_index); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; n_buffers = frame->n_vectors; @@ -571,12 +313,6 @@ vnet_interface_output_node_inline_gso (vlib_main_t * vm, or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags; - if (do_segmentation) - { - /* go to single loop if we need TSO segmentation */ - if (PREDICT_FALSE (or_flags & VNET_BUFFER_F_GSO)) - break; - } from += 4; to_tx += 4; n_left_to_tx -= 4; @@ -691,84 +427,6 @@ vnet_interface_output_node_inline_gso (vlib_main_t * vm, b[0]->current_config_index = current_config_index; } - if (do_segmentation) - { - if (PREDICT_FALSE (b[0]->flags & 
VNET_BUFFER_F_GSO)) - { - /* - * Undo the enqueue of the b0 - it is not going anywhere, - * and will be freed either after it's segmented or - * when dropped, if there is no buffers to segment into. - */ - to_tx -= 1; - n_left_to_tx += 1; - /* undo the counting. */ - n_bytes -= n_bytes_b0; - n_packets -= 1; - - u32 n_tx_bytes = 0; - - n_tx_bytes = - tso_segment_buffer (vm, ptd, do_tx_offloads, bi0, b[0], - n_bytes_b0); - - if (PREDICT_FALSE (n_tx_bytes == 0)) - { - drop_one_buffer_and_count (vm, vnm, node, from - 1, - VNET_INTERFACE_OUTPUT_ERROR_NO_BUFFERS_FOR_GSO); - b += 1; - continue; - } - - u16 n_tx_bufs = vec_len (ptd->split_buffers); - u32 *from_tx_seg = ptd->split_buffers; - - while (n_tx_bufs > 0) - { - if (n_tx_bufs >= n_left_to_tx) - { - while (n_left_to_tx > 0) - { - to_tx[0] = from_tx_seg[0]; - to_tx += 1; - from_tx_seg += 1; - n_left_to_tx -= 1; - n_tx_bufs -= 1; - n_packets += 1; - } - vlib_put_next_frame (vm, node, next_index, - n_left_to_tx); - vlib_get_new_next_frame (vm, node, next_index, - to_tx, n_left_to_tx); - } - while (n_tx_bufs > 0) - { - to_tx[0] = from_tx_seg[0]; - to_tx += 1; - from_tx_seg += 1; - n_left_to_tx -= 1; - n_tx_bufs -= 1; - n_packets += 1; - } - } - n_bytes += n_tx_bytes; - if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index)) - { - - vlib_increment_combined_counter - (im->combined_sw_if_counters + - VNET_INTERFACE_COUNTER_TX, thread_index, tx_swif0, - _vec_len (ptd->split_buffers), n_tx_bytes); - } - /* The buffers were enqueued. 
Reset the length */ - _vec_len (ptd->split_buffers) = 0; - /* Free the now segmented buffer */ - vlib_buffer_free_one (vm, bi0); - b += 1; - continue; - } - } - if (PREDICT_FALSE (tx_swif0 != rt->sw_if_index)) { @@ -779,8 +437,13 @@ vnet_interface_output_node_inline_gso (vlib_main_t * vm, } if (do_tx_offloads) - calc_checksums (vm, b[0]); - + { + if (b[0]->flags & + (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM | + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM | + VNET_BUFFER_F_OFFLOAD_IP_CKSUM)) + calc_checksums (vm, b[0]); + } b += 1; } @@ -845,32 +508,6 @@ static_always_inline void vnet_interface_pcap_tx_trace } #ifndef CLIB_MARCH_VARIANT -static_always_inline uword -vnet_interface_output_node_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, vnet_main_t * vnm, - vnet_hw_interface_t * hi, - int do_tx_offloads) -{ - /* - * The 3-headed "if" is here because we want to err on the side - * of not impacting the non-GSO performance - so for the more - * common case of no GSO interfaces we want to prevent the - * segmentation codepath from being there altogether. 
- */ - if (PREDICT_TRUE (vnm->interface_main.gso_interface_count == 0)) - return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi, - do_tx_offloads, - /* do_segmentation */ 0); - else if (hi->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) - return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi, - do_tx_offloads, - /* do_segmentation */ 0); - else - return vnet_interface_output_node_inline_gso (vm, node, frame, vnm, hi, - do_tx_offloads, - /* do_segmentation */ 1); -} uword vnet_interface_output_node (vlib_main_t * vm, vlib_node_runtime_t * node, diff --git a/src/vnet/vnet_all_api_h.h b/src/vnet/vnet_all_api_h.h index 519f5219042..7395d1c4756 100644 --- a/src/vnet/vnet_all_api_h.h +++ b/src/vnet/vnet_all_api_h.h @@ -74,6 +74,7 @@ #include <vnet/vxlan-gbp/vxlan_gbp.api.h> #include <vnet/syslog/syslog.api.h> #include <vnet/devices/virtio/virtio.api.h> +#include <vnet/gso/gso.api.h> /* * fd.io coding-style-patch-verification: ON diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 14da1242335..66a17b27cb1 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -3475,6 +3475,21 @@ static void *vl_api_feature_enable_disable_t_print FINISH; } +static void *vl_api_feature_gso_enable_disable_t_print + (vl_api_feature_gso_enable_disable_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: feature_gso_enable_disable "); + s = format (s, "sw_if_index %d ", ntohl (mp->sw_if_index)); + if (mp->enable_disable) + s = format (s, "enable"); + if (!mp->enable_disable) + s = format (s, "disable"); + + FINISH; +} + static void *vl_api_sw_interface_tag_add_del_t_print (vl_api_sw_interface_tag_add_del_t * mp, void *handle) { @@ -3866,6 +3881,7 @@ _(IOAM_DISABLE, ioam_disable) \ _(IP_TABLE_DUMP, ip_table_dump) \ _(IP_ROUTE_DUMP, ip_route_dump) \ _(FEATURE_ENABLE_DISABLE, feature_enable_disable) \ +_(FEATURE_GSO_ENABLE_DISABLE, feature_gso_enable_disable) \ _(SW_INTERFACE_TAG_ADD_DEL, sw_interface_tag_add_del) \ 
_(HW_INTERFACE_SET_MTU, hw_interface_set_mtu) \ _(P2P_ETHERNET_ADD, p2p_ethernet_add) \ diff --git a/test/test_gso.py b/test/test_gso.py index 87626ccf01f..017bb496a0f 100644 --- a/test/test_gso.py +++ b/test/test_gso.py @@ -129,6 +129,7 @@ class TestGSO(VppTestCase): i.resolve_arp() i.resolve_ndp() + self.vapi.feature_gso_enable_disable(self.pg4.sw_if_index) p42 = (Ether(src=self.pg2.remote_mac, dst=self.pg2.local_mac) / IP(src=self.pg2.remote_ip4, dst=self.pg4.remote_ip4, flags='DF') / @@ -186,6 +187,7 @@ class TestGSO(VppTestCase): i.resolve_ndp() self.vapi.sw_interface_set_mtu(self.pg5.sw_if_index, [9000, 0, 0, 0]) + self.vapi.feature_gso_enable_disable(self.pg1.sw_if_index) p44 = (Ether(src=self.pg5.remote_mac, dst=self.pg5.local_mac) / IP(src=self.pg5.remote_ip4, dst=self.pg1.remote_ip4) / TCP(sport=1234, dport=1234) / diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 12dcf1081ab..d80f2ed4400 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -2050,3 +2050,10 @@ class VppPapiProvider(object): 'sw_if_index': sw_if_index, 'is_enable': is_enable, }) + + def feature_gso_enable_disable(self, sw_if_index, enable_disable=1): + return self.api(self.papi.feature_gso_enable_disable, + { + 'sw_if_index': sw_if_index, + 'enable_disable': enable_disable, + }) |