From 68b0fb0c620c7451ef1a6380c43c39de6614db51 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 28 Feb 2017 15:15:56 -0500 Subject: VPP-598: tcp stack initial commit Change-Id: I49e5ce0aae6e4ff634024387ceaf7dbc432a0351 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/vnet/tcp/tcp_format.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 src/vnet/tcp/tcp_format.c (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c new file mode 100644 index 00000000..7136741d --- /dev/null +++ b/src/vnet/tcp/tcp_format.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * tcp/tcp_format.c: tcp formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +static u8 * +format_tcp_flags (u8 * s, va_list * args) +{ + int flags = va_arg (*args, int); + +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + foreach_tcp_flag +#undef _ + return s; +} + +/* Format TCP header. */ +u8 * +format_tcp_header (u8 * s, va_list * args) +{ + tcp_header_t *tcp = va_arg (*args, tcp_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 header_bytes; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (tcp[0])) + return format (s, "TCP header truncated"); + + indent = format_get_indent (s); + indent += 2; + header_bytes = tcp_header_bytes (tcp); + + s = format (s, "TCP: %d -> %d", clib_net_to_host_u16 (tcp->src), + clib_net_to_host_u16 (tcp->dst)); + + s = format (s, "\n%Useq. 0x%08x ack 0x%08x", format_white_space, indent, + clib_net_to_host_u32 (tcp->seq_number), + clib_net_to_host_u32 (tcp->ack_number)); + + s = format (s, "\n%Uflags %U, tcp header: %d bytes", format_white_space, + indent, format_tcp_flags, tcp->flags, header_bytes); + + s = format (s, "\n%Uwindow %d, checksum 0x%04x", format_white_space, indent, + clib_net_to_host_u16 (tcp->window), + clib_net_to_host_u16 (tcp->checksum)); + + +#if 0 + /* Format TCP options. 
*/ + { + u8 *o; + u8 *option_start = (void *) (tcp + 1); + u8 *option_end = (void *) tcp + header_bytes; + + for (o = option_start; o < option_end;) + { + u32 length = o[1]; + switch (o[0]) + { + case TCP_OPTION_END: + length = 1; + o = option_end; + break; + + case TCP_OPTION_NOOP: + length = 1; + break; + + } + } + } +#endif + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, tcp->dst); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", format_white_space, indent - 2, + pi->format_header, + /* next protocol header */ (void *) tcp + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 1f75cfd73320476a8f821064391fe368dd4bf75b Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Fri, 14 Apr 2017 16:46:44 -0400 Subject: Fix fifo ooo bugs and improve testing Change-Id: If3c01e318bcb740ca5b240c63f712e2167082a80 Signed-off-by: Dave Barach Signed-off-by: Florin Coras --- src/svm/svm_fifo.c | 126 +++++++---- src/svm/svm_fifo.h | 21 +- src/vnet/tcp/tcp.c | 2 +- src/vnet/tcp/tcp.h | 1 + src/vnet/tcp/tcp_format.c | 2 +- src/vnet/tcp/tcp_input.c | 28 ++- src/vnet/tcp/tcp_test.c | 516 ++++++++++++++++++++++++++++++++++++++++++---- 7 files changed, 602 insertions(+), 94 deletions(-) (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index 097bab77..bd968aea 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -15,6 +15,36 @@ #include +#define offset_lt(_a, _b) ((i32)((_a)-(_b)) < 0) +#define offset_leq(_a, _b) ((i32)((_a)-(_b)) <= 0) + +u8 * +format_ooo_segment (u8 * s, va_list * args) +{ + ooo_segment_t *seg = va_arg (*args, ooo_segment_t *); + + s = format (s, "pos %u, len %u, next %d, prev %d", + seg->start, 
seg->length, seg->next, seg->prev); + return s; +} + +u8 * +format_ooo_list (u8 * s, va_list * args) +{ + svm_fifo_t *f = va_arg (*args, svm_fifo_t *); + u32 ooo_segment_index = f->ooos_list_head; + ooo_segment_t *seg; + + while (ooo_segment_index != OOO_SEGMENT_INVALID_INDEX) + { + seg = pool_elt_at_index (f->ooo_segments, ooo_segment_index); + s = format (s, "\n %U", format_ooo_segment, seg); + + ooo_segment_index = seg->next; + } + return s; +} + /** create an svm fifo, in the current heap. Fails vs blow up the process */ svm_fifo_t * svm_fifo_create (u32 data_size_in_bytes) @@ -47,7 +77,7 @@ ooo_segment_new (svm_fifo_t * f, u32 start, u32 length) pool_get (f->ooo_segments, s); - s->fifo_position = start; + s->start = start; s->length = length; s->prev = s->next = OOO_SEGMENT_INVALID_INDEX; @@ -88,14 +118,13 @@ static void ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) { ooo_segment_t *s, *new_s, *prev, *next, *it; - u32 new_index, position, end_offset, s_sof, s_eof, s_index; + u32 new_index, end_offset, s_sof, s_eof, s_index; - position = (f->tail + offset) % f->nitems; end_offset = offset + length; if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX) { - s = ooo_segment_new (f, position, length); + s = ooo_segment_new (f, offset, length); f->ooos_list_head = s - f->ooo_segments; f->ooos_newest = f->ooos_list_head; return; @@ -104,26 +133,26 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) /* Find first segment that starts after new segment */ s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); while (s->next != OOO_SEGMENT_INVALID_INDEX - && ooo_segment_offset (f, s) <= offset) + && offset_leq (ooo_segment_offset (f, s), offset)) s = pool_elt_at_index (f->ooo_segments, s->next); s_index = s - f->ooo_segments; s_sof = ooo_segment_offset (f, s); s_eof = ooo_segment_end_offset (f, s); + prev = ooo_segment_get_prev (f, s); /* No overlap, add before current segment */ - if (end_offset < s_sof) + if (offset_lt (end_offset, s_sof) + && 
(!prev || offset_lt (prev->start + prev->length, offset))) { - new_s = ooo_segment_new (f, position, length); + new_s = ooo_segment_new (f, offset, length); new_index = new_s - f->ooo_segments; /* Pool might've moved, get segment again */ s = pool_elt_at_index (f->ooo_segments, s_index); - if (s->prev != OOO_SEGMENT_INVALID_INDEX) { new_s->prev = s->prev; - prev = pool_elt_at_index (f->ooo_segments, new_s->prev); prev->next = new_index; } @@ -139,9 +168,9 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) return; } /* No overlap, add after current segment */ - else if (s_eof < offset) + else if (offset_lt (s_eof, offset)) { - new_s = ooo_segment_new (f, position, length); + new_s = ooo_segment_new (f, offset, length); new_index = new_s - f->ooo_segments; /* Pool might've moved, get segment again */ @@ -150,7 +179,6 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) if (s->next != OOO_SEGMENT_INVALID_INDEX) { new_s->next = s->next; - next = pool_elt_at_index (f->ooo_segments, new_s->next); next->prev = new_index; } @@ -167,7 +195,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) */ /* Merge at head */ - if (offset <= s_sof) + if (offset_leq (offset, s_sof)) { /* If we have a previous, check if we overlap */ if (s->prev != OOO_SEGMENT_INVALID_INDEX) @@ -176,26 +204,31 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) /* New segment merges prev and current. Remove previous and * update position of current. 
*/ - if (ooo_segment_end_offset (f, prev) >= offset) + if (offset_leq (offset, ooo_segment_end_offset (f, prev))) { - s->fifo_position = prev->fifo_position; + s->start = prev->start; s->length = s_eof - ooo_segment_offset (f, prev); ooo_segment_del (f, s->prev); } + else + { + s->start = offset; + s->length = s_eof - ooo_segment_offset (f, s); + } } else { - s->fifo_position = position; + s->start = offset; s->length = s_eof - ooo_segment_offset (f, s); } /* The new segment's tail may cover multiple smaller ones */ - if (s_eof < end_offset) + if (offset_lt (s_eof, end_offset)) { /* Remove segments completely covered */ it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? pool_elt_at_index (f->ooo_segments, s->next) : 0; - while (it && ooo_segment_end_offset (f, it) < end_offset) + while (it && offset_lt (ooo_segment_end_offset (f, it), end_offset)) { next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? pool_elt_at_index (f->ooo_segments, it->next) : 0; @@ -207,7 +240,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) s->length = end_offset - ooo_segment_offset (f, s); /* If partial overlap with last, merge */ - if (it && ooo_segment_offset (f, it) < end_offset) + if (it && offset_lt (ooo_segment_offset (f, it), end_offset)) { s->length += it->length - (ooo_segment_offset (f, it) - end_offset); @@ -216,7 +249,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) } } /* Last but overlapping previous */ - else if (s_eof <= end_offset) + else if (offset_leq (s_eof, end_offset)) { s->length = end_offset - ooo_segment_offset (f, s); } @@ -247,7 +280,7 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); /* If last tail update overlaps one/multiple ooo segments, remove them */ - diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + diff = (f->nitems + ((int) s->start - f->tail)) % f->nitems; while (0 < diff && diff < n_bytes_enqueued) { /* Segment end is beyond the tail. 
Advance tail and be done */ @@ -262,7 +295,7 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) { index = s - f->ooo_segments; s = pool_elt_at_index (f->ooo_segments, s->next); - diff = (f->nitems + f->tail - s->fifo_position) % f->nitems; + diff = (f->nitems + ((int) s->start - f->tail)) % f->nitems; ooo_segment_del (f, index); } /* End of search */ @@ -368,9 +401,20 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f, { u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; u32 cursize, nitems; - u32 tail_plus_offset; + u32 normalized_offset; + int rv; - ASSERT (offset > 0); + /* Safety: don't wrap more than nitems/2 */ + ASSERT ((f->nitems + offset - f->tail) % f->nitems < f->nitems / 2); + + /* Users would do well to avoid this */ + if (PREDICT_FALSE (f->tail == (offset % f->nitems))) + { + rv = svm_fifo_enqueue_internal (f, pid, required_bytes, copy_from_here); + if (rv > 0) + return 0; + return -1; + } /* read cursize, which can only increase while we're working */ cursize = svm_fifo_max_dequeue (f); @@ -384,24 +428,24 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f, /* Number of bytes we're going to copy */ total_copy_bytes = required_bytes; - tail_plus_offset = (f->tail + offset) % nitems; + normalized_offset = offset % nitems; /* Number of bytes in first copy segment */ - first_copy_bytes = ((nitems - tail_plus_offset) < total_copy_bytes) - ? (nitems - tail_plus_offset) : total_copy_bytes; + first_copy_bytes = ((nitems - normalized_offset) < total_copy_bytes) + ? 
(nitems - normalized_offset) : total_copy_bytes; - clib_memcpy (&f->data[tail_plus_offset], copy_from_here, first_copy_bytes); + clib_memcpy (&f->data[normalized_offset], copy_from_here, first_copy_bytes); /* Number of bytes in second copy segment, if any */ second_copy_bytes = total_copy_bytes - first_copy_bytes; if (second_copy_bytes) { - tail_plus_offset += first_copy_bytes; - tail_plus_offset %= nitems; + normalized_offset += first_copy_bytes; + normalized_offset %= nitems; - ASSERT (tail_plus_offset == 0); + ASSERT (normalized_offset == 0); - clib_memcpy (&f->data[tail_plus_offset], + clib_memcpy (&f->data[normalized_offset], copy_from_here + first_copy_bytes, second_copy_bytes); } @@ -573,8 +617,8 @@ format_svm_fifo (u8 * s, va_list * args) ooo_segment_t *seg; u32 seg_index; - s = - format (s, "ooo pool %d active elts\n", pool_elts (f->ooo_segments)); + s = format (s, "ooo pool %d active elts\n", + pool_elts (f->ooo_segments)); seg_index = f->ooos_list_head; @@ -582,13 +626,25 @@ format_svm_fifo (u8 * s, va_list * args) { seg = pool_elt_at_index (f->ooo_segments, seg_index); s = format (s, " pos %u, len %u next %d\n", - seg->fifo_position, seg->length, seg->next); + seg->start, seg->length, seg->next); seg_index = seg->next; } } return s; } +u32 +svm_fifo_number_ooo_segments (svm_fifo_t * f) +{ + return pool_elts (f->ooo_segments); +} + +ooo_segment_t * +svm_fifo_first_ooo_segment (svm_fifo_t * f) +{ + return pool_elt_at_index (f->ooo_segments, f->ooos_list_head); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index 9beb63f5..0fff2577 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -36,10 +36,13 @@ typedef struct u32 next; /**< Next linked-list element pool index */ u32 prev; /**< Previous linked-list element pool index */ - u32 fifo_position; /**< Start of segment, normalized*/ + u32 start; /**< Start of segment, normalized*/ u32 length; /**< Length of segment */ } ooo_segment_t; 
+format_function_t format_ooo_segment; +format_function_t format_ooo_list; + #define OOO_SEGMENT_INVALID_INDEX ((u32)~0) typedef struct @@ -127,6 +130,8 @@ int svm_fifo_dequeue_nowait (svm_fifo_t * f, int pid, u32 max_bytes, int svm_fifo_peek (svm_fifo_t * f, int pid, u32 offset, u32 max_bytes, u8 * copy_here); int svm_fifo_dequeue_drop (svm_fifo_t * f, int pid, u32 max_bytes); +u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); +ooo_segment_t *svm_fifo_first_ooo_segment (svm_fifo_t * f); format_function_t format_svm_fifo; @@ -139,13 +144,23 @@ svm_fifo_newest_ooo_segment (svm_fifo_t * f) always_inline u32 ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s) { - return ((f->nitems + s->fifo_position - f->tail) % f->nitems); +// return ((f->nitems + s->fifo_position - f->tail) % f->nitems); + return s->start; } always_inline u32 ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s) { - return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems); +// return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems); + return s->start + s->length; +} + +always_inline ooo_segment_t * +ooo_segment_get_prev (svm_fifo_t * f, ooo_segment_t * s) +{ + if (s->prev == OOO_SEGMENT_INVALID_INDEX) + return 0; + return pool_elt_at_index (f->ooo_segments, s->prev); } #endif /* __included_ssvm_fifo_h__ */ diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index a0c66b9f..12982589 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -447,7 +447,7 @@ format_tcp_state (u8 * s, va_list * args) if (*state < TCP_N_STATES) s = format (s, "%s", tcp_fsm_states[*state]); else - s = format (s, "UNKNOWN"); + s = format (s, "UNKNOWN (%d (0x%x))", *state, *state); return s; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 225b26da..2ac6a9b8 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -58,6 +58,7 @@ typedef enum _tcp_state } tcp_state_t; format_function_t format_tcp_state; +format_function_t format_tcp_flags; /** TCP timers */ 
#define foreach_tcp_timer \ diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index 7136741d..994ccfd6 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -40,7 +40,7 @@ #include #include -static u8 * +u8 * format_tcp_flags (u8 * s, va_list * args) { int flags = va_arg (*args, int); diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index a12ad8c0..97679aaf 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -211,8 +211,6 @@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) always_inline int tcp_segment_check_paws (tcp_connection_t * tc) { - /* XXX normally test for timestamp should be lt instead of leq, but for - * local testing this is not enough */ return tcp_opts_tstamp (&tc->opt) && tc->tsval_recent && timestamp_lt (tc->opt.tsval, tc->tsval_recent); } @@ -999,7 +997,7 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, u16 data_len) { stream_session_t *s0; - u32 offset, seq; + u32 offset; int rv; /* Pure ACK. 
Do nothing */ @@ -1009,8 +1007,9 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, } s0 = stream_session_get (tc->c_s_index, tc->c_thread_index); - seq = vnet_buffer (b)->tcp.seq_number; - offset = seq - tc->rcv_nxt; + offset = vnet_buffer (b)->tcp.seq_number - tc->irs; + + clib_warning ("ooo: offset %d len %d", offset, data_len); rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, s0->pid, offset, data_len, vlib_buffer_get_current (b)); @@ -1032,8 +1031,8 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, /* Get the newest segment from the fifo */ newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo); - start = tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest); - end = tc->rcv_nxt + ooo_segment_end_offset (s0->server_rx_fifo, newest); + start = ooo_segment_offset (s0->server_rx_fifo, newest); + end = ooo_segment_end_offset (s0->server_rx_fifo, newest); tcp_update_sack_list (tc, start, end); } @@ -1072,6 +1071,7 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b, { /* Old sequence numbers allowed through because they overlapped * the rx window */ + if (seq_lt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt)) { error = TCP_ERROR_SEGMENT_OLD; @@ -1181,6 +1181,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_left_from, next_index, *from, *to_next; u32 my_thread_index = vm->thread_index, errors = 0; tcp_main_t *tm = vnet_get_tcp_main (); + u8 is_fin = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -1243,9 +1244,11 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_advance_bytes0 += sizeof (ip60[0]); } + is_fin = (th0->flags & TCP_FLAG_FIN) != 0; + /* SYNs, FINs and data consume sequence numbers */ vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number - + tcp_is_syn (th0) + tcp_is_fin (th0) + n_data_bytes0; + + tcp_is_syn (th0) + is_fin + n_data_bytes0; /* TODO header prediction 
fast path */ @@ -1272,8 +1275,11 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_advance (b0, n_advance_bytes0); error0 = tcp_segment_rcv (tm, tc0, b0, n_data_bytes0, &next0); + /* N.B. buffer is rewritten if segment is ooo. Thus, th0 becomes a + * dangling reference. */ + /* 8: check the FIN bit */ - if (tcp_fin (th0)) + if (is_fin) { /* Enter CLOSE-WAIT and notify session. Don't send ACK, instead * wait for session to call close. To avoid lingering @@ -2365,8 +2371,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_FALSE (error0 == TCP_ERROR_DISPATCH)) { + tcp_state_t state0 = tc0->state; /* Overload tcp flags to store state */ vnet_buffer (b0)->tcp.flags = tc0->state; + clib_warning ("disp error state %U flags %U", + format_tcp_state, &state0, + format_tcp_flags, flags0); } } else diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index 3dbbdf6f..12579632 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -173,17 +173,145 @@ tcp_test_sack () return 0; } -static int -tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) +typedef struct +{ + u32 offset; + u32 len; +} test_pattern_t; + +/* *INDENT-OFF* */ +test_pattern_t test_pattern[] = { + {380, 8}, {768, 8}, {1156, 8}, {1544, 8}, {1932, 8}, {2320, 8}, {2708, 8}, + {2992, 8}, {372, 8}, {760, 8}, {1148, 8}, {1536, 8}, {1924, 8}, {2312, 8}, + {2700, 8}, {2984, 8}, {364, 8}, {752, 8}, {1140, 8}, {1528, 8}, {1916, 8}, + {2304, 8}, {2692, 8}, {2976, 8}, {356, 8}, {744, 8}, {1132, 8}, {1520, 8}, + {1908, 8}, {2296, 8}, {2684, 8}, {2968, 8}, {348, 8}, {736, 8}, {1124, 8}, + {1512, 8}, {1900, 8}, {2288, 8}, {2676, 8}, {2960, 8}, {340, 8}, {728, 8}, + {1116, 8}, {1504, 8}, {1892, 8}, {2280, 8}, {2668, 8}, {2952, 8}, {332, 8}, + {720, 8}, {1108, 8}, {1496, 8}, {1884, 8}, {2272, 8}, {2660, 8}, {2944, 8}, + {324, 8}, {712, 8}, {1100, 8}, {1488, 8}, {1876, 8}, {2264, 8}, {2652, 8}, + {2936, 8}, {316, 8}, {704, 8}, 
{1092, 8}, {1480, 8}, {1868, 8}, {2256, 8}, + {2644, 8}, {2928, 8}, {308, 8}, {696, 8}, {1084, 8}, {1472, 8}, {1860, 8}, + {2248, 8}, {2636, 8}, {2920, 8}, {300, 8}, {688, 8}, {1076, 8}, {1464, 8}, + {1852, 8}, {2240, 8}, {2628, 8}, {2912, 8}, {292, 8}, {680, 8}, {1068, 8}, + {1456, 8}, {1844, 8}, {2232, 8}, {2620, 8}, {2904, 8}, {284, 8}, {672, 8}, + {1060, 8}, {1448, 8}, {1836, 8}, {2224, 8}, {2612, 8}, {2896, 8}, {276, 8}, + {664, 8}, {1052, 8}, {1440, 8}, {1828, 8}, {2216, 8}, {2604, 8}, {2888, 8}, + {268, 8}, {656, 8}, {1044, 8}, {1432, 8}, {1820, 8}, {2208, 8}, {2596, 8}, + {2880, 8}, {260, 8}, {648, 8}, {1036, 8}, {1424, 8}, {1812, 8}, {2200, 8}, + {2588, 8}, {2872, 8}, {252, 8}, {640, 8}, {1028, 8}, {1416, 8}, {1804, 8}, + {2192, 8}, {2580, 8}, {2864, 8}, {244, 8}, {632, 8}, {1020, 8}, {1408, 8}, + {1796, 8}, {2184, 8}, {2572, 8}, {2856, 8}, {236, 8}, {624, 8}, {1012, 8}, + {1400, 8}, {1788, 8}, {2176, 8}, {2564, 8}, {2848, 8}, {228, 8}, {616, 8}, + {1004, 8}, {1392, 8}, {1780, 8}, {2168, 8}, {2556, 8}, {2840, 8}, {220, 8}, + {608, 8}, {996, 8}, {1384, 8}, {1772, 8}, {2160, 8}, {2548, 8}, {2832, 8}, + {212, 8}, {600, 8}, {988, 8}, {1376, 8}, {1764, 8}, {2152, 8}, {2540, 8}, + {2824, 8}, {204, 8}, {592, 8}, {980, 8}, {1368, 8}, {1756, 8}, {2144, 8}, + {2532, 8}, {2816, 8}, {196, 8}, {584, 8}, {972, 8}, {1360, 8}, {1748, 8}, + {2136, 8}, {2524, 8}, {2808, 8}, {188, 8}, {576, 8}, {964, 8}, {1352, 8}, + {1740, 8}, {2128, 8}, {2516, 8}, {2800, 8}, {180, 8}, {568, 8}, {956, 8}, + {1344, 8}, {1732, 8}, {2120, 8}, {2508, 8}, {2792, 8}, {172, 8}, {560, 8}, + {948, 8}, {1336, 8}, {1724, 8}, {2112, 8}, {2500, 8}, {2784, 8}, {164, 8}, + {552, 8}, {940, 8}, {1328, 8}, {1716, 8}, {2104, 8}, {2492, 8}, {2776, 8}, + {156, 8}, {544, 8}, {932, 8}, {1320, 8}, {1708, 8}, {2096, 8}, {2484, 8}, + {2768, 8}, {148, 8}, {536, 8}, {924, 8}, {1312, 8}, {1700, 8}, {2088, 8}, + {2476, 8}, {2760, 8}, {140, 8}, {528, 8}, {916, 8}, {1304, 8}, {1692, 8}, + {2080, 8}, {2468, 8}, {2752, 8}, 
{132, 8}, {520, 8}, {908, 8}, {1296, 8}, + {1684, 8}, {2072, 8}, {2460, 8}, {2744, 8}, {124, 8}, {512, 8}, {900, 8}, + {1288, 8}, {1676, 8}, {2064, 8}, {2452, 8}, {2736, 8}, {116, 8}, {504, 8}, + {892, 8}, {1280, 8}, {1668, 8}, {2056, 8}, {2444, 8}, {2728, 8}, {108, 8}, + {496, 8}, {884, 8}, {1272, 8}, {1660, 8}, {2048, 8}, {2436, 8}, {2720, 8}, + {100, 8}, {488, 8}, {876, 8}, {1264, 8}, {1652, 8}, {2040, 8}, {2428, 8}, + {2716, 4}, {92, 8}, {480, 8}, {868, 8}, {1256, 8}, {1644, 8}, {2032, 8}, + {2420, 8}, {84, 8}, {472, 8}, {860, 8}, {1248, 8}, {1636, 8}, {2024, 8}, + {2412, 8}, {76, 8}, {464, 8}, {852, 8}, {1240, 8}, {1628, 8}, {2016, 8}, + {2404, 8}, {68, 8}, {456, 8}, {844, 8}, {1232, 8}, {1620, 8}, {2008, 8}, + {2396, 8}, {60, 8}, {448, 8}, {836, 8}, {1224, 8}, {1612, 8}, {2000, 8}, + {2388, 8}, {52, 8}, {440, 8}, {828, 8}, {1216, 8}, {1604, 8}, {1992, 8}, + {2380, 8}, {44, 8}, {432, 8}, {820, 8}, {1208, 8}, {1596, 8}, {1984, 8}, + {2372, 8}, {36, 8}, {424, 8}, {812, 8}, {1200, 8}, {1588, 8}, {1976, 8}, + {2364, 8}, {28, 8}, {416, 8}, {804, 8}, {1192, 8}, {1580, 8}, {1968, 8}, + {2356, 8}, {20, 8}, {408, 8}, {796, 8}, {1184, 8}, {1572, 8}, {1960, 8}, + {2348, 8}, {12, 8}, {400, 8}, {788, 8}, {1176, 8}, {1564, 8}, {1952, 8}, + {2340, 8}, {4, 8}, {392, 8}, {780, 8}, {1168, 8}, {1556, 8}, {1944, 8}, + {2332, 8}, + /* missing from original data set */ + {388, 4}, {776, 4}, {1164, 4}, {1552, 4}, {1940, 4}, {2328, 4}, +}; +/* *INDENT-ON* */ + +int +pattern_cmp (const void *arg1, const void *arg2) +{ + test_pattern_t *a1 = (test_pattern_t *) arg1; + test_pattern_t *a2 = (test_pattern_t *) arg2; + + if (a1->offset < a2->offset) + return -1; + else if (a1->offset > a2->offset) + return 1; + return 0; +} + +static u8 +fifo_validate_pattern (vlib_main_t * vm, test_pattern_t * pattern, + u32 pattern_length) +{ + test_pattern_t *tp = pattern; + int i; + + /* Go through the pattern and make 100% sure it's sane */ + for (i = 0; i < pattern_length - 1; i++) + { + if 
(tp->offset + tp->len != (tp + 1)->offset) + { + vlib_cli_output (vm, "[%d] missing {%d, %d}", i, + (tp->offset + tp->len), + (tp + 1)->offset - (tp->offset + tp->len)); + return 0; + } + tp++; + } + return 1; +} + +static test_pattern_t * +fifo_get_validate_pattern (vlib_main_t * vm, test_pattern_t * test_data, + u32 test_data_len) +{ + test_pattern_t *validate_pattern = 0; + + /* Validate, and try segments in order... */ + vec_validate (validate_pattern, test_data_len - 1); + memcpy (validate_pattern, test_data, + test_data_len * sizeof (test_pattern_t)); + qsort ((u8 *) validate_pattern, test_data_len, sizeof (test_pattern_t), + pattern_cmp); + + if (fifo_validate_pattern (vm, validate_pattern, test_data_len) == 0) + return 0; + + return validate_pattern; +} + +int +tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input) { svm_fifo_t *f; u32 fifo_size = 1 << 20; u32 *test_data = 0; u32 offset; - int i, rv; + int i, rv, verbose = 0; u32 data_word, test_data_len; + ooo_segment_t *ooo_seg; + u8 *data; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + } - /* $$$ parse args */ test_data_len = fifo_size / sizeof (u32); vec_validate (test_data, test_data_len - 1); @@ -198,12 +326,8 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) /* Enqueue an initial (un-dequeued) chunk */ rv = svm_fifo_enqueue_nowait (f, 0 /* pid */ , sizeof (u32), (u8 *) test_data); - - if (rv != sizeof (u32)) - { - clib_warning ("enqueue returned %d", rv); - goto out; - } + TCP_TEST ((rv == sizeof (u32)), "enqueued %d", rv); + TCP_TEST ((f->tail == 4), "fifo tail %u", f->tail); /* * Create 3 chunks in the future. 
The offsets are relative @@ -212,51 +336,62 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) for (i = 0; i < 3; i++) { offset = (2 * i + 1) * sizeof (u32); - vlib_cli_output (vm, "add offset %d", offset); - - rv = svm_fifo_enqueue_with_offset - (f, 0 /* pid */ , offset, sizeof (u32), - (u8 *) (test_data + ((offset + sizeof (u32)) / sizeof (u32)))); - + data = (u8 *) (test_data + (2 * i + 1)); + rv = + svm_fifo_enqueue_with_offset (f, 0 /* pid */ , offset, sizeof (u32), + data); + if (verbose) + vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i + 1, offset, + offset + sizeof (u32)); if (rv) { clib_warning ("enqueue returned %d", rv); - goto out; + goto err; } } - /* Paint missing data backwards */ - for (i = 3; i > 0; i--) + if (verbose) + vlib_cli_output (vm, "fifo after odd segs: %U", format_svm_fifo, f, 1); + TCP_TEST ((f->tail == 8), "fifo tail %u", f->tail); + + /* Paint some of missing data backwards */ + for (i = 3; i > 1; i--) { offset = (2 * i + 0) * sizeof (u32); - - vlib_cli_output (vm, "add offset %d", offset); - - rv = svm_fifo_enqueue_with_offset - (f, 0 /* pid */ , offset, sizeof (u32), - (u8 *) (test_data + ((offset + sizeof (u32)) / sizeof (u32)))); - + data = (u8 *) (test_data + (2 * i + 0)); + rv = + svm_fifo_enqueue_with_offset (f, 0 /* pid */ , offset, sizeof (u32), + data); + if (verbose) + vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i, offset, + offset + sizeof (u32)); if (rv) { clib_warning ("enqueue returned %d", rv); - goto out; + goto err; } } - vlib_cli_output (vm, "fifo before missing link: %U", - format_svm_fifo, f, 1 /* verbose */ ); + if (verbose) + vlib_cli_output (vm, "fifo before missing link: %U", format_svm_fifo, f, + 1); + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); + ooo_seg = svm_fifo_first_ooo_segment (f); + TCP_TEST ((ooo_seg->start == 12), + "first ooo seg position %u", ooo_seg->start); + TCP_TEST ((ooo_seg->length == 16), + "first ooo seg 
length %u", ooo_seg->length); /* Enqueue the missing u32 */ - rv = svm_fifo_enqueue_nowait (f, 0 /* pid */ , - sizeof (u32), (u8 *) (test_data + 1)); - if (rv != 7 * sizeof (u32)) - { - clib_warning ("enqueue returned %d", rv); - goto out; - } - - vlib_cli_output (vm, "fifo after missing link: %U", - format_svm_fifo, f, 1 /* verbose */ ); + rv = svm_fifo_enqueue_nowait (f, 0 /* pid */ , sizeof (u32), + (u8 *) (test_data + 2)); + if (verbose) + vlib_cli_output (vm, "fifo after missing link: %U", format_svm_fifo, f, + 1); + TCP_TEST ((rv == 20), "bytes to be enqueued %u", rv); + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); /* Collect results */ for (i = 0; i < 7; i++) @@ -265,25 +400,316 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) (u8 *) & data_word); if (rv != sizeof (u32)) { - clib_warning ("dequeue returned %d", rv); - goto out; + clib_warning ("bytes dequeues %u", rv); + goto err; } if (data_word != test_data[i]) { - clib_warning ("recovered data %d not %d", data_word, test_data[i]); - goto out; + clib_warning ("recovered [%d] %d not %d", i, data_word, + test_data[i]); + goto err; } } - clib_warning ("test complete..."); + svm_fifo_free (f); + vec_free (test_data); + return 0; -out: +err: svm_fifo_free (f); vec_free (test_data); + return -1; +} + +static int +tcp_test_fifo2 (vlib_main_t * vm) +{ + svm_fifo_t *f; + u32 fifo_size = 1 << 20; + int i, rv, test_data_len; + u64 data64; + test_pattern_t *tp, *vp, *test_data; + ooo_segment_t *ooo_seg; + + test_data = test_pattern; + test_data_len = ARRAY_LEN (test_pattern); + + vp = fifo_get_validate_pattern (vm, test_data, test_data_len); + + /* Create a fifo */ + f = svm_fifo_create (fifo_size); + + /* Paint the fifo data vector with -1's */ + memset (f->data, 0xFF, 1 << 20); + + /* + * Try with sorted data + */ + for (i = 0; i < test_data_len; i++) + { + tp = vp + i; + data64 = tp->offset; + rv = svm_fifo_enqueue_with_offset (f, 
0, tp->offset, tp->len, + (u8 *) & data64); + } + + /* Expected result: one big fat chunk at offset 4 */ + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); + ooo_seg = svm_fifo_first_ooo_segment (f); + TCP_TEST ((ooo_seg->start == 4), + "first ooo seg position %u", ooo_seg->start); + TCP_TEST ((ooo_seg->length == 2996), + "first ooo seg length %u", ooo_seg->length); + + data64 = 0; + rv = svm_fifo_enqueue_nowait (f, 0, sizeof (u32), (u8 *) & data64); + TCP_TEST ((rv == 3000), "bytes to be enqueued %u", rv); + + svm_fifo_free (f); + vec_free (vp); + + /* + * Now try it again w/ unsorted data... + */ + + f = svm_fifo_create (fifo_size); + + /* Paint fifo data vector with -1's */ + memset (f->data, 0xFF, 1 << 20); + + for (i = 0; i < test_data_len; i++) + { + tp = &test_data[i]; + data64 = tp->offset; + rv = svm_fifo_enqueue_with_offset (f, 0, tp->offset, tp->len, + (u8 *) & data64); + if (rv) + { + clib_warning ("enqueue returned %d", rv); + } + } + + /* Expecting the same result: one big fat chunk at offset 4 */ + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); + ooo_seg = svm_fifo_first_ooo_segment (f); + TCP_TEST ((ooo_seg->start == 4), + "first ooo seg position %u", ooo_seg->start); + TCP_TEST ((ooo_seg->length == 2996), + "first ooo seg length %u", ooo_seg->length); + + data64 = 0; + rv = svm_fifo_enqueue_nowait (f, 0, sizeof (u32), (u8 *) & data64); + + TCP_TEST ((rv == 3000), "bytes to be enqueued %u", rv); + + svm_fifo_free (f); + return 0; } +static int +tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input) +{ + svm_fifo_t *f; + u32 fifo_size = 4 << 10; + u32 fifo_initial_offset = 0; + u32 total_size = 2 << 10; + int overlap = 0; + int i, rv; + u8 *data_pattern = 0; + test_pattern_t *tp, *generate = 0; + u32 nsegs = 2; + u32 seg_size, length_so_far; + u32 current_offset, offset_increment, len_this_chunk; + u32 seed = 
0xdeaddabe; + int verbose = 0; + int randomize = 1; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fifo-size %d", &fifo_size)) + ; + else if (unformat (input, "total-size %d", &total_size)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else if (unformat (input, "overlap")) + overlap = 1; + else if (unformat (input, "initial-offset %d", &fifo_initial_offset)) + ; + else if (unformat (input, "seed %d", &seed)) + ; + else if (unformat (input, "nsegs %d", &nsegs)) + ; + else if (unformat (input, "no-randomize")) + randomize = 0; + else + { + clib_error_t *e = clib_error_return + (0, "unknown input `%U'", format_unformat_error, input); + clib_error_report (e); + return -1; + } + } + /* + * Generate data + */ + vec_validate (data_pattern, total_size - 1); + for (i = 0; i < vec_len (data_pattern); i++) + data_pattern[i] = i & 0xff; + + seg_size = total_size / nsegs; + length_so_far = 0; + current_offset = 1; + while (length_so_far < total_size) + { + vec_add2 (generate, tp, 1); + len_this_chunk = clib_min (seg_size, total_size - length_so_far); + tp->offset = current_offset; + tp->len = len_this_chunk; + + if (overlap && (len_this_chunk == seg_size)) + do + { + offset_increment = len_this_chunk + % (1 + (random_u32 (&seed) % len_this_chunk)); + } + while (offset_increment == 0); + else + offset_increment = len_this_chunk; + + current_offset += offset_increment; + length_so_far = tp->offset + tp->len; + } + + /* + * Validate segment list. Only valid for non-overlap cases. 
+ */ + if (overlap == 0) + fifo_validate_pattern (vm, generate, vec_len (generate)); + + if (verbose) + { + vlib_cli_output (vm, "raw data pattern:"); + for (i = 0; i < vec_len (generate); i++) + { + vlib_cli_output (vm, "[%d] offset %u len %u", i, + generate[i].offset, generate[i].len); + } + } + + /* Randomize data pattern */ + if (randomize) + { + for (i = 0; i < vec_len (generate) / 2; i++) + { + u32 src_index, dst_index; + test_pattern_t _tmp, *tmp = &_tmp; + + src_index = random_u32 (&seed) % vec_len (generate); + dst_index = random_u32 (&seed) % vec_len (generate); + + tmp[0] = generate[dst_index]; + generate[dst_index] = generate[src_index]; + generate[src_index] = tmp[0]; + } + } + + if (verbose) + { + vlib_cli_output (vm, "randomized data pattern:"); + for (i = 0; i < vec_len (generate); i++) + { + vlib_cli_output (vm, "[%d] offset %u len %u", i, + generate[i].offset, generate[i].len); + } + } + + /* Create a fifo */ + f = svm_fifo_create (fifo_size); + + /* Paint the fifo data vector with -1's */ + memset (f->data, 0xFF, fifo_size); + + /* manually set head and tail pointers to validate modular arithmetic */ + f->head = fifo_initial_offset % fifo_size; + f->tail = fifo_initial_offset % fifo_size; + + for (i = 0; i < vec_len (generate); i++) + { + tp = generate + i; + rv = svm_fifo_enqueue_with_offset (f, 0, tp->offset, tp->len, + (u8 *) data_pattern + tp->offset); + } + + /* Expected result: one big fat chunk at offset 1 */ + + if (verbose) + vlib_cli_output (vm, "fifo before missing link: %U", + format_svm_fifo, f, 1 /* verbose */ ); + + rv = svm_fifo_enqueue_nowait (f, 0, 1 /* count */ , data_pattern + 0); + + if (verbose) + vlib_cli_output (vm, "in-order enqueue returned %d", rv); + + TCP_TEST ((rv == total_size), "retrieved %u expected %u", rv, total_size); + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); + svm_fifo_free (f); + vec_free (data_pattern); + + return 0; +} + +static 
int +tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) +{ + int res = 0; + + /* Run all tests */ + if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT) + { + res = tcp_test_fifo1 (vm, input); + if (res) + return res; + + res = tcp_test_fifo2 (vm); + if (res) + return res; + + /* Run a number of fifo3 configs */ + unformat_init_cstring (input, "nsegs 3 overlap seed 123"); + if (tcp_test_fifo3 (vm, input)) + return -1; + unformat_free (input); + + unformat_init_cstring (input, "nsegs 10"); + if (tcp_test_fifo3 (vm, input)) + return -1; + unformat_free (input); + } + else + { + if (unformat (input, "fifo3")) + { + res = tcp_test_fifo3 (vm, input); + } + else if (unformat (input, "fifo2")) + { + res = tcp_test_fifo2 (vm); + } + else if (unformat (input, "fifo1")) + { + res = tcp_test_fifo1 (vm, input); + } + } + + return res; +} static clib_error_t * tcp_test (vlib_main_t * vm, -- cgit 1.2.3-korg From 636815199a1f359fdd0da706985a74eca95254da Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 20 Apr 2017 17:50:39 -0400 Subject: Debug CLI to magically create / delete a TCP session The session ends up in established state, and is hand-crafted to look like it was created by the builtin_server. This will come in handy for injecting packets into tcp46-established, along with ancillary debug CLI to adjust connection parameters. Immediate applications include screwball window cases, out of order segments, paws checking, and so on and so forth. 
Debug CLI script: loop create set int ip address loop0 6.0.1.1/8 set int state loop0 up set ip arp loop0 6.0.1.2 feed.face.babe test tcp server test tcp session packet-generator new { name tcp limit 1 node ip4-input size 100-100 interface loop0 no-recycle data { TCP: 6.0.1.2 -> 6.0.1.1 TCP: 11234 -> 1234 ACK window 2000 seqnum 0 acknum 0 incrementing 100 } } Change-Id: I866c2159376064b7d14f70531022c1fe949258c2 Signed-off-by: Dave Barach --- src/vnet/tcp/tcp_format.c | 3 +- src/vnet/tcp/tcp_input.c | 2 +- src/vnet/tcp/tcp_pg.c | 108 +++++++++++++++++++++++++--------------------- src/vnet/tcp/tcp_test.c | 69 +++++++++++++++++++++++++++-- 4 files changed, 127 insertions(+), 55 deletions(-) (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index 994ccfd6..1ca2f58e 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -45,7 +45,8 @@ format_tcp_flags (u8 * s, va_list * args) { int flags = va_arg (*args, int); -#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f); + s = format (s, "0x%02x", flags); +#define _(f) if (flags & TCP_FLAG_##f) s = format (s, " %s", #f); foreach_tcp_flag #undef _ return s; diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 3bd53878..bfe3665a 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2376,7 +2376,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vnet_buffer (b0)->tcp.flags = tc0->state; clib_warning ("disp error state %U flags %U", format_tcp_state, &state0, - format_tcp_flags, flags0); + format_tcp_flags, (int) flags0); } } else diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c index dc324049..3be4592c 100644 --- a/src/vnet/tcp/tcp_pg.c +++ b/src/vnet/tcp/tcp_pg.c @@ -54,21 +54,19 @@ static void tcp_pg_edit_function (pg_main_t * pg, pg_stream_t * s, - pg_edit_group_t * g, - u32 * packets, - u32 n_packets) + pg_edit_group_t * g, u32 * packets, u32 n_packets) { - vlib_main_t * 
vm = vlib_get_main(); + vlib_main_t *vm = vlib_get_main (); u32 ip_offset, tcp_offset; tcp_offset = g->start_byte_offset; - ip_offset = (g-1)->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; while (n_packets >= 1) { - vlib_buffer_t * p0; - ip4_header_t * ip0; - tcp_header_t * tcp0; + vlib_buffer_t *p0; + ip4_header_t *ip0; + tcp_header_t *tcp0; ip_csum_t sum0; u32 tcp_len0; @@ -85,7 +83,9 @@ tcp_pg_edit_function (pg_main_t * pg, if (BITS (sum0) == 32) { sum0 = clib_mem_unaligned (&ip0->src_address, u32); - sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); + sum0 = + ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->dst_address, u32)); } else sum0 = clib_mem_unaligned (&ip0->src_address, u64); @@ -96,20 +96,22 @@ tcp_pg_edit_function (pg_main_t * pg, /* Invalidate possibly old checksum. */ tcp0->checksum = 0; - sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); + sum0 = + ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0); - tcp0->checksum = ~ ip_csum_fold (sum0); + tcp0->checksum = ~ip_csum_fold (sum0); } } -typedef struct { +typedef struct +{ pg_edit_t src, dst; pg_edit_t seq_number, ack_number; pg_edit_t data_offset_and_reserved; #define _(f) pg_edit_t f##_flag; - foreach_tcp_flag + foreach_tcp_flag #undef _ - pg_edit_t window; + pg_edit_t window; pg_edit_t checksum; pg_edit_t urgent_pointer; } pg_tcp_header_t; @@ -119,13 +121,13 @@ pg_tcp_header_init (pg_tcp_header_t * p) { /* Initialize fields that are not bit fields in the IP header. */ #define _(f) pg_edit_init (&p->f, tcp_header_t, f); - _ (src); - _ (dst); - _ (seq_number); - _ (ack_number); - _ (window); - _ (checksum); - _ (urgent_pointer); + _(src); + _(dst); + _(seq_number); + _(ack_number); + _(window); + _(checksum); + _(urgent_pointer); #undef _ /* Initialize bit fields. 
*/ @@ -136,19 +138,17 @@ pg_tcp_header_init (pg_tcp_header_t * p) foreach_tcp_flag #undef _ - - pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, - data_offset_and_reserved, - 4, 4); + pg_edit_init_bitfield (&p->data_offset_and_reserved, tcp_header_t, + data_offset_and_reserved, 4, 4); } uword unformat_pg_tcp_header (unformat_input_t * input, va_list * args) { - pg_stream_t * s = va_arg (*args, pg_stream_t *); - pg_tcp_header_t * p; + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_tcp_header_t *p; u32 group_index; - + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t), &group_index); pg_tcp_header_init (p); @@ -157,8 +157,8 @@ unformat_pg_tcp_header (unformat_input_t * input, va_list * args) pg_edit_set_fixed (&p->seq_number, 0); pg_edit_set_fixed (&p->ack_number, 0); - pg_edit_set_fixed (&p->data_offset_and_reserved, - sizeof (tcp_header_t) / sizeof (u32)); + pg_edit_set_fixed (&p->data_offset_and_reserved, + sizeof (tcp_header_t) / sizeof (u32)); pg_edit_set_fixed (&p->window, 4096); pg_edit_set_fixed (&p->urgent_pointer, 0); @@ -166,43 +166,44 @@ unformat_pg_tcp_header (unformat_input_t * input, va_list * args) #define _(f) pg_edit_set_fixed (&p->f##_flag, 0); foreach_tcp_flag #undef _ + p->checksum.type = PG_EDIT_UNSPECIFIED; - p->checksum.type = PG_EDIT_UNSPECIFIED; - - if (! unformat (input, "TCP: %U -> %U", - unformat_pg_edit, - unformat_tcp_udp_port, &p->src, - unformat_pg_edit, - unformat_tcp_udp_port, &p->dst)) + if (!unformat (input, "TCP: %U -> %U", + unformat_pg_edit, + unformat_tcp_udp_port, &p->src, + unformat_pg_edit, unformat_tcp_udp_port, &p->dst)) goto error; /* Parse options. 
*/ while (1) { if (unformat (input, "window %U", - unformat_pg_edit, - unformat_pg_number, &p->window)) + unformat_pg_edit, unformat_pg_number, &p->window)) ; else if (unformat (input, "checksum %U", - unformat_pg_edit, - unformat_pg_number, &p->checksum)) + unformat_pg_edit, unformat_pg_number, &p->checksum)) ; + else if (unformat (input, "seqnum %U", unformat_pg_edit, + unformat_pg_number, &p->seq_number)) + ; + else if (unformat (input, "acknum %U", unformat_pg_edit, + unformat_pg_number, &p->ack_number)) + ; /* Flags. */ #define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1); - foreach_tcp_flag + foreach_tcp_flag #undef _ - - /* Can't parse input: try next protocol level. */ - else + /* Can't parse input: try next protocol level. */ + else break; } { - ip_main_t * im = &ip_main; + ip_main_t *im = &ip_main; u16 dst_port; - tcp_udp_port_info_t * pi; + tcp_udp_port_info_t *pi; pi = 0; if (p->dst.type == PG_EDIT_FIXED) @@ -215,12 +216,12 @@ unformat_pg_tcp_header (unformat_input_t * input, va_list * args) && unformat_user (input, pi->unformat_pg_edit, s)) ; - else if (! unformat_user (input, unformat_pg_payload, s)) + else if (!unformat_user (input, unformat_pg_payload, s)) goto error; if (p->checksum.type == PG_EDIT_UNSPECIFIED) { - pg_edit_group_t * g = pg_stream_get_group (s, group_index); + pg_edit_group_t *g = pg_stream_get_group (s, group_index); g->edit_function = tcp_pg_edit_function; g->edit_function_opaque = 0; } @@ -228,9 +229,16 @@ unformat_pg_tcp_header (unformat_input_t * input, va_list * args) return 1; } - error: +error: /* Free up any edits we may have added. 
*/ pg_free_edit_group (s); return 0; } +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index 0146154b..d65ce1be 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -895,6 +895,68 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) return res; } +static int +tcp_test_session (vlib_main_t * vm, unformat_input_t * input) +{ + int rv = 0; + tcp_connection_t *tc0; + u8 sst = SESSION_TYPE_IP4_TCP; + ip4_address_t local, remote; + u16 local_port, remote_port; + tcp_main_t *tm = vnet_get_tcp_main (); + int is_add = 1; + + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "add")) + is_add = 1; + else + break; + } + + if (is_add) + { + local.as_u32 = clib_host_to_net_u32 (0x06000101); + remote.as_u32 = clib_host_to_net_u32 (0x06000102); + local_port = clib_host_to_net_u16 (1234); + remote_port = clib_host_to_net_u16 (11234); + + pool_get (tm->connections[0], tc0); + memset (tc0, 0, sizeof (*tc0)); + + tc0->state = TCP_STATE_ESTABLISHED; + tc0->rcv_las = 1; + tc0->c_c_index = tc0 - tm->connections[0]; + tc0->c_lcl_port = local_port; + tc0->c_rmt_port = remote_port; + tc0->c_is_ip4 = 1; + tc0->c_thread_index = 0; + tc0->c_lcl_ip4.as_u32 = local.as_u32; + tc0->c_rmt_ip4.as_u32 = remote.as_u32; + tc0->opt.mss = 1450; + tcp_connection_init_vars (tc0); + + TCP_EVT_DBG (TCP_EVT_OPEN, tc0); + + if (stream_session_accept (&tc0->connection, 0 /* listener index */ , + sst, 0 /* notify */ )) + clib_warning ("stream_session_accept failed"); + + stream_session_accept_notify (&tc0->connection); + } + else + { + tc0 = tcp_connection_get (0 /* connection index */ , 0 /* thread */ ); + tc0->state = TCP_STATE_CLOSED; + stream_session_disconnect_notify (&tc0->connection); + } + + return rv; +} + static clib_error_t * tcp_test (vlib_main_t * vm, 
unformat_input_t * input, vlib_cli_command_t * cmd_arg) @@ -911,11 +973,12 @@ tcp_test (vlib_main_t * vm, { res = tcp_test_fifo (vm, input); } - else + else if (unformat (input, "session")) { - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + res = tcp_test_session (vm, input); } + else + break; } if (res) -- cgit 1.2.3-korg From 45d3496f3d86ee1a930ce0ffd6ca3d1730355eb8 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 25 Apr 2017 00:05:27 -0700 Subject: Add sack tx unit test Change-Id: Ib91db6e531231bdc52b0104673a912bee024872f Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.h | 6 +- src/vnet/tcp/tcp_format.c | 12 ++++ src/vnet/tcp/tcp_input.c | 42 +++++++++----- src/vnet/tcp/tcp_test.c | 143 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 185 insertions(+), 18 deletions(-) (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 2ac6a9b8..40fb3515 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -59,6 +59,7 @@ typedef enum _tcp_state format_function_t format_tcp_state; format_function_t format_tcp_flags; +format_function_t format_tcp_sacks; /** TCP timers */ #define foreach_tcp_timer \ @@ -470,11 +471,13 @@ tcp_available_snd_space (const tcp_connection_t * tc) void tcp_update_rcv_wnd (tcp_connection_t * tc); void tcp_retransmit_first_unacked (tcp_connection_t * tc); - void tcp_fast_retransmit (tcp_connection_t * tc); void tcp_cc_congestion (tcp_connection_t * tc); void tcp_cc_recover (tcp_connection_t * tc); +/* Made public for unit testing only */ +void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end); + always_inline u32 tcp_time_now (void) { @@ -496,7 +499,6 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, vlib_buffer_t * b, void tcp_connection_timers_init (tcp_connection_t * tc); void tcp_connection_timers_reset (tcp_connection_t * tc); - void tcp_connection_init_vars (tcp_connection_t * tc); always_inline void diff --git 
a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index 1ca2f58e..3148fd40 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -128,6 +128,18 @@ format_tcp_header (u8 * s, va_list * args) return s; } +u8 * +format_tcp_sacks (u8 * s, va_list * args) +{ + sack_block_t *sacks = va_arg (*args, sack_block_t *); + sack_block_t *block; + vec_foreach (block, sacks) + { + s = format (s, " start %u end %u\n", block->start, block->end); + } + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index bfe3665a..e184a4d6 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -894,37 +894,51 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, * @param start Start sequence number of the newest SACK block * @param end End sequence of the newest SACK block */ -static void +void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) { - sack_block_t *new_list = 0, block; + sack_block_t *new_list = 0, *block = 0; int i; /* If the first segment is ooo add it to the list. Last write might've moved * rcv_nxt over the first segment. */ if (seq_lt (tc->rcv_nxt, start)) { - block.start = start; - block.end = end; - vec_add1 (new_list, block); + vec_add2 (new_list, block, 1); + block->start = start; + block->end = end; } /* Find the blocks still worth keeping. */ for (i = 0; i < vec_len (tc->snd_sacks); i++) { - /* Discard if: - * 1) rcv_nxt advanced beyond current block OR - * 2) Segment overlapped by the first segment, i.e., it has been merged - * into it.*/ - if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt) - || seq_leq (tc->snd_sacks[i].start, end)) + /* Discard if rcv_nxt advanced beyond current block */ + if (seq_leq (tc->snd_sacks[i].start, tc->rcv_nxt)) continue; - /* Save to new SACK list. 
*/ - vec_add1 (new_list, tc->snd_sacks[i]); + /* Merge or drop if segment overlapped by the new segment */ + if (block && (seq_geq (tc->snd_sacks[i].end, new_list[0].start) + && seq_leq (tc->snd_sacks[i].start, new_list[0].end))) + { + if (seq_lt (tc->snd_sacks[i].start, new_list[0].start)) + new_list[0].start = tc->snd_sacks[i].start; + if (seq_lt (new_list[0].end, tc->snd_sacks[i].end)) + new_list[0].end = tc->snd_sacks[i].end; + continue; + } + + /* Save to new SACK list if we have space. */ + if (vec_len (new_list) < TCP_MAX_SACK_BLOCKS) + { + vec_add1 (new_list, tc->snd_sacks[i]); + } + else + { + clib_warning ("dropped sack blocks"); + } } - ASSERT (vec_len (new_list) < TCP_MAX_SACK_BLOCKS); + ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS); /* Replace old vector with new one */ vec_free (tc->snd_sacks); diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index d65ce1be..bca5795a 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -35,7 +35,7 @@ } static int -tcp_test_sack () +tcp_test_sack_rx () { tcp_connection_t _tc, *tc = &_tc; sack_scoreboard_t *sb = &tc->sack_sb; @@ -173,6 +173,145 @@ tcp_test_sack () return 0; } +static int +tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input) +{ + tcp_connection_t _tc, *tc = &_tc; + sack_block_t *sacks; + int i, verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + { + vlib_cli_output (vm, "parse error: '%U'", format_unformat_error, + input); + return -1; + } + } + + memset (tc, 0, sizeof (*tc)); + + /* + * Add odd sack block pairs + */ + for (i = 1; i < 10; i += 2) + { + tcp_update_sack_list (tc, i * 100, (i + 1) * 100); + } + + TCP_TEST ((vec_len (tc->snd_sacks) == 5), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 5); + TCP_TEST ((tc->snd_sacks[0].start = 900), + "first sack block start %u expected %u", tc->snd_sacks[0].start, + 900); + + /* + * Try to add one extra + */ + sacks 
= vec_dup (tc->snd_sacks); + + tcp_update_sack_list (tc, 1100, 1200); + TCP_TEST ((vec_len (tc->snd_sacks) == 5), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 5); + TCP_TEST ((tc->snd_sacks[0].start == 1100), + "first sack block start %u expected %u", tc->snd_sacks[0].start, + 1100); + + /* restore */ + vec_free (tc->snd_sacks); + tc->snd_sacks = sacks; + + /* + * Overlap first 2 segment + */ + tc->rcv_nxt = 300; + tcp_update_sack_list (tc, 300, 300); + if (verbose) + vlib_cli_output (vm, "overlap first 2 segments:\n%U", + format_tcp_sacks, tc->snd_sacks); + TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 3); + TCP_TEST ((tc->snd_sacks[0].start == 900), + "first sack block start %u expected %u", tc->snd_sacks[0].start, + 500); + + /* + * Add a new segment + */ + tcp_update_sack_list (tc, 1100, 1200); + if (verbose) + vlib_cli_output (vm, "add new segment [1100, 1200]\n%U", + format_tcp_sacks, tc->snd_sacks); + TCP_TEST ((vec_len (tc->snd_sacks) == 4), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 4); + TCP_TEST ((tc->snd_sacks[0].start == 1100), + "first sack block start %u expected %u", tc->snd_sacks[0].start, + 1100); + + /* + * Join middle segments + */ + tcp_update_sack_list (tc, 800, 900); + if (verbose) + vlib_cli_output (vm, "join middle segments [800, 900]\n%U", + format_tcp_sacks, tc->snd_sacks); + + TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 3); + TCP_TEST ((tc->snd_sacks[0].start == 700), + "first sack block start %u expected %u", tc->snd_sacks[0].start, + 1100); + + /* + * Advance rcv_nxt to overlap all + */ + tc->rcv_nxt = 1200; + tcp_update_sack_list (tc, 1200, 1200); + if (verbose) + vlib_cli_output (vm, "advance rcv_nxt to 1200\n%U", + format_tcp_sacks, tc->snd_sacks); + TCP_TEST ((vec_len (tc->snd_sacks) == 0), "sack blocks %d expected %d", + vec_len (tc->snd_sacks), 0); + return 0; +} + +static int +tcp_test_sack 
(vlib_main_t * vm, unformat_input_t * input) +{ + int res = 0; + + /* Run all tests */ + if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT) + { + if (tcp_test_sack_tx (vm, input)) + { + return -1; + } + + if (tcp_test_sack_rx ()) + { + return -1; + } + } + else + { + if (unformat (input, "tx")) + { + res = tcp_test_sack_tx (vm, input); + } + else if (unformat (input, "rx")) + { + res = tcp_test_sack_rx (); + } + } + + return res; +} + + typedef struct { u32 offset; @@ -967,7 +1106,7 @@ tcp_test (vlib_main_t * vm, { if (unformat (input, "sack")) { - res = tcp_test_sack (); + res = tcp_test_sack (vm, input); } else if (unformat (input, "fifo")) { -- cgit 1.2.3-korg From c28764fd356632763614ea579f678d8f55eca4c7 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 26 Apr 2017 00:08:42 -0700 Subject: TCP ooo reception fixes - Improve svm fifo handling of out-of-order segments - Ensure tsval_recent is updated only if rcv_las falls withing the segments's sequence space - Avoid directly dropping old ACKs - Improve debugging Change-Id: I88dbe2394a0ad7eb389a4cc12d013a13733953aa Signed-off-by: Florin Coras --- src/svm/svm_fifo.c | 144 ++++++++++++++++++++++----------------------- src/svm/svm_fifo.h | 1 + src/vnet/session/session.c | 15 +++++ src/vnet/session/session.h | 7 ++- src/vnet/tcp/tcp_debug.h | 15 +++++ src/vnet/tcp/tcp_error.def | 3 +- src/vnet/tcp/tcp_format.c | 6 +- src/vnet/tcp/tcp_input.c | 81 +++++++++++++++++-------- src/vnet/tcp/tcp_output.c | 4 +- src/vnet/tcp/tcp_test.c | 114 ++++++++++++++++++++++++++++++++--- 10 files changed, 275 insertions(+), 115 deletions(-) (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index 8f2ed0c9..9b09d0c2 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -17,6 +17,8 @@ #define offset_lt(_a, _b) ((i32)((_a)-(_b)) < 0) #define offset_leq(_a, _b) ((i32)((_a)-(_b)) <= 0) +#define offset_gt(_a, _b) ((i32)((_a)-(_b)) > 0) +#define offset_geq(_a, _b) ((i32)((_a)-(_b)) 
>= 0) u8 * format_ooo_segment (u8 * s, va_list * args) @@ -160,14 +162,23 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) && offset_leq (ooo_segment_offset (f, s), offset)) s = pool_elt_at_index (f->ooo_segments, s->next); + /* If we have a previous and we overlap it, use it as starting point */ + prev = ooo_segment_get_prev (f, s); + if (prev && offset_leq (offset, ooo_segment_end_offset (f, prev))) + { + s = prev; + prev = ooo_segment_get_prev (f, s); + s_sof = ooo_segment_offset (f, s); + s_eof = ooo_segment_end_offset (f, s); + goto merge; + } + s_index = s - f->ooo_segments; s_sof = ooo_segment_offset (f, s); s_eof = ooo_segment_end_offset (f, s); - prev = ooo_segment_get_prev (f, s); /* No overlap, add before current segment */ - if (offset_lt (end_offset, s_sof) - && (!prev || offset_lt (prev->start + prev->length, offset))) + if (offset_lt (end_offset, s_sof)) { new_s = ooo_segment_new (f, offset, length); new_index = new_s - f->ooo_segments; @@ -192,7 +203,7 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) return; } /* No overlap, add after current segment */ - else if (offset_lt (s_eof, offset)) + else if (offset_gt (offset, s_eof)) { new_s = ooo_segment_new (f, offset, length); new_index = new_s - f->ooo_segments; @@ -218,62 +229,16 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) * Merge needed */ +merge: + /* Merge at head */ - if (offset_leq (offset, s_sof)) + if (offset_lt (offset, s_sof)) { - /* If we have a previous, check if we overlap */ - if (s->prev != OOO_SEGMENT_INVALID_INDEX) - { - prev = pool_elt_at_index (f->ooo_segments, s->prev); - - /* New segment merges prev and current. Remove previous and - * update position of current. 
*/ - if (offset_leq (offset, ooo_segment_end_offset (f, prev))) - { - s->start = prev->start; - s->length = s_eof - ooo_segment_offset (f, prev); - ooo_segment_del (f, s->prev); - } - else - { - s->start = offset; - s->length = s_eof - ooo_segment_offset (f, s); - } - } - else - { - s->start = offset; - s->length = s_eof - ooo_segment_offset (f, s); - } - - /* The new segment's tail may cover multiple smaller ones */ - if (offset_lt (s_eof, end_offset)) - { - /* Remove segments completely covered */ - it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? - pool_elt_at_index (f->ooo_segments, s->next) : 0; - while (it && offset_lt (ooo_segment_end_offset (f, it), end_offset)) - { - next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? - pool_elt_at_index (f->ooo_segments, it->next) : 0; - ooo_segment_del (f, it - f->ooo_segments); - it = next; - } - - /* Update length. Segment's start might have changed. */ - s->length = end_offset - ooo_segment_offset (f, s); - - /* If partial overlap with last, merge */ - if (it && offset_lt (ooo_segment_offset (f, it), end_offset)) - { - s->length += - it->length - (ooo_segment_offset (f, it) - end_offset); - ooo_segment_del (f, it - f->ooo_segments); - } - } + s->start = offset; + s->length = s_eof - ooo_segment_offset (f, s); } /* Last but overlapping previous */ - else if (offset_leq (s_eof, end_offset)) + else if (offset_gt (end_offset, s_eof)) { s->length = end_offset - ooo_segment_offset (f, s); } @@ -281,8 +246,33 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length) else { /* Do Nothing */ + goto done; + } + + /* The new segment's tail may cover multiple smaller ones */ + if (offset_geq (end_offset, s_eof)) + { + /* Remove the completely overlapped segments */ + it = (s->next != OOO_SEGMENT_INVALID_INDEX) ? + pool_elt_at_index (f->ooo_segments, s->next) : 0; + while (it && offset_leq (ooo_segment_end_offset (f, it), end_offset)) + { + next = (it->next != OOO_SEGMENT_INVALID_INDEX) ? 
+ pool_elt_at_index (f->ooo_segments, it->next) : 0; + ooo_segment_del (f, it - f->ooo_segments); + it = next; + } + + /* If partial overlap with last, merge */ + if (it && offset_leq (ooo_segment_offset (f, it), end_offset)) + { + s->length = ooo_segment_end_offset (f, it) - + ooo_segment_offset (f, s); + ooo_segment_del (f, it - f->ooo_segments); + } } +done: /* Most recently updated segment */ f->ooos_newest = s - f->ooo_segments; } @@ -296,14 +286,17 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) { ooo_segment_t *s; u32 index, bytes = 0, diff; - u32 cursize; + u32 cursize, norm_start, nitems; /* current size has not yet been updated */ cursize = svm_fifo_max_dequeue (f) + n_bytes_enqueued; + nitems = f->nitems; s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); - diff = (f->nitems + (i32) (f->tail - s->start)) % f->nitems; + norm_start = s->start % nitems; + diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems; + if (diff > cursize) return 0; @@ -326,7 +319,8 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) if (s->next != OOO_SEGMENT_INVALID_INDEX) { s = pool_elt_at_index (f->ooo_segments, s->next); - diff = (f->nitems + (i32) (f->tail - s->start)) % f->nitems; + norm_start = s->start % nitems; + diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems; ooo_segment_del (f, index); } /* End of search */ @@ -340,11 +334,11 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued) /* If tail is adjacent to an ooo segment, 'consume' it */ if (diff == 0) { - bytes = ((f->nitems - cursize) >= s->length) ? s->length : - f->nitems - cursize; + bytes = ((nitems - cursize) >= s->length) ? 
s->length : + nitems - cursize; f->tail += bytes; - f->tail %= f->nitems; + f->tail %= nitems; ooo_segment_del (f, s - f->ooo_segments); } @@ -430,31 +424,22 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f, { u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; u32 cursize, nitems; - u32 normalized_offset; - int rv; - - /* Users would do well to avoid this */ - if (PREDICT_FALSE (f->tail == (offset % f->nitems))) - { - rv = svm_fifo_enqueue_internal (f, required_bytes, copy_from_here); - if (rv > 0) - return 0; - return -1; - } + u32 normalized_offset, offset_from_tail; /* read cursize, which can only increase while we're working */ cursize = svm_fifo_max_dequeue (f); nitems = f->nitems; + normalized_offset = offset % nitems; /* Will this request fit? */ - if ((required_bytes + (offset - f->tail) % nitems) > (nitems - cursize)) + offset_from_tail = (nitems + normalized_offset - f->tail) % nitems; + if ((required_bytes + offset_from_tail) > (nitems - cursize)) return -1; ooo_segment_add (f, offset, required_bytes); /* Number of bytes we're going to copy */ total_copy_bytes = required_bytes; - normalized_offset = offset % nitems; /* Number of bytes in first copy segment */ first_copy_bytes = ((nitems - normalized_offset) < total_copy_bytes) @@ -631,6 +616,15 @@ svm_fifo_first_ooo_segment (svm_fifo_t * f) return pool_elt_at_index (f->ooo_segments, f->ooos_list_head); } +/** + * Set fifo pointers to requested offset + */ +void +svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer) +{ + f->head = f->tail = pointer % f->nitems; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h index d67237c6..36158dc5 100644 --- a/src/svm/svm_fifo.h +++ b/src/svm/svm_fifo.h @@ -119,6 +119,7 @@ int svm_fifo_peek (svm_fifo_t * f, u32 offset, u32 max_bytes, u8 * copy_here); int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes); u32 svm_fifo_number_ooo_segments (svm_fifo_t * f); ooo_segment_t 
*svm_fifo_first_ooo_segment (svm_fifo_t * f); +void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer); format_function_t format_svm_fifo; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index d17c93f8..e92bb440 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -609,6 +609,21 @@ session_manager_flush_enqueue_events (u32 thread_index) return errors; } +/** + * Init fifo tail and head pointers + * + * Useful if transport uses absolute offsets for tracking ooo segments. + */ +void +stream_session_init_fifos_pointers (transport_connection_t * tc, + u32 rx_pointer, u32 tx_pointer) +{ + stream_session_t *s; + s = stream_session_get (tc->s_index, tc->thread_index); + svm_fifo_init_pointers (s->server_rx_fifo, rx_pointer); + svm_fifo_init_pointers (s->server_tx_fifo, tx_pointer); +} + void stream_session_connect_notify (transport_connection_t * tc, u8 sst, u8 is_fail) diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 8cd72f35..f41a8a96 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -352,9 +352,10 @@ stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes); u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes); -void -stream_session_connect_notify (transport_connection_t * tc, u8 sst, - u8 is_fail); +void stream_session_connect_notify (transport_connection_t * tc, u8 sst, + u8 is_fail); +void stream_session_init_fifos_pointers (transport_connection_t * tc, + u32 rx_pointer, u32 tx_pointer); void stream_session_accept_notify (transport_connection_t * tc); void stream_session_disconnect_notify (transport_connection_t * tc); diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index ecbf7887..b4497a3b 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -50,6 +50,7 @@ _(CC_EVT, "cc event") \ _(CC_PACK, "cc partial ack") \ _(SEG_INVALID, "invalid segment") \ + _(PAWS_FAIL, "failed paws 
check") \ _(ACK_RCV_ERR, "invalid ack") \ _(RCV_WND_SHRUNK, "shrunk rcv_wnd") \ @@ -382,6 +383,20 @@ typedef enum _tcp_dbg_evt ed->data[4] = _tc->rcv_wnd; \ } +#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...) \ +{ \ + ELOG_TYPE_DECLARE (_e) = \ + { \ + .format = "paws fail: seq %u end %u tsval %u tsval_recent %u", \ + .format_args = "i4i4i4i4", \ + }; \ + DECLARE_ETD(_tc, _e, 4); \ + ed->data[0] = _seq - _tc->irs; \ + ed->data[1] = _end - _tc->irs; \ + ed->data[2] = _tc->opt.tsval; \ + ed->data[3] = _tc->tsval_recent; \ +} + #define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...) \ { \ ELOG_TYPE_DECLARE (_e) = \ diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def index 0d75d975..a4e46d64 100644 --- a/src/vnet/tcp/tcp_error.def +++ b/src/vnet/tcp/tcp_error.def @@ -37,4 +37,5 @@ tcp_error (PKTS_SENT, "Packets sent") tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") tcp_error (RST_SENT, "Resets sent") tcp_error (INVALID_CONNECTION, "Invalid connection") -tcp_error (NO_WND, "No window") \ No newline at end of file +tcp_error (NO_WND, "No window") +tcp_error (CONNECTION_CLOSED, "Connection closed") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index 3148fd40..4de99235 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -131,11 +131,13 @@ format_tcp_header (u8 * s, va_list * args) u8 * format_tcp_sacks (u8 * s, va_list * args) { - sack_block_t *sacks = va_arg (*args, sack_block_t *); + tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); + sack_block_t *sacks = tc->snd_sacks; sack_block_t *block; vec_foreach (block, sacks) { - s = format (s, " start %u end %u\n", block->start, block->end); + s = format (s, " start %u end %u\n", block->start - tc->irs, + block->end - tc->irs); } return s; } diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 3c65a5ea..0030cfe2 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -208,6 +208,15 
@@ tcp_options_parse (tcp_header_t * th, tcp_options_t * to) } } +/** + * RFC1323: Check against wrapped sequence numbers (PAWS). If we have + * timestamp to echo and it's less than tsval_recent, drop segment + * but still send an ACK in order to retain TCP's mechanism for detecting + * and recovering from half-open connections + * + * Or at least that's what the theory says. It seems that this might not work + * very well with packet reordering and fast retransmit. XXX + */ always_inline int tcp_segment_check_paws (tcp_connection_t * tc) { @@ -215,6 +224,27 @@ tcp_segment_check_paws (tcp_connection_t * tc) && timestamp_lt (tc->opt.tsval, tc->tsval_recent); } +/** + * Update tsval recent + */ +always_inline void +tcp_update_timestamp (tcp_connection_t * tc, u32 seq, u32 seq_end) +{ + /* + * RFC1323: If Last.ACK.sent falls within the range of sequence numbers + * of an incoming segment: + * SEG.SEQ <= Last.ACK.sent < SEG.SEQ + SEG.LEN + * then the TSval from the segment is copied to TS.Recent; + * otherwise, the TSval is ignored. + */ + if (tcp_opts_tstamp (&tc->opt) && tc->tsval_recent + && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end)) + { + tc->tsval_recent = tc->opt.tsval; + tc->tsval_recent_age = tcp_time_now (); + } +} + /** * Validate incoming segment as per RFC793 p. 69 and RFC1323 p. 19 * @@ -228,21 +258,16 @@ static int tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, vlib_buffer_t * b0, tcp_header_t * th0, u32 * next0) { - u8 paws_failed; - if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) return -1; tcp_options_parse (th0, &tc0->opt); - /* RFC1323: Check against wrapped sequence numbers (PAWS). 
If we have - * timestamp to echo and it's less than tsval_recent, drop segment - * but still send an ACK in order to retain TCP's mechanism for detecting - * and recovering from half-open connections */ - paws_failed = tcp_segment_check_paws (tc0); - if (paws_failed) + if (tcp_segment_check_paws (tc0)) { clib_warning ("paws failed"); + TCP_EVT_DBG (TCP_EVT_PAWS_FAIL, tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end); /* If it just so happens that a segment updates tsval_recent for a * segment over 24 days old, invalidate tsval_recent. */ @@ -251,6 +276,7 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, { /* Age isn't reset until we get a valid tsval (bsd inspired) */ tc0->tsval_recent = 0; + clib_warning ("paws failed - really old segment. REALLY?"); } else { @@ -305,12 +331,9 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, return -1; } - /* If PAWS passed and segment in window, save timestamp */ - if (!paws_failed) - { - tc0->tsval_recent = tc0->opt.tsval; - tc0->tsval_recent_age = tcp_time_now (); - } + /* If segment in window, save timestamp */ + tcp_update_timestamp (tc0, vnet_buffer (b0)->tcp.seq_number, + vnet_buffer (b0)->tcp.seq_end); return 0; } @@ -835,7 +858,8 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b, TCP_EVT_DBG (TCP_EVT_DUPACK_RCVD, tc); tcp_cc_rcv_dupack (tc, vnet_buffer (b)->tcp.ack_number); } - return -1; + /* Don't drop yet */ + return 0; } if (tcp_opts_sack_permitted (&tc->opt)) @@ -932,10 +956,6 @@ tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end) { vec_add1 (new_list, tc->snd_sacks[i]); } - else - { - clib_warning ("dropped sack blocks"); - } } ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS); @@ -1011,7 +1031,6 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, u16 data_len) { stream_session_t *s0; - u32 offset; int rv; /* Pure ACK. 
Do nothing */ @@ -1021,12 +1040,11 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b, } s0 = stream_session_get (tc->c_s_index, tc->c_thread_index); - offset = vnet_buffer (b)->tcp.seq_number - tc->irs; - clib_warning ("ooo: offset %d len %d", offset, data_len); - - rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, offset, data_len, - vlib_buffer_get_current (b)); + /* Enqueue out-of-order data with absolute offset */ + rv = svm_fifo_enqueue_with_offset (s0->server_rx_fifo, + vnet_buffer (b)->tcp.seq_number, + data_len, vlib_buffer_get_current (b)); /* Nothing written */ if (rv) @@ -1542,6 +1560,9 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Notify app that we have connection */ stream_session_connect_notify (&new_tc0->connection, sst, 0); + stream_session_init_fifos_pointers (&new_tc0->connection, + new_tc0->irs + 1, + new_tc0->iss + 1); /* Make sure after data segment processing ACK is sent */ new_tc0->flags |= TCP_CONN_SNDACK; } @@ -1552,7 +1573,9 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Notify app that we have connection */ stream_session_connect_notify (&new_tc0->connection, sst, 0); - + stream_session_init_fifos_pointers (&new_tc0->connection, + new_tc0->irs + 1, + new_tc0->iss + 1); tcp_make_synack (new_tc0, b0); next0 = tcp_next_output (is_ip4); @@ -2139,6 +2162,10 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_make_synack (child0, b0); next0 = tcp_next_output (is_ip4); + /* Init fifo pointers after we have iss */ + stream_session_init_fifos_pointers (&child0->connection, + child0->irs + 1, + child0->iss + 1); drop: if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -2474,6 +2501,7 @@ do { \ _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE); /* ACK for for a SYN-ACK -> tcp-rcv-process. 
*/ _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(SYN_RCVD, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); /* SYN-ACK for a SYN */ _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT, TCP_ERROR_NONE); @@ -2499,6 +2527,7 @@ do { \ _(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); + _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED); #undef _ } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 4e1a7aa5..a85d30da 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -359,7 +359,8 @@ tcp_make_established_options (tcp_connection_t * tc, tcp_options_t * opts) { opts->flags |= TCP_OPTS_FLAG_SACK; opts->sacks = tc->snd_sacks; - opts->n_sack_blocks = vec_len (tc->snd_sacks); + opts->n_sack_blocks = clib_min (vec_len (tc->snd_sacks), + TCP_OPTS_MAX_SACK_BLOCKS); len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks; } } @@ -917,6 +918,7 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, vnet_buffer (b)->tcp.connection_index = tc->c_c_index; tc->snd_nxt += data_len; + tc->rcv_las = tc->rcv_nxt; /* TODO this is updated in output as well ... 
*/ if (tc->snd_nxt > tc->snd_una_max) diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index ed032206..a457ac8f 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -231,7 +231,7 @@ tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input) tcp_update_sack_list (tc, 300, 300); if (verbose) vlib_cli_output (vm, "overlap first 2 segments:\n%U", - format_tcp_sacks, tc->snd_sacks); + format_tcp_sacks, tc); TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d", vec_len (tc->snd_sacks), 3); TCP_TEST ((tc->snd_sacks[0].start == 900), @@ -244,7 +244,7 @@ tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input) tcp_update_sack_list (tc, 1100, 1200); if (verbose) vlib_cli_output (vm, "add new segment [1100, 1200]\n%U", - format_tcp_sacks, tc->snd_sacks); + format_tcp_sacks, tc); TCP_TEST ((vec_len (tc->snd_sacks) == 4), "sack blocks %d expected %d", vec_len (tc->snd_sacks), 4); TCP_TEST ((tc->snd_sacks[0].start == 1100), @@ -257,7 +257,7 @@ tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input) tcp_update_sack_list (tc, 800, 900); if (verbose) vlib_cli_output (vm, "join middle segments [800, 900]\n%U", - format_tcp_sacks, tc->snd_sacks); + format_tcp_sacks, tc); TCP_TEST ((vec_len (tc->snd_sacks) == 3), "sack blocks %d expected %d", vec_len (tc->snd_sacks), 3); @@ -271,8 +271,7 @@ tcp_test_sack_tx (vlib_main_t * vm, unformat_input_t * input) tc->rcv_nxt = 1200; tcp_update_sack_list (tc, 1200, 1200); if (verbose) - vlib_cli_output (vm, "advance rcv_nxt to 1200\n%U", - format_tcp_sacks, tc->snd_sacks); + vlib_cli_output (vm, "advance rcv_nxt to 1200\n%U", format_tcp_sacks, tc); TCP_TEST ((vec_len (tc->snd_sacks) == 0), "sack blocks %d expected %d", vec_len (tc->snd_sacks), 0); return 0; @@ -502,7 +501,13 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input) { offset = (2 * i + 1) * sizeof (u32); data = (u8 *) (test_data + (2 * i + 1)); - rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data); + 
if (i == 0) + { + rv = svm_fifo_enqueue_nowait (f, sizeof (u32), data); + rv = rv > 0 ? 0 : rv; + } + else + rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data); if (verbose) vlib_cli_output (vm, "add [%d] [%d, %d]", 2 * i + 1, offset, offset + sizeof (u32)); @@ -517,6 +522,26 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input) vlib_cli_output (vm, "fifo after odd segs: %U", format_svm_fifo, f, 1); TCP_TEST ((f->tail == 8), "fifo tail %u", f->tail); + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 2), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); + + /* + * Try adding a completely overlapped segment + */ + offset = 3 * sizeof (u32); + data = (u8 *) (test_data + 3); + rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data); + if (rv) + { + clib_warning ("enqueue returned %d", rv); + goto err; + } + + if (verbose) + vlib_cli_output (vm, "fifo after overlap seg: %U", format_svm_fifo, f, 1); + + TCP_TEST ((svm_fifo_number_ooo_segments (f) == 2), + "number of ooo segments %u", svm_fifo_number_ooo_segments (f)); /* * Make sure format functions are not buggy @@ -887,7 +912,7 @@ tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input) f->head = fifo_initial_offset; f->tail = fifo_initial_offset; - for (i = 0; i < vec_len (generate); i++) + for (i = !randomize; i < vec_len (generate); i++) { tp = generate + i; svm_fifo_enqueue_with_offset (f, fifo_initial_offset + tp->offset, @@ -895,6 +920,10 @@ tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input) (u8 *) data_pattern + tp->offset); } + /* Add the first segment in order for non random data */ + if (!randomize) + svm_fifo_enqueue_nowait (f, generate[0].len, (u8 *) data_pattern); + /* * Expected result: one big fat chunk at offset 1 if randomize == 1 */ @@ -964,6 +993,73 @@ tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input) return 0; } +static int +tcp_test_fifo4 (vlib_main_t * vm, unformat_input_t * input) +{ + svm_fifo_t *f; + u32 fifo_size = 6 << 10; 
+ u32 fifo_initial_offset = 1000000000; + u32 test_n_bytes = 5000, j; + u8 *test_data = 0, *data_buf = 0; + int i, rv, verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else + { + clib_error_t *e = clib_error_return + (0, "unknown input `%U'", format_unformat_error, input); + clib_error_report (e); + return -1; + } + } + + /* + * Create a fifo and add segments + */ + f = fifo_prepare (fifo_size); + + /* Set head and tail pointers */ + fifo_initial_offset = fifo_initial_offset % fifo_size; + svm_fifo_init_pointers (f, fifo_initial_offset); + + vec_validate (test_data, test_n_bytes - 1); + for (i = 0; i < vec_len (test_data); i++) + test_data[i] = i; + + for (i = test_n_bytes - 1; i > 0; i--) + { + rv = svm_fifo_enqueue_with_offset (f, fifo_initial_offset + i, + sizeof (u8), &test_data[i]); + if (verbose) + vlib_cli_output (vm, "add [%d] [%d, %d]", i, i, i + sizeof (u8)); + if (rv) + { + clib_warning ("enqueue returned %d", rv); + svm_fifo_free (f); + vec_free (test_data); + return -1; + } + } + + svm_fifo_enqueue_nowait (f, sizeof (u8), &test_data[0]); + + vec_validate (data_buf, vec_len (test_data)); + + svm_fifo_dequeue_nowait (f, vec_len (test_data), data_buf); + rv = compare_data (data_buf, test_data, 0, vec_len (test_data), &j); + if (rv) + vlib_cli_output (vm, "[%d] dequeued %u expected %u", j, data_buf[j], + test_data[j]); + TCP_TEST ((rv == 0), "dequeued compared to original returned %d", rv); + + svm_fifo_free (f); + vec_free (test_data); + return 0; +} + static int tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) { @@ -1028,6 +1124,10 @@ tcp_test_fifo (vlib_main_t * vm, unformat_input_t * input) { res = tcp_test_fifo1 (vm, input); } + else if (unformat (input, "fifo4")) + { + res = tcp_test_fifo4 (vm, input); + } } return res; -- cgit 1.2.3-korg From 06d110189e54220c533c5fe0cea7f23e531284b9 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 17 May 2017 
14:21:51 -0700 Subject: Improve sack bytes accounting and testing Change-Id: Iabeda0d0615b0f6fe20dd00611cb4c594d90b7eb Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 42 ++++++++++++++++++++++++++ src/vnet/tcp/tcp.h | 1 + src/vnet/tcp/tcp_format.c | 14 --------- src/vnet/tcp/tcp_input.c | 45 ++++++++++++++++------------ src/vnet/tcp/tcp_test.c | 75 +++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 140 insertions(+), 37 deletions(-) (limited to 'src/vnet/tcp/tcp_format.c') diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index e365fa0e..36d85e46 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -565,6 +565,48 @@ format_tcp_half_open_session (u8 * s, va_list * args) return format (s, "%U", format_tcp_connection, tc); } +u8 * +format_tcp_sacks (u8 * s, va_list * args) +{ + tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); + sack_block_t *sacks = tc->snd_sacks; + sack_block_t *block; + vec_foreach (block, sacks) + { + s = format (s, " start %u end %u\n", block->start - tc->irs, + block->end - tc->irs); + } + return s; +} + +u8 * +format_tcp_sack_hole (u8 * s, va_list * args) +{ + sack_scoreboard_hole_t *hole = va_arg (*args, sack_scoreboard_hole_t *); + s = format (s, "[%u, %u]", hole->start, hole->end); + return s; +} + +u8 * +format_tcp_scoreboard (u8 * s, va_list * args) +{ + sack_scoreboard_t *sb = va_arg (*args, sack_scoreboard_t *); + sack_scoreboard_hole_t *hole; + s = format (s, "head %u tail %u snd_una_adv %u\n", sb->head, sb->tail, + sb->snd_una_adv); + s = format (s, "sacked_bytes %u last_sacked_bytes %u", sb->sacked_bytes, + sb->last_sacked_bytes); + s = format (s, " max_byte_sacked %u\n", sb->max_byte_sacked); + s = format (s, "holes:\n"); + hole = scoreboard_first_hole (sb); + while (hole) + { + s = format (s, "%U", format_tcp_sack_hole, hole); + hole = scoreboard_next_hole (sb, hole); + } + return s; +} + transport_connection_t * tcp_session_get_transport (u32 conn_index, u32 thread_index) { diff --git 
a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8212ada7..8d24a70b 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -389,6 +389,7 @@ void tcp_connection_reset (tcp_connection_t * tc); u8 *format_tcp_connection (u8 * s, va_list * args); u8 *format_tcp_connection_verbose (u8 * s, va_list * args); +u8 *format_tcp_scoreboard (u8 * s, va_list * args); always_inline tcp_connection_t * tcp_listener_get (u32 tli) diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c index 4de99235..1ca2f58e 100644 --- a/src/vnet/tcp/tcp_format.c +++ b/src/vnet/tcp/tcp_format.c @@ -128,20 +128,6 @@ format_tcp_header (u8 * s, va_list * args) return s; } -u8 * -format_tcp_sacks (u8 * s, va_list * args) -{ - tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); - sack_block_t *sacks = tc->snd_sacks; - sack_block_t *block; - vec_foreach (block, sacks) - { - s = format (s, " start %u end %u\n", block->start - tc->irs, - block->end - tc->irs); - } - return s; -} - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index ddee41e0..9d3f4cc3 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -533,12 +533,13 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) sack_scoreboard_t *sb = &tc->sack_sb; sack_block_t *blk, tmp; sack_scoreboard_hole_t *hole, *next_hole, *last_hole, *new_hole; - u32 blk_index = 0, old_sacked_bytes, hole_index; + u32 blk_index = 0, old_sacked_bytes, delivered_bytes, hole_index; int i, j; sb->last_sacked_bytes = 0; sb->snd_una_adv = 0; old_sacked_bytes = sb->sacked_bytes; + delivered_bytes = 0; if (!tcp_opts_sack (&tc->opt) && sb->head == TCP_INVALID_SACK_HOLE_INDEX) return; @@ -584,6 +585,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) last_hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, tc->snd_una, tc->snd_una_max); sb->tail = scoreboard_hole_index (sb, last_hole); + tmp = tc->opt.sacks[vec_len (tc->opt.sacks) - 1]; + sb->max_byte_sacked = 
tmp.end; } else { @@ -614,37 +617,43 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) { /* Bytes lost because snd_wnd left edge advances */ if (next_hole && seq_leq (next_hole->start, ack)) - sb->sacked_bytes -= next_hole->start - hole->end; + delivered_bytes += next_hole->start - hole->end; else - sb->sacked_bytes -= ack - hole->end; + delivered_bytes += ack - hole->end; } else { sb->sacked_bytes += scoreboard_hole_bytes (hole); } - /* snd_una needs to be advanced */ - if (seq_geq (ack, hole->end)) - { - if (next_hole && seq_lt (ack, next_hole->start)) - sb->snd_una_adv = next_hole->start - ack; - else - sb->snd_una_adv = sb->max_byte_sacked - ack; - - /* all these can be delivered */ - sb->sacked_bytes -= sb->snd_una_adv; - } - /* About to remove last hole */ if (hole == last_hole) { sb->tail = hole->prev; last_hole = scoreboard_last_hole (sb); - /* keep track of max byte sacked in case the last hole + /* keep track of max byte sacked for when the last hole * is acked */ if (seq_gt (hole->end, sb->max_byte_sacked)) sb->max_byte_sacked = hole->end; } + + /* snd_una needs to be advanced */ + if (blk->end == ack && seq_geq (ack, hole->end)) + { + if (next_hole && seq_lt (ack, next_hole->start)) + { + sb->snd_una_adv = next_hole->start - ack; + + /* all these can be delivered */ + delivered_bytes += sb->snd_una_adv; + } + else if (!next_hole) + { + sb->snd_una_adv = sb->max_byte_sacked - ack; + delivered_bytes += sb->snd_una_adv; + } + } + scoreboard_remove_hole (sb, hole); hole = next_hole; } @@ -693,8 +702,8 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack) } } - sb->last_sacked_bytes = sb->sacked_bytes + sb->snd_una_adv - - old_sacked_bytes; + sb->last_sacked_bytes = sb->sacked_bytes - old_sacked_bytes; + sb->sacked_bytes -= delivered_bytes; } /** Update snd_wnd diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c index a457ac8f..2af38484 100644 --- a/src/vnet/tcp/tcp_test.c +++ b/src/vnet/tcp/tcp_test.c @@ -35,13 +35,19 @@ } static int 
-tcp_test_sack_rx () +tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input) { tcp_connection_t _tc, *tc = &_tc; sack_scoreboard_t *sb = &tc->sack_sb; sack_block_t *sacks = 0, block; sack_scoreboard_hole_t *hole; - int i; + int i, verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + } memset (tc, 0, sizeof (*tc)); @@ -69,6 +75,10 @@ tcp_test_sack_rx () tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); tcp_rcv_sacks (tc, 0); + if (verbose) + vlib_cli_output (vm, "sb after even blocks:\n%U", format_tcp_scoreboard, + sb); + TCP_TEST ((pool_elts (sb->holes) == 5), "scoreboard has %d elements", pool_elts (sb->holes)); @@ -83,7 +93,8 @@ tcp_test_sack_rx () TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv); TCP_TEST ((sb->last_sacked_bytes == 400), "last sacked bytes %d", sb->last_sacked_bytes); - + TCP_TEST ((sb->max_byte_sacked == 900), + "max byte sacked %u", sb->max_byte_sacked); /* * Inject odd blocks */ @@ -96,6 +107,10 @@ tcp_test_sack_rx () tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); tcp_rcv_sacks (tc, 0); + if (verbose) + vlib_cli_output (vm, "sb after odd blocks:\n%U", format_tcp_scoreboard, + sb); + hole = scoreboard_first_hole (sb); TCP_TEST ((pool_elts (sb->holes) == 1), "scoreboard has %d holes", pool_elts (sb->holes)); @@ -112,6 +127,9 @@ tcp_test_sack_rx () * Ack until byte 100, all bytes are now acked + sacked */ tcp_rcv_sacks (tc, 100); + if (verbose) + vlib_cli_output (vm, "ack until byte 100:\n%U", format_tcp_scoreboard, + sb); TCP_TEST ((pool_elts (sb->holes) == 0), "scoreboard has %d elements", pool_elts (sb->holes)); @@ -133,11 +151,17 @@ tcp_test_sack_rx () block.end = 1300; vec_add1 (tc->opt.sacks, block); + if (verbose) + vlib_cli_output (vm, "add [1200, 1300]:\n%U", format_tcp_scoreboard, sb); tc->snd_una_max = 1500; tc->snd_una = 1000; tc->snd_nxt = 1500; tcp_rcv_sacks (tc, 1000); + if (verbose) + vlib_cli_output (vm, "sb snd_una_max 
1500, snd_una 1000:\n%U", + format_tcp_scoreboard, sb); + TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv after ack %u", sb->snd_una_adv); TCP_TEST ((pool_elts (sb->holes) == 2), @@ -145,6 +169,10 @@ tcp_test_sack_rx () hole = scoreboard_first_hole (sb); TCP_TEST ((hole->start == 1000 && hole->end == 1200), "first hole start %u end %u", hole->start, hole->end); + TCP_TEST ((sb->snd_una_adv == 0), + "snd_una_adv after ack %u", sb->snd_una_adv); + TCP_TEST ((sb->max_byte_sacked == 1300), + "max sacked byte %u", sb->max_byte_sacked); hole = scoreboard_last_hole (sb); TCP_TEST ((hole->start == 1300 && hole->end == 1500), "last hole start %u end %u", hole->start, hole->end); @@ -157,6 +185,10 @@ tcp_test_sack_rx () vec_reset_length (tc->opt.sacks); tcp_rcv_sacks (tc, 1200); + if (verbose) + vlib_cli_output (vm, "sb ack up to byte 1200:\n%U", format_tcp_scoreboard, + sb); + TCP_TEST ((sb->snd_una_adv == 100), "snd_una_adv after ack %u", sb->snd_una_adv); TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); @@ -168,8 +200,41 @@ tcp_test_sack_rx () */ scoreboard_clear (sb); + if (verbose) + vlib_cli_output (vm, "sb cleared all:\n%U", format_tcp_scoreboard, sb); + TCP_TEST ((pool_elts (sb->holes) == 0), "number of holes %d", pool_elts (sb->holes)); + /* + * Re-inject odd blocks and ack them all + */ + + tc->snd_una = 0; + tc->snd_una_max = 1000; + tc->snd_nxt = 1000; + for (i = 0; i < 5; i++) + { + vec_add1 (tc->opt.sacks, sacks[i * 2 + 1]); + } + tc->opt.n_sack_blocks = vec_len (tc->opt.sacks); + tcp_rcv_sacks (tc, 0); + if (verbose) + vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U", + format_tcp_scoreboard, sb); + + tcp_rcv_sacks (tc, 950); + + if (verbose) + vlib_cli_output (vm, "sb added odd blocks and ack [0, 950]:\n%U", + format_tcp_scoreboard, sb); + + TCP_TEST ((pool_elts (sb->holes) == 0), + "scoreboard has %d elements", pool_elts (sb->holes)); + TCP_TEST ((sb->snd_una_adv == 50), "snd_una_adv %u", sb->snd_una_adv); + TCP_TEST 
((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes); + TCP_TEST ((sb->last_sacked_bytes == 0), + "last sacked bytes %d", sb->last_sacked_bytes); + return 0; } @@ -290,7 +355,7 @@ tcp_test_sack (vlib_main_t * vm, unformat_input_t * input) return -1; } - if (tcp_test_sack_rx ()) + if (tcp_test_sack_rx (vm, input)) { return -1; } @@ -303,7 +368,7 @@ tcp_test_sack (vlib_main_t * vm, unformat_input_t * input) } else if (unformat (input, "rx")) { - res = tcp_test_sack_rx (); + res = tcp_test_sack_rx (vm, input); } } -- cgit 1.2.3-korg