aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2017-06-09 21:07:32 -0700
committerDamjan Marion <dmarion.lists@gmail.com>2017-06-19 14:06:34 +0000
commitf03a59ab008908f98fd7d1b187a8c0fb78b01add (patch)
tree8ce1ab091e288d5edbc5df712f668e2e4888c90e
parent328dbc886d7acd3491cff86a7a85176e511acf35 (diff)
Overall tcp performance improvements (VPP-846)
- limit minimum rto per connection - cleanup sack scoreboard - switched svm fifo out-of-order data handling from absolute offsets to relative offsets. - improve cwnd handling when using sacks - add cc event debug stats - improved uri tcp test client/server: bugfixes and added half-duplex mode - expanded builtin client/server - updated uri socket client/server code to work in half-duplex - ensure session node unsets fifo event for empty fifo - fix session detach Change-Id: Ia446972340e32a65e0694ee2844355167d0c170d Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r--src/svm/svm_fifo.c152
-rw-r--r--src/svm/svm_fifo.h26
-rw-r--r--src/svm/svm_fifo_segment.c7
-rw-r--r--src/uri/uri_socket_server.c25
-rw-r--r--src/uri/uri_socket_test.c93
-rwxr-xr-xsrc/uri/uri_tcp_test.c120
-rw-r--r--src/vnet/session/application.c2
-rw-r--r--src/vnet/session/node.c7
-rw-r--r--src/vnet/session/segment_manager.c5
-rw-r--r--src/vnet/session/session.c25
-rw-r--r--src/vnet/session/session.h4
-rwxr-xr-xsrc/vnet/session/session_api.c2
-rw-r--r--src/vnet/session/transport.h2
-rw-r--r--src/vnet/tcp/builtin_client.c325
-rw-r--r--src/vnet/tcp/builtin_client.h107
-rw-r--r--src/vnet/tcp/builtin_server.c114
-rw-r--r--src/vnet/tcp/tcp.c22
-rw-r--r--src/vnet/tcp/tcp.h12
-rwxr-xr-xsrc/vnet/tcp/tcp_debug.h186
-rw-r--r--src/vnet/tcp/tcp_input.c193
-rw-r--r--src/vnet/tcp/tcp_newreno.c4
-rw-r--r--src/vnet/tcp/tcp_output.c10
-rw-r--r--src/vnet/tcp/tcp_test.c70
23 files changed, 945 insertions, 568 deletions
diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c
index 5c8f244a..6ca437cf 100644
--- a/src/svm/svm_fifo.c
+++ b/src/svm/svm_fifo.c
@@ -15,10 +15,39 @@
#include <svm/svm_fifo.h>
-#define offset_lt(_a, _b) ((i32)((_a)-(_b)) < 0)
-#define offset_leq(_a, _b) ((i32)((_a)-(_b)) <= 0)
-#define offset_gt(_a, _b) ((i32)((_a)-(_b)) > 0)
-#define offset_geq(_a, _b) ((i32)((_a)-(_b)) >= 0)
+static inline u8
+position_lt (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_to_tail (f, a)
+ < ooo_segment_distance_to_tail (f, b));
+}
+
+static inline u8
+position_leq (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_to_tail (f, a)
+ <= ooo_segment_distance_to_tail (f, b));
+}
+
+static inline u8
+position_gt (svm_fifo_t * f, u32 a, u32 b)
+{
+ return (ooo_segment_distance_to_tail (f, a)
+ > ooo_segment_distance_to_tail (f, b));
+}
+
+static inline u32
+position_diff (svm_fifo_t * f, u32 posa, u32 posb)
+{
+ return ooo_segment_distance_to_tail (f, posa)
+ - ooo_segment_distance_to_tail (f, posb);
+}
+
+static inline u32
+ooo_segment_end_pos (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return (s->start + s->length) % f->nitems;
+}
u8 *
format_ooo_segment (u8 * s, va_list * args)
@@ -145,13 +174,17 @@ static void
ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
{
ooo_segment_t *s, *new_s, *prev, *next, *it;
- u32 new_index, end_offset, s_sof, s_eof, s_index;
+ u32 new_index, s_end_pos, s_index;
+ u32 normalized_position, normalized_end_position;
+
+ normalized_position = (f->tail + offset) % f->nitems;
+ normalized_end_position = (f->tail + offset + length) % f->nitems;
- end_offset = offset + length;
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX)
{
- s = ooo_segment_new (f, offset, length);
+ s = ooo_segment_new (f, normalized_position, length);
f->ooos_list_head = s - f->ooo_segments;
f->ooos_newest = f->ooos_list_head;
return;
@@ -160,28 +193,26 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
/* Find first segment that starts after new segment */
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
while (s->next != OOO_SEGMENT_INVALID_INDEX
- && offset_leq (ooo_segment_offset (f, s), offset))
+ && position_lt (f, s->start, normalized_position))
s = pool_elt_at_index (f->ooo_segments, s->next);
/* If we have a previous and we overlap it, use it as starting point */
prev = ooo_segment_get_prev (f, s);
- if (prev && offset_leq (offset, ooo_segment_end_offset (f, prev)))
+ if (prev
+ && position_leq (f, normalized_position, ooo_segment_end_pos (f, prev)))
{
s = prev;
- prev = ooo_segment_get_prev (f, s);
- s_sof = ooo_segment_offset (f, s);
- s_eof = ooo_segment_end_offset (f, s);
+ s_end_pos = ooo_segment_end_pos (f, s);
goto merge;
}
s_index = s - f->ooo_segments;
- s_sof = ooo_segment_offset (f, s);
- s_eof = ooo_segment_end_offset (f, s);
+ s_end_pos = ooo_segment_end_pos (f, s);
/* No overlap, add before current segment */
- if (offset_lt (end_offset, s_sof))
+ if (position_lt (f, normalized_end_position, s->start))
{
- new_s = ooo_segment_new (f, offset, length);
+ new_s = ooo_segment_new (f, normalized_position, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
@@ -198,28 +229,23 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
f->ooos_list_head = new_index;
}
- new_s->next = s - f->ooo_segments;
+ new_s->next = s_index;
s->prev = new_index;
f->ooos_newest = new_index;
return;
}
/* No overlap, add after current segment */
- else if (offset_gt (offset, s_eof))
+ else if (position_gt (f, normalized_position, s_end_pos))
{
- new_s = ooo_segment_new (f, offset, length);
+ new_s = ooo_segment_new (f, normalized_position, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
s = pool_elt_at_index (f->ooo_segments, s_index);
- if (s->next != OOO_SEGMENT_INVALID_INDEX)
- {
- new_s->next = s->next;
- next = pool_elt_at_index (f->ooo_segments, new_s->next);
- next->prev = new_index;
- }
+ ASSERT (s->next == OOO_SEGMENT_INVALID_INDEX);
- new_s->prev = s - f->ooo_segments;
+ new_s->prev = s_index;
s->next = new_index;
f->ooos_newest = new_index;
@@ -233,30 +259,32 @@ ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
merge:
/* Merge at head */
- if (offset_lt (offset, s_sof))
+ if (position_lt (f, normalized_position, s->start))
{
- s->start = offset;
- s->length = s_eof - ooo_segment_offset (f, s);
+ s->start = normalized_position;
+ s->length = position_diff (f, s_end_pos, s->start);
}
- /* Last but overlapping previous */
- else if (offset_gt (end_offset, s_eof))
+ /* Overlapping tail */
+ else if (position_gt (f, normalized_end_position, s_end_pos))
{
- s->length = end_offset - ooo_segment_offset (f, s);
+ s->length = position_diff (f, normalized_end_position, s->start);
}
/* New segment completely covered by current one */
else
{
/* Do Nothing */
+ s = 0;
goto done;
}
/* The new segment's tail may cover multiple smaller ones */
- if (offset_geq (end_offset, s_eof))
+ if (position_gt (f, normalized_end_position, s_end_pos))
{
/* Remove the completely overlapped segments */
it = (s->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, s->next) : 0;
- while (it && offset_leq (ooo_segment_end_offset (f, it), end_offset))
+ while (it && position_leq (f, ooo_segment_end_pos (f, it),
+ normalized_end_position))
{
next = (it->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, it->next) : 0;
@@ -265,17 +293,17 @@ merge:
}
/* If partial overlap with last, merge */
- if (it && offset_leq (ooo_segment_offset (f, it), end_offset))
+ if (it && position_leq (f, it->start, normalized_end_position))
{
- s->length = ooo_segment_end_offset (f, it) -
- ooo_segment_offset (f, s);
+ s->length = ooo_segment_end_pos (f, it) - s->start;
ooo_segment_del (f, it - f->ooo_segments);
}
}
done:
/* Most recently updated segment */
- f->ooos_newest = s - f->ooo_segments;
+ if (s)
+ f->ooos_newest = s - f->ooo_segments;
}
/**
@@ -286,32 +314,28 @@ static int
ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
{
ooo_segment_t *s;
- u32 index, bytes = 0, diff;
- u32 cursize, norm_start, nitems;
-
- /* current size has not yet been updated */
- cursize = svm_fifo_max_dequeue (f) + n_bytes_enqueued;
- nitems = f->nitems;
+ u32 index, bytes = 0;
+ i32 diff;
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
- norm_start = s->start % nitems;
- diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems;
+ diff = (f->tail >= s->start) ?
+ f->tail - s->start : f->nitems + f->tail - s->start;
- if (diff > cursize)
+ if (diff > n_bytes_enqueued)
return 0;
/* If last tail update overlaps one/multiple ooo segments, remove them */
- while (0 < diff && diff < cursize)
+ while (0 <= diff && diff < n_bytes_enqueued)
{
index = s - f->ooo_segments;
/* Segment end is beyond the tail. Advance tail and remove segment */
- if (diff < s->length)
+ if (s->length > diff)
{
- f->tail += s->length - diff;
- f->tail %= f->nitems;
bytes = s->length - diff;
+ f->tail += bytes;
+ f->tail %= f->nitems;
ooo_segment_del (f, index);
break;
}
@@ -320,8 +344,8 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
if (s->next != OOO_SEGMENT_INVALID_INDEX)
{
s = pool_elt_at_index (f->ooo_segments, s->next);
- norm_start = s->start % nitems;
- diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems;
+ diff = (f->tail >= s->start) ?
+ f->tail - s->start : f->nitems + f->tail - s->start;
ooo_segment_del (f, index);
}
/* End of search */
@@ -332,18 +356,6 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
}
}
- /* If tail is adjacent to an ooo segment, 'consume' it */
- if (diff == 0)
- {
- bytes = ((nitems - cursize) >= s->length) ? s->length :
- nitems - cursize;
-
- f->tail += bytes;
- f->tail %= nitems;
-
- ooo_segment_del (f, s - f->ooo_segments);
- }
-
return bytes;
}
@@ -355,6 +367,7 @@ svm_fifo_enqueue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
if (PREDICT_FALSE (cursize == f->nitems))
return -2; /* fifo stuffed */
@@ -424,13 +437,16 @@ svm_fifo_enqueue_with_offset_internal (svm_fifo_t * f,
u8 * copy_from_here)
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
- u32 cursize, nitems;
- u32 normalized_offset, offset_from_tail;
+ u32 cursize, nitems, normalized_offset;
+ u32 offset_from_tail;
+
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
nitems = f->nitems;
- normalized_offset = offset % nitems;
+
+ normalized_offset = (f->tail + offset) % nitems;
/* Will this request fit? */
offset_from_tail = (nitems + normalized_offset - f->tail) % nitems;
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
index 9cb93ff4..f32ef41d 100644
--- a/src/svm/svm_fifo.h
+++ b/src/svm/svm_fifo.h
@@ -127,21 +127,37 @@ format_function_t format_svm_fifo;
always_inline ooo_segment_t *
svm_fifo_newest_ooo_segment (svm_fifo_t * f)
{
- return f->ooo_segments + f->ooos_newest;
+ if (f->ooos_newest == OOO_SEGMENT_INVALID_INDEX)
+ return 0;
+ return pool_elt_at_index (f->ooo_segments, f->ooos_newest);
+}
+
+always_inline u32
+ooo_segment_distance_to_tail (svm_fifo_t * f, u32 a)
+{
+ /* Ambiguous. Assumption is that ooo segments don't touch tail */
+ if (a == f->tail && f->tail == f->head)
+ return f->nitems;
+
+ return ((f->nitems + a - f->tail) % f->nitems);
}
always_inline u32
ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s)
{
-// return ((f->nitems + s->fifo_position - f->tail) % f->nitems);
- return s->start;
+ return ooo_segment_distance_to_tail (f, s->start);
}
always_inline u32
ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s)
{
-// return ((f->nitems + s->fifo_position + s->length - f->tail) % f->nitems);
- return s->start + s->length;
+ return ooo_segment_distance_to_tail (f, s->start) + s->length;
+}
+
+always_inline u32
+ooo_segment_length (svm_fifo_t * f, ooo_segment_t * s)
+{
+ return s->length;
}
always_inline ooo_segment_t *
diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c
index eef2168c..c4ac2352 100644
--- a/src/svm/svm_fifo_segment.c
+++ b/src/svm/svm_fifo_segment.c
@@ -305,14 +305,17 @@ svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f,
/* Remove from active list */
if (f->prev)
f->prev->next = f->next;
+ else
+ fsh->fifos = f->next;
if (f->next)
f->next->prev = f->prev;
- /* FALLTHROUGH */
+ /* Fall through: we add only rx fifos to active pool */
case FIFO_SEGMENT_TX_FREELIST:
/* Add to free list */
f->next = fsh->free_fifos[list_index];
+ f->prev = 0;
fsh->free_fifos[list_index] = f;
- /* FALLTHROUGH */
+ break;
case FIFO_SEGMENT_FREELIST_NONE:
break;
diff --git a/src/uri/uri_socket_server.c b/src/uri/uri_socket_server.c
index 2366f420..4f4c5f30 100644
--- a/src/uri/uri_socket_server.c
+++ b/src/uri/uri_socket_server.c
@@ -22,6 +22,7 @@
#include <vppinfra/format.h>
#include <signal.h>
#include <sys/ucontext.h>
+#include <sys/time.h>
volatile int signal_received;
@@ -78,7 +79,10 @@ main (int argc, char *argv[])
struct sockaddr_in serv_addr;
struct sockaddr_in client;
struct hostent *server;
- u8 *rx_buffer = 0;
+ u8 *rx_buffer = 0, no_echo = 0;
+ struct timeval start, end;
+ long rcvd = 0;
+ double deltat;
if (argc > 1 && argc < 3)
{
@@ -86,8 +90,9 @@ main (int argc, char *argv[])
exit (0);
}
- if (argc >= 3)
+ if (argc >= 4)
{
+ no_echo = atoi (argv[3]);
portno = atoi (argv[2]);
server = gethostbyname (argv[1]);
if (server == NULL)
@@ -137,7 +142,7 @@ main (int argc, char *argv[])
exit (1);
}
- vec_validate (rx_buffer, 8999 /* jumbo mtu */ );
+ vec_validate (rx_buffer, 128 << 10);
if (listen (sockfd, 5 /* backlog */ ) < 0)
{
@@ -160,6 +165,8 @@ main (int argc, char *argv[])
}
fformat (stderr, "Accepted connection from: %s : %d\n",
inet_ntoa (client.sin_addr), client.sin_port);
+ gettimeofday (&start, NULL);
+
while (1)
{
n = recv (accfd, rx_buffer, vec_len (rx_buffer), 0 /* flags */ );
@@ -167,6 +174,14 @@ main (int argc, char *argv[])
{
/* Graceful exit */
close (accfd);
+ gettimeofday (&end, NULL);
+ deltat = (end.tv_sec - start.tv_sec);
+ deltat += (end.tv_usec - start.tv_usec) / 1000000.0;
+ clib_warning ("Finished in %.6f", deltat);
+ clib_warning ("%.4f Gbit/second %s",
+ (((f64) rcvd * 8.0) / deltat / 1e9),
+ no_echo ? "half" : "full");
+ rcvd = 0;
break;
}
if (n < 0)
@@ -179,6 +194,10 @@ main (int argc, char *argv[])
if (signal_received)
break;
+ rcvd += n;
+ if (no_echo)
+ continue;
+
sent = send (accfd, rx_buffer, n, 0 /* flags */ );
if (n < 0)
{
diff --git a/src/uri/uri_socket_test.c b/src/uri/uri_socket_test.c
index 9f049bda..5f7084d5 100644
--- a/src/uri/uri_socket_test.c
+++ b/src/uri/uri_socket_test.c
@@ -19,6 +19,7 @@
#include <netinet/in.h>
#include <netdb.h>
#include <vppinfra/format.h>
+#include <sys/time.h>
int
main (int argc, char *argv[])
@@ -26,28 +27,44 @@ main (int argc, char *argv[])
int sockfd, portno, n;
struct sockaddr_in serv_addr;
struct hostent *server;
- u8 *rx_buffer = 0, *tx_buffer = 0;
+ u8 *rx_buffer = 0, *tx_buffer = 0, no_echo = 0, test_bytes = 0;
u32 offset;
- int iter, i;
- if (0 && argc < 3)
+ long bytes = 1 << 20, to_send;
+ int i;
+ struct timeval start, end;
+ double deltat;
+
+ if (argc >= 3)
{
- fformat (stderr, "usage %s hostname port\n", argv[0]);
- exit (0);
+ bytes = ((long) atoi (argv[4])) << 20;
+ no_echo = atoi (argv[3]);
+ portno = atoi (argv[2]);
+ server = gethostbyname (argv[1]);
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
+ }
+ else
+ {
+ portno = 1234; // atoi(argv[2]);
+ server = gethostbyname ("6.0.1.1" /* argv[1] */ );
+ if (server == NULL)
+ {
+ clib_unix_warning ("gethostbyname");
+ exit (1);
+ }
}
- portno = 1234; // atoi(argv[2]);
+ to_send = bytes;
sockfd = socket (AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
clib_unix_error ("socket");
exit (1);
}
- server = gethostbyname ("6.0.1.1" /* argv[1] */ );
- if (server == NULL)
- {
- clib_unix_warning ("gethostbyname");
- exit (1);
- }
+
bzero ((char *) &serv_addr, sizeof (serv_addr));
serv_addr.sin_family = AF_INET;
bcopy ((char *) server->h_addr,
@@ -59,8 +76,8 @@ main (int argc, char *argv[])
exit (1);
}
- vec_validate (rx_buffer, 1400);
- vec_validate (tx_buffer, 1400);
+ vec_validate (rx_buffer, 128 << 10);
+ vec_validate (tx_buffer, 128 << 10);
for (i = 0; i < vec_len (tx_buffer); i++)
tx_buffer[i] = (i + 1) % 0xff;
@@ -75,19 +92,28 @@ main (int argc, char *argv[])
exit (0);
}
- for (iter = 0; iter < 100000; iter++)
+ gettimeofday (&start, NULL);
+ while (bytes > 0)
{
- if (iter < 99999)
+ /*
+ * TX
+ */
+ n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
+ if (n != vec_len (tx_buffer))
{
- n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
- if (n != vec_len (tx_buffer))
- {
- clib_unix_warning ("write");
- exit (0);
- }
+ clib_unix_warning ("write");
+ exit (0);
}
- offset = 0;
+ bytes -= n;
+ if (no_echo)
+ continue;
+
+ /*
+ * RX
+ */
+
+ offset = 0;
do
{
n = recv (sockfd, rx_buffer + offset,
@@ -101,18 +127,27 @@ main (int argc, char *argv[])
}
while (offset < vec_len (rx_buffer));
- for (i = 0; i < vec_len (rx_buffer); i++)
+ if (test_bytes)
{
- if (rx_buffer[i] != tx_buffer[i])
+ for (i = 0; i < vec_len (rx_buffer); i++)
{
- clib_warning ("[%d] read 0x%x not 0x%x",
- rx_buffer[i], tx_buffer[i]);
- exit (1);
+ if (rx_buffer[i] != tx_buffer[i])
+ {
+ clib_warning ("[%d] read 0x%x not 0x%x", rx_buffer[i],
+ tx_buffer[i]);
+ exit (1);
+ }
}
}
-
}
close (sockfd);
+ gettimeofday (&end, NULL);
+
+ deltat = (end.tv_sec - start.tv_sec);
+ deltat += (end.tv_usec - start.tv_usec) / 1000000.0; // us to ms
+ clib_warning ("Finished in %.6f", deltat);
+ clib_warning ("%.4f Gbit/second %s", (((f64) to_send * 8.0) / deltat / 1e9),
+ no_echo ? "half" : "full");
return 0;
}
diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c
index e201a359..d1694cf4 100755
--- a/src/uri/uri_tcp_test.c
+++ b/src/uri/uri_tcp_test.c
@@ -46,6 +46,8 @@ typedef struct
svm_fifo_t *server_tx_fifo;
u64 vpp_session_handle;
+ u64 bytes_received;
+ f64 start;
} session_t;
typedef enum
@@ -174,7 +176,7 @@ wait_for_state_change (uri_tcp_test_main_t * utm, connection_state_t state)
if (utm->state == STATE_FAILED)
return -1;
if (utm->time_to_stop == 1)
- return -1;
+ return 0;
}
clib_warning ("timeout waiting for STATE_READY");
return -1;
@@ -184,7 +186,7 @@ void
application_send_attach (uri_tcp_test_main_t * utm)
{
vl_api_application_attach_t *bmp;
- u32 fifo_size = 3 << 20;
+ u32 fifo_size = 4 << 20;
bmp = vl_msg_api_alloc (sizeof (*bmp));
memset (bmp, 0, sizeof (*bmp));
@@ -344,10 +346,22 @@ vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp)
}
static void
+session_print_stats (uri_tcp_test_main_t * utm, session_t * session)
+{
+ f64 deltat;
+ u64 bytes;
+
+ deltat = clib_time_now (&utm->clib_time) - session->start;
+ bytes = utm->i_am_master ? session->bytes_received : utm->bytes_to_send;
+ fformat (stdout, "Finished in %.6f\n", deltat);
+ fformat (stdout, "%.4f Gbit/second\n", (bytes * 8.0) / deltat / 1e9);
+}
+
+static void
vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
{
uri_tcp_test_main_t *utm = &uri_tcp_test_main;
- session_t *session;
+ session_t *session = 0;
vl_api_disconnect_session_reply_t *rmp;
uword *p;
int rv = 0;
@@ -366,7 +380,7 @@ vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
rv = -11;
}
- utm->time_to_stop = 1;
+// utm->time_to_stop = 1;
rmp = vl_msg_api_alloc (sizeof (*rmp));
memset (rmp, 0, sizeof (*rmp));
@@ -375,6 +389,9 @@ vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
rmp->retval = rv;
rmp->handle = mp->handle;
vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+ if (session)
+ session_print_stats (utm, session);
}
static void
@@ -431,14 +448,19 @@ client_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
if (n_read > 0)
{
bytes -= n_read;
- for (i = 0; i < n_read; i++)
+ if (utm->test_return_packets)
{
- if (utm->rx_buf[i] != ((utm->client_bytes_received + i) & 0xff))
+ for (i = 0; i < n_read; i++)
{
- clib_warning ("error at byte %lld, 0x%x not 0x%x",
- utm->client_bytes_received + i,
- utm->rx_buf[i],
- ((utm->client_bytes_received + i) & 0xff));
+ if (utm->rx_buf[i]
+ != ((utm->client_bytes_received + i) & 0xff))
+ {
+ clib_warning ("error at byte %lld, 0x%x not 0x%x",
+ utm->client_bytes_received + i,
+ utm->rx_buf[i],
+ ((utm->client_bytes_received +
+ i) & 0xff));
+ }
}
}
utm->client_bytes_received += n_read;
@@ -545,6 +567,7 @@ vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp)
session->server_rx_fifo = rx_fifo;
session->server_tx_fifo = tx_fifo;
session->vpp_session_handle = mp->handle;
+ session->start = clib_time_now (&utm->clib_time);
/* Save handle */
utm->connected_session_index = session_index;
@@ -571,7 +594,7 @@ send_test_chunk (uri_tcp_test_main_t * utm, svm_fifo_t * tx_fifo, int mypid,
u64 bytes_sent = 0;
int test_buf_offset = 0;
u32 bytes_to_snd;
- u32 queue_max_chunk = 64 << 10, actual_write;
+ u32 queue_max_chunk = 128 << 10, actual_write;
session_fifo_event_t evt;
static int serial_number = 0;
int rv;
@@ -582,8 +605,8 @@ send_test_chunk (uri_tcp_test_main_t * utm, svm_fifo_t * tx_fifo, int mypid,
while (bytes_to_snd > 0)
{
- actual_write =
- bytes_to_snd > queue_max_chunk ? queue_max_chunk : bytes_to_snd;
+ actual_write = (bytes_to_snd > queue_max_chunk) ?
+ queue_max_chunk : bytes_to_snd;
rv = svm_fifo_enqueue_nowait (tx_fifo, actual_write,
test_data + test_buf_offset);
@@ -635,9 +658,9 @@ client_send_data (uri_tcp_test_main_t * utm)
if (leftover)
send_test_chunk (utm, tx_fifo, mypid, leftover);
- if (utm->test_return_packets)
+ if (!utm->drop_packets)
{
- f64 timeout = clib_time_now (&utm->clib_time) + 2;
+ f64 timeout = clib_time_now (&utm->clib_time) + 10;
/* Wait for the outstanding packets */
while (utm->client_bytes_received <
@@ -698,6 +721,7 @@ int
client_disconnect (uri_tcp_test_main_t * utm)
{
client_send_disconnect (utm);
+ clib_warning ("Sent disconnect");
if (wait_for_state_change (utm, STATE_START))
{
clib_warning ("Disconnect failed");
@@ -721,7 +745,7 @@ client_test (uri_tcp_test_main_t * utm)
}
/* Init test data */
- vec_validate (utm->connect_test_data, 64 * 1024 - 1);
+ vec_validate (utm->connect_test_data, 128 * 1024 - 1);
for (i = 0; i < vec_len (utm->connect_test_data); i++)
utm->connect_test_data[i] = i & 0xff;
@@ -899,6 +923,9 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp)
rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY);
rmp->handle = mp->handle;
vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+ session->bytes_received = 0;
+ session->start = clib_time_now (&utm->clib_time);
}
void
@@ -909,37 +936,50 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
int n_read;
session_fifo_event_t evt;
unix_shared_memory_queue_t *q;
- int rv, bytes;
+ session_t *session;
+ int rv;
+ u32 max_dequeue, offset, max_transfer, rx_buf_len;
+ rx_buf_len = vec_len (utm->rx_buf);
rx_fifo = e->fifo;
- tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo;
+ session = &utm->sessions[rx_fifo->client_session_index];
+ tx_fifo = session->server_tx_fifo;
- bytes = svm_fifo_max_dequeue (rx_fifo);
+ max_dequeue = svm_fifo_max_dequeue (rx_fifo);
/* Allow enqueuing of a new event */
svm_fifo_unset_event (rx_fifo);
- if (bytes == 0)
- return;
+ if (PREDICT_FALSE (max_dequeue == 0))
+ {
+ return;
+ }
- /* Read the bytes */
+ /* Read the max_dequeue */
do
{
- n_read = svm_fifo_dequeue_nowait (rx_fifo, vec_len (utm->rx_buf),
- utm->rx_buf);
+ max_transfer = clib_min (rx_buf_len, max_dequeue);
+ n_read = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, utm->rx_buf);
if (n_read > 0)
- bytes -= n_read;
-
- if (utm->drop_packets)
- continue;
+ {
+ max_dequeue -= n_read;
+ session->bytes_received += n_read;
+ }
/* Reflect if a non-drop session */
- if (n_read > 0)
+ if (!utm->drop_packets && n_read > 0)
{
+ offset = 0;
do
{
- rv = svm_fifo_enqueue_nowait (tx_fifo, n_read, utm->rx_buf);
+ rv = svm_fifo_enqueue_nowait (tx_fifo, n_read,
+ &utm->rx_buf[offset]);
+ if (rv > 0)
+ {
+ n_read -= rv;
+ offset += rv;
+ }
}
- while (rv <= 0 && !utm->time_to_stop);
+ while ((rv <= 0 || n_read > 0) && !utm->time_to_stop);
/* If event wasn't set, add one */
if (svm_fifo_set_event (tx_fifo))
@@ -951,11 +991,11 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm,
q = utm->vpp_event_queue;
unix_shared_memory_queue_add (q, (u8 *) & evt,
- 0 /* do wait for mutex */ );
+ 1 /* do wait for mutex */ );
}
}
}
- while ((n_read < 0 || bytes > 0) && !utm->time_to_stop);
+ while ((n_read < 0 || max_dequeue > 0) && !utm->time_to_stop);
}
void
@@ -1068,9 +1108,18 @@ vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t *
mp)
{
uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+ session_t *session;
+
+ if (mp->retval)
+ {
+ clib_warning ("vpp complained about disconnect: %d",
+ ntohl (mp->retval));
+ }
- clib_warning ("retval %d", ntohl (mp->retval));
utm->state = STATE_START;
+ session = pool_elt_at_index (utm->sessions, utm->connected_session_index);
+ if (session)
+ session_print_stats (utm, session);
}
#define foreach_uri_msg \
@@ -1123,7 +1172,7 @@ main (int argc, char **argv)
/* make the main heap thread-safe */
h->flags |= MHEAP_FLAG_THREAD_SAFE;
- vec_validate (utm->rx_buf, 65536);
+ vec_validate (utm->rx_buf, 128 << 10);
utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
@@ -1186,6 +1235,7 @@ main (int argc, char **argv)
utm->drop_packets = drop_packets;
utm->test_return_packets = test_return_packets;
utm->bytes_to_send = bytes_to_send;
+ utm->time_to_stop = 0;
setup_signal_handlers ();
uri_api_hookup (utm);
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index c679b1f5..4bdb1027 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -117,7 +117,7 @@ application_del (application_t * app)
/* Actual listener cleanup */
for (i = 0; i < vec_len (handles); i++)
{
- a->app_index = app->api_client_index;
+ a->app_index = app->index;
a->handle = handles[i];
/* seg manager is removed when unbind completes */
vnet_unbind (a);
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index 07eeae82..c0ab1bf0 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -171,7 +171,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Nothing to read return */
if (max_dequeue0 == 0)
- return 0;
+ {
+ svm_fifo_unset_event (s0->server_tx_fifo);
+ return 0;
+ }
/* Ensure we're not writing more than transport window allows */
if (max_dequeue0 < snd_space0)
@@ -393,7 +396,7 @@ session_event_get_session (session_fifo_event_t * e0, u8 thread_index)
s0 = stream_session_get_if_valid (session_index0, thread_index);
- ASSERT (s0->thread_index == thread_index);
+ ASSERT (s0 == 0 || s0->thread_index == thread_index);
return s0;
}
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index caf8eaa3..bf571963 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -306,11 +306,13 @@ again:
if (added_a_segment)
{
clib_warning ("added a segment, still cant allocate a fifo");
+ clib_spinlock_unlock (&sm->lockp);
return SESSION_ERROR_NEW_SEG_NO_SPACE;
}
if (session_manager_add_segment (sm))
{
+ clib_spinlock_unlock (&sm->lockp);
return VNET_API_ERROR_URI_FIFO_CREATE_FAILED;
}
@@ -320,6 +322,7 @@ again:
else
{
clib_warning ("No space to allocate fifos!");
+ clib_spinlock_unlock (&sm->lockp);
return SESSION_ERROR_NO_SPACE;
}
}
@@ -361,8 +364,10 @@ segment_manager_dealloc_fifos (u32 svm_segment_index, svm_fifo_t * rx_fifo,
if (sm->segment_indices[0] != svm_segment_index
&& !svm_fifo_segment_has_fifos (fifo_segment))
{
+ clib_spinlock_lock (&sm->lockp);
svm_fifo_segment_delete (fifo_segment);
vec_del1 (sm->segment_indices, svm_segment_index);
+ clib_spinlock_unlock (&sm->lockp);
}
}
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 534598d6..fe198044 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -700,7 +700,7 @@ stream_session_init_fifos_pointers (transport_connection_t * tc,
svm_fifo_init_pointers (s->server_tx_fifo, tx_pointer);
}
-void
+int
stream_session_connect_notify (transport_connection_t * tc, u8 sst,
u8 is_fail)
{
@@ -709,6 +709,7 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst,
stream_session_t *new_s = 0;
u64 handle;
u32 api_context = 0;
+ int error = 0;
handle = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip,
tc->lcl_port, tc->rmt_port,
@@ -716,7 +717,7 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst,
if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
{
clib_warning ("This can't be good!");
- return;
+ return -1;
}
/* Get the app's index from the handle we stored when opening connection */
@@ -730,9 +731,12 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst,
/* Create new session (svm segments are allocated if needed) */
if (stream_session_create_i (sm, tc, &new_s))
- return;
-
- new_s->app_index = app->index;
+ {
+ is_fail = 1;
+ error = -1;
+ }
+ else
+ new_s->app_index = app->index;
}
/* Notify client */
@@ -741,6 +745,8 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst,
/* Cleanup session lookup */
stream_session_half_open_table_del (smm, sst, tc);
+
+ return error;
}
void
@@ -981,8 +987,13 @@ session_send_session_evt_to_thread (u64 session_handle,
/* Based on request block (or not) for lack of space */
if (PREDICT_TRUE (q->cursize < q->maxsize))
- unix_shared_memory_queue_add (q, (u8 *) & evt,
- 0 /* do wait for mutex */ );
+ {
+ if (unix_shared_memory_queue_add (q, (u8 *) & evt,
+ 1 /* do wait for mutex */ ))
+ {
+ clib_warning ("failed to enqueue evt");
+ }
+ }
else
{
clib_warning ("queue full");
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index d9c38bd1..5fa4225c 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -368,8 +368,8 @@ stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
-void stream_session_connect_notify (transport_connection_t * tc, u8 sst,
- u8 is_fail);
+int stream_session_connect_notify (transport_connection_t * tc, u8 sst,
+ u8 is_fail);
void stream_session_init_fifos_pointers (transport_connection_t * tc,
u32 rx_pointer, u32 tx_pointer);
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index f772cb9f..60f764af 100755
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -419,7 +419,7 @@ done:
REPLY_MACRO (VL_API_UNBIND_URI_REPLY);
}
-void
+static void
vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
{
vl_api_connect_uri_reply_t *rmp;
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index e5f788be..04bd5ca0 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -39,6 +39,7 @@ typedef struct _transport_connection
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
+ u32 cc_stat_tstamp; /**< CC stats timestamp */
#endif
/** Macros for 'derived classes' where base is named "connection" */
@@ -57,6 +58,7 @@ typedef struct _transport_connection
#define c_is_ip4 connection.is_ip4
#define c_thread_index connection.thread_index
#define c_elog_track connection.elog_track
+#define c_cc_stat_tstamp connection.cc_stat_tstamp
} transport_connection_t;
/*
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index 7238cda3..6f8be082 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -43,7 +43,7 @@
#include <vpp/api/vpe_all_api_h.h>
#undef vl_printfun
-#define TCP_BUILTIN_CLIENT_DBG (1)
+#define TCP_BUILTIN_CLIENT_DBG (0)
static void
send_test_chunk (tclient_main_t * tm, session_t * s)
@@ -92,7 +92,7 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
ed->data[2] = s->bytes_to_send;
}
- /* Poke the TCP state machine */
+ /* Poke the session layer */
if (svm_fifo_set_event (s->server_tx_fifo))
{
/* Fabricate TX event, send to vpp */
@@ -100,8 +100,9 @@ send_test_chunk (tclient_main_t * tm, session_t * s)
evt.event_type = FIFO_EVENT_APP_TX;
evt.event_id = serial_number++;
- unix_shared_memory_queue_add (tm->vpp_event_queue, (u8 *) & evt,
- 0 /* do wait for mutex */ );
+ if (unix_shared_memory_queue_add (tm->vpp_event_queue, (u8 *) & evt,
+ 0 /* do wait for mutex */ ))
+ clib_warning ("could not enqueue event");
}
}
}
@@ -188,13 +189,13 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
sp = pool_elt_at_index (tm->sessions, connection_indices[i]);
- if (tx_quota < 60 && sp->bytes_to_send > 0)
+ if ((tm->no_return || tx_quota < 60) && sp->bytes_to_send > 0)
{
send_test_chunk (tm, sp);
delete_session = 0;
tx_quota++;
}
- if (sp->bytes_to_receive > 0)
+ if (!tm->no_return && sp->bytes_to_receive > 0)
{
prev_bytes_received_this_session = sp->bytes_received;
receive_test_chunk (tm, sp);
@@ -205,13 +206,14 @@ builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (delete_session == 1))
{
+ __sync_fetch_and_add (&tm->tx_total, tm->bytes_to_send);
__sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
+
dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp));
memset (dmp, 0, sizeof (*dmp));
dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
dmp->client_index = tm->my_client_index;
dmp->handle = sp->vpp_session_handle;
-// vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & dmp);
if (!unix_shared_memory_queue_add (tm->vl_input_queue, (u8 *) & dmp,
1))
{
@@ -247,7 +249,6 @@ VLIB_REGISTER_NODE (builtin_client_node) =
};
/* *INDENT-ON* */
-
/* So we don't get "no handler for... " msgs */
static void
vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
@@ -255,76 +256,10 @@ vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
vlib_main_t *vm = vlib_get_main ();
tclient_main_t *tm = &tclient_main;
tm->my_client_index = mp->index;
- vlib_process_signal_event (vm, tm->node_index, 1 /* evt */ ,
+ vlib_process_signal_event (vm, tm->cli_node_index, 1 /* evt */ ,
0 /* data */ );
}
-static void
-vl_api_connect_uri_reply_t_handler (vl_api_connect_uri_reply_t * mp)
-{
- tclient_main_t *tm = &tclient_main;
- session_t *session;
- u32 session_index;
- i32 retval = /* clib_net_to_host_u32 ( */ mp->retval /*) */ ;
- int i;
-
- if (retval < 0)
- {
- clib_warning ("connection failed: retval %d", retval);
- return;
- }
-
- tm->our_event_queue =
- uword_to_pointer (mp->vpp_event_queue_address,
- unix_shared_memory_queue_t *);
- tm->vpp_event_queue =
- uword_to_pointer (mp->vpp_event_queue_address,
- unix_shared_memory_queue_t *);
-
- /*
- * Setup session
- */
- pool_get (tm->sessions, session);
- memset (session, 0, sizeof (*session));
- session_index = session - tm->sessions;
- session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send;
-
- session->server_rx_fifo =
- uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *);
- session->server_rx_fifo->client_session_index = session_index;
- session->server_tx_fifo =
- uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *);
- session->server_tx_fifo->client_session_index = session_index;
- session->vpp_session_handle = mp->handle;
-
- /* Add it to the session lookup table */
- hash_set (tm->session_index_by_vpp_handles, mp->handle, session_index);
-
- if (tm->ready_connections == tm->expected_connections - 1)
- {
- vlib_thread_main_t *thread_main = vlib_get_thread_main ();
- int thread_index;
-
- thread_index = 0;
- for (i = 0; i < pool_elts (tm->sessions); i++)
- {
- vec_add1 (tm->connection_index_by_thread[thread_index], i);
- thread_index++;
- if (thread_index == thread_main->n_vlib_mains)
- thread_index = 0;
- }
- }
- __sync_fetch_and_add (&tm->ready_connections, 1);
- if (tm->ready_connections == tm->expected_connections)
- {
- tm->run_test = 1;
- tm->test_start_time = vlib_time_now (tm->vlib_main);
- /* Signal the CLI process that the action is starting... */
- vlib_process_signal_event (tm->vlib_main, tm->cli_node_index,
- 1, 0 /* data */ );
- }
-}
-
static int
create_api_loopback (tclient_main_t * tm)
{
@@ -347,12 +282,11 @@ create_api_loopback (tclient_main_t * tm)
mp->_vl_msg_id = VL_API_MEMCLNT_CREATE;
mp->context = 0xFEEDFACE;
mp->input_queue = pointer_to_uword (tm->vl_input_queue);
- strncpy ((char *) mp->name, "tcp_tester", sizeof (mp->name) - 1);
+ strncpy ((char *) mp->name, "tcp_clients_tester", sizeof (mp->name) - 1);
vl_api_memclnt_create_t_handler (mp);
/* Wait for reply */
- tm->node_index = vlib_get_current_process (vm)->node_runtime.node_index;
vlib_process_wait_for_event_or_clock (vm, 1.0);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
@@ -373,7 +307,6 @@ create_api_loopback (tclient_main_t * tm)
#define foreach_tclient_static_api_msg \
_(MEMCLNT_CREATE_REPLY, memclnt_create_reply) \
-_(CONNECT_URI_REPLY, connect_uri_reply)
static clib_error_t *
tclient_api_hookup (vlib_main_t * vm)
@@ -411,8 +344,8 @@ tcp_test_clients_init (vlib_main_t * vm)
if (create_api_loopback (tm))
return -1;
- /* Init test data */
- vec_validate (tm->connect_test_data, 64 * 1024 - 1);
+ /* Init test data. Big buffer */
+ vec_validate (tm->connect_test_data, 1024 * 1024 - 1);
for (i = 0; i < vec_len (tm->connect_test_data); i++)
tm->connect_test_data[i] = i & 0xff;
@@ -430,37 +363,66 @@ static int
builtin_session_connected_callback (u32 app_index, u32 api_context,
stream_session_t * s, u8 is_fail)
{
- vl_api_connect_uri_reply_t _m, *mp = &_m;
- unix_shared_memory_queue_t *q;
- application_t *app;
- unix_shared_memory_queue_t *vpp_queue;
+ tclient_main_t *tm = &tclient_main;
+ session_t *session;
+ u32 session_index;
+ int i;
- app = application_get (app_index);
- q = vl_api_client_index_to_input_queue (app->api_client_index);
+ if (is_fail)
+ {
+ clib_warning ("connection %d failed!", api_context);
+ vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, -1,
+ 0 /* data */ );
+ return -1;
+ }
- if (!q)
- return -1;
+ /* Mark vpp session as connected */
+ s->session_state = SESSION_STATE_READY;
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY);
- mp->context = api_context;
- if (!is_fail)
+ tm->our_event_queue = session_manager_get_vpp_event_queue (s->thread_index);
+ tm->vpp_event_queue = session_manager_get_vpp_event_queue (s->thread_index);
+
+ /*
+ * Setup session
+ */
+ pool_get (tm->sessions, session);
+ memset (session, 0, sizeof (*session));
+ session_index = session - tm->sessions;
+ session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send;
+ session->server_rx_fifo = s->server_rx_fifo;
+ session->server_rx_fifo->client_session_index = session_index;
+ session->server_tx_fifo = s->server_tx_fifo;
+ session->server_tx_fifo->client_session_index = session_index;
+ session->vpp_session_handle = stream_session_handle (s);
+
+ /* Add it to the session lookup table */
+ hash_set (tm->session_index_by_vpp_handles, session->vpp_session_handle,
+ session_index);
+
+ if (tm->ready_connections == tm->expected_connections - 1)
{
- vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
- mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
- mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
- mp->handle = stream_session_handle (s);
- mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
- mp->retval = 0;
- s->session_state = SESSION_STATE_READY;
+ vlib_thread_main_t *thread_main = vlib_get_thread_main ();
+ int thread_index;
+
+ thread_index = 0;
+ for (i = 0; i < pool_elts (tm->sessions); i++)
+ {
+ vec_add1 (tm->connection_index_by_thread[thread_index], i);
+ thread_index++;
+ if (thread_index == thread_main->n_vlib_mains)
+ thread_index = 0;
+ }
}
- else
+ __sync_fetch_and_add (&tm->ready_connections, 1);
+ if (tm->ready_connections == tm->expected_connections)
{
- mp->retval = clib_host_to_net_u32 (VNET_API_ERROR_SESSION_CONNECT_FAIL);
+ tm->run_test = 1;
+ tm->test_start_time = vlib_time_now (tm->vlib_main);
+ /* Signal the CLI process that the action is starting... */
+ vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, 1,
+ 0 /* data */ );
}
- vl_api_connect_uri_reply_t_handler (mp);
-
return 0;
}
@@ -489,23 +451,22 @@ builtin_server_rx_callback (stream_session_t * s)
}
/* *INDENT-OFF* */
-static session_cb_vft_t builtin_clients =
- {
- .session_reset_callback = builtin_session_reset_callback,
- .session_connected_callback = builtin_session_connected_callback,
- .session_accept_callback = builtin_session_create_callback,
- .session_disconnect_callback = builtin_session_disconnect_callback,
- .builtin_server_rx_callback = builtin_server_rx_callback
- };
+static session_cb_vft_t builtin_clients = {
+ .session_reset_callback = builtin_session_reset_callback,
+ .session_connected_callback = builtin_session_connected_callback,
+ .session_accept_callback = builtin_session_create_callback,
+ .session_disconnect_callback = builtin_session_disconnect_callback,
+ .builtin_server_rx_callback = builtin_server_rx_callback
+};
/* *INDENT-ON* */
static int
-attach_builtin_test_clients ()
+attach_builtin_test_clients_app (void)
{
tclient_main_t *tm = &tclient_main;
vnet_app_attach_args_t _a, *a = &_a;
u8 segment_name[128];
- u32 segment_name_length;
+ u32 segment_name_length, prealloc_fifos;
u64 options[16];
segment_name_length = ARRAY_LEN (segment_name);
@@ -518,13 +479,68 @@ attach_builtin_test_clients ()
a->segment_name_length = segment_name_length;
a->session_cb_vft = &builtin_clients;
+ prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1;
+
options[SESSION_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
- options[SESSION_OPTIONS_SEGMENT_SIZE] = (2 << 30); /*$$$$ config / arg */
+ options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
+ options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
+ options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size / 2;
+ options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
+
options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
a->options = options;
- return vnet_application_attach (a);
+ if (vnet_application_attach (a))
+ return -1;
+
+ tm->app_index = a->app_index;
+ return 0;
+}
+
+static void *
+tclient_thread_fn (void *arg)
+{
+ return 0;
+}
+
+/** Start a transmit thread */
+int
+start_tx_pthread (tclient_main_t * tm)
+{
+ if (tm->client_thread_handle == 0)
+ {
+ int rv = pthread_create (&tm->client_thread_handle,
+ NULL /*attr */ ,
+ tclient_thread_fn, 0);
+ if (rv)
+ {
+ tm->client_thread_handle = 0;
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void
+clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients)
+{
+ tclient_main_t *tm = &tclient_main;
+ vnet_connect_args_t _a, *a = &_a;
+ int i;
+ for (i = 0; i < n_clients; i++)
+ {
+ memset (a, 0, sizeof (*a));
+
+ a->uri = (char *) uri;
+ a->api_context = i;
+ a->app_index = tm->app_index;
+ a->mp = 0;
+ vnet_connect_uri (a);
+
+ /* Crude pacing for call setups, 100k/sec */
+ vlib_process_suspend (vm, 10e-6);
+ }
}
static clib_error_t *
@@ -534,17 +550,18 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
{
tclient_main_t *tm = &tclient_main;
vlib_thread_main_t *thread_main = vlib_get_thread_main ();
- uword *event_data = 0;
- uword event_type;
- u8 *connect_uri = (u8 *) "tcp://6.0.1.1/1234";
- u8 *uri;
+ uword *event_data = 0, event_type;
+ u8 *default_connect_uri = (u8 *) "tcp://6.0.1.1/1234", *uri;
+ u64 tmp, total_bytes;
+ f64 cli_timeout = 20.0, delta;
u32 n_clients = 1;
+ char *transfer_type;
int i;
- u64 tmp;
- f64 cli_timeout = 20.0;
- f64 delta;
tm->bytes_to_send = 8192;
+ tm->no_return = 0;
+ tm->fifo_size = 64 << 10;
+
vec_free (tm->connect_uri);
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -561,11 +578,18 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
;
else if (unformat (input, "cli-timeout %f", &cli_timeout))
;
+ else if (unformat (input, "no-return"))
+ tm->no_return = 1;
+ else if (unformat (input, "fifo-size %d", &tm->fifo_size))
+ tm->fifo_size <<= 10;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
+ /* Store cli process node index for signalling */
+ tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
if (tm->is_init == 0)
{
if (tcp_test_clients_init (vm))
@@ -575,28 +599,25 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
tm->ready_connections = 0;
tm->expected_connections = n_clients;
tm->rx_total = 0;
+ tm->tx_total = 0;
- uri = connect_uri;
+ uri = default_connect_uri;
if (tm->connect_uri)
uri = tm->connect_uri;
#if TCP_BUILTIN_CLIENT_PTHREAD
- /* Start a transmit thread */
- if (tm->client_thread_handle == 0)
+ start_tx_pthread ();
+#endif
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
+ if (tm->test_client_attached == 0)
{
- int rv = pthread_create (&tm->client_thread_handle,
- NULL /*attr */ ,
- tclient_thread_fn, 0);
- if (rv)
+ if (attach_builtin_test_clients_app ())
{
- tm->client_thread_handle = 0;
- return clib_error_return (0, "pthread_create returned %d", rv);
+ return clib_error_return (0, "app attach failed");
}
}
-#endif
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
- if (tm->test_client_attached == 0)
- attach_builtin_test_clients ();
tm->test_client_attached = 1;
/* Turn on the builtin client input nodes */
@@ -604,25 +625,8 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
vlib_node_set_state (vlib_mains[i], builtin_client_node.index,
VLIB_NODE_STATE_POLLING);
- tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
-
/* Fire off connect requests */
- for (i = 0; i < n_clients; i++)
- {
- vl_api_connect_uri_t _cmp, *cmp = &_cmp;
- void vl_api_connect_uri_t_handler (vl_api_connect_uri_t * cmp);
-
- memset (cmp, 0, sizeof (*cmp));
-
- cmp->_vl_msg_id = ntohs (VL_API_CONNECT_URI);
- cmp->client_index = tm->my_client_index;
- cmp->context = ntohl (0xfeedface);
- memcpy (cmp->uri, uri, strlen ((char *) uri) + 1);
-
- vl_api_connect_uri_t_handler (cmp);
- /* Crude pacing for call setups, 100k/sec */
- vlib_process_suspend (vm, 10e-6);
- }
+ clients_connect (vm, uri, n_clients);
/* Park until the sessions come up, or ten seconds elapse... */
vlib_process_wait_for_event_or_clock (vm, 10.0 /* timeout, seconds */ );
@@ -668,14 +672,17 @@ test_tcp_clients_command_fn (vlib_main_t * vm,
if (delta != 0.0)
{
+ total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total);
+ transfer_type = tm->no_return ? "half-duplex" : "full-duplex";
vlib_cli_output (vm,
"%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
- tm->rx_total, tm->rx_total / (1ULL << 20),
- tm->rx_total / (1ULL << 30), delta);
- vlib_cli_output (vm, "%.2f bytes/second full-duplex",
- ((f64) tm->rx_total) / (delta));
- vlib_cli_output (vm, "%.4f gbit/second full-duplex",
- (((f64) tm->rx_total * 8.0) / delta / 1e9));
+ total_bytes, total_bytes / (1ULL << 20),
+ total_bytes / (1ULL << 30), delta);
+ vlib_cli_output (vm, "%.2f bytes/second %s",
+ ((f64) total_bytes) / (delta), transfer_type);
+ vlib_cli_output (vm, "%.4f gbit/second %s",
+ (((f64) total_bytes * 8.0) / delta / 1e9),
+ transfer_type);
}
else
vlib_cli_output (vm, "zero delta-t?");
diff --git a/src/vnet/tcp/builtin_client.h b/src/vnet/tcp/builtin_client.h
index d5d79e53..3462e0ee 100644
--- a/src/vnet/tcp/builtin_client.h
+++ b/src/vnet/tcp/builtin_client.h
@@ -44,78 +44,59 @@ typedef struct
typedef struct
{
- /* API message ID base */
- u16 msg_id_base;
-
- /* vpe input queue */
- unix_shared_memory_queue_t *vl_input_queue;
-
- /* API client handle */
- u32 my_client_index;
-
- /* The URI we're playing with */
- u8 *uri;
-
- /* Session pool */
- session_t *sessions;
-
- /* Hash table for disconnect processing */
- uword *session_index_by_vpp_handles;
-
- /* intermediate rx buffer */
- u8 *rx_buf;
-
- /* URI for slave's connect */
- u8 *connect_uri;
-
- u32 connected_session_index;
-
- int i_am_master;
-
- /* drop all packets */
- int drop_packets;
-
- /* Our event queue */
- unix_shared_memory_queue_t *our_event_queue;
-
- /* $$$ single thread only for the moment */
- unix_shared_memory_queue_t *vpp_event_queue;
-
- pid_t my_pid;
-
- f64 test_start_time;
- f64 test_end_time;
-
- u32 expected_connections;
+ /*
+ * Application setup parameters
+ */
+ unix_shared_memory_queue_t *vl_input_queue; /**< vpe input queue */
+ unix_shared_memory_queue_t *our_event_queue; /**< Our event queue */
+ unix_shared_memory_queue_t *vpp_event_queue; /**< $$$ single thread */
+
+ u32 cli_node_index; /**< cli process node index */
+ u32 my_client_index; /**< loopback API client handle */
+ u32 app_index; /**< app index after attach */
+
+ /*
+ * Configuration params
+ */
+ u8 *connect_uri; /**< URI for slave's connect */
+ u64 bytes_to_send; /**< Bytes to send */
+ u32 configured_segment_size;
+ u32 fifo_size;
+ u32 expected_connections; /**< Number of clients/connections */
+
+ /*
+ * Test state variables
+ */
+ session_t *sessions; /**< Sessions pool */
+ u8 *rx_buf; /**< intermediate rx buffer */
+ uword *session_index_by_vpp_handles; /**< Hash table for disconnecting */
+ u8 *connect_test_data; /**< Pre-computed test data */
u32 **connection_index_by_thread;
+ pthread_t client_thread_handle;
+
volatile u32 ready_connections;
volatile u32 finished_connections;
-
volatile u64 rx_total;
- u32 cli_node_index;
-
- /* Signal variable */
- volatile int run_test;
-
- /* Bytes to send */
- u64 bytes_to_send;
-
- u32 configured_segment_size;
+ volatile u64 tx_total;
+ volatile int run_test; /**< Signal start of test */
- /* VNET_API_ERROR_FOO -> "Foo" hash table */
- uword *error_string_by_error_number;
-
- u8 *connect_test_data;
- pthread_t client_thread_handle;
- u64 client_bytes_received;
- u8 test_return_packets;
+ f64 test_start_time;
+ f64 test_end_time;
+ /*
+ * Flags
+ */
u8 is_init;
u8 test_client_attached;
+ u8 no_return;
+ u8 test_return_packets;
+ int i_am_master;
+ int drop_packets; /**< drop all packets */
+ u8 prealloc_fifos; /**< Request fifo preallocation */
- u32 node_index;
-
- /* convenience */
+ /*
+ * Convenience
+ */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
ethernet_main_t *ethernet_main;
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index 8bd2f360..775bfc26 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -39,21 +39,30 @@
typedef struct
{
- /* Per-thread RX buffer */
- u8 **rx_buf;
+ /*
+ * Server app parameters
+ */
unix_shared_memory_queue_t **vpp_queue;
- u64 byte_index;
+ unix_shared_memory_queue_t *vl_input_queue; /**< Sever's event queue */
- /* Sever's event queue */
- unix_shared_memory_queue_t *vl_input_queue;
+ u32 app_index; /**< Server app index */
+ u32 my_client_index; /**< API client handle */
+ u32 node_index; /**< process node index for evnt scheduling */
- /* API client handle */
- u32 my_client_index;
+ /*
+ * Config params
+ */
+ u8 no_echo; /**< Don't echo traffic */
+ u32 fifo_size; /**< Fifo size */
+ u32 rcv_buffer_size; /**< Rcv buffer size */
+ u32 prealloc_fifos; /**< Preallocate fifos */
- u32 app_index;
+ /*
+ * Test state
+ */
+ u8 **rx_buf; /**< Per-thread RX buffer */
+ u64 byte_index;
- /* process node index for evnt scheduling */
- u32 node_index;
vlib_main_t *vlib_main;
} builtin_server_main_t;
@@ -132,6 +141,29 @@ test_bytes (builtin_server_main_t * bsm, int actual_transfer)
bsm->byte_index += actual_transfer;
}
+/*
+ * If no-echo, just read the data and be done with it
+ */
+int
+builtin_server_rx_callback_no_echo (stream_session_t * s)
+{
+ builtin_server_main_t *bsm = &builtin_server_main;
+ u32 my_thread_id = vlib_get_thread_index ();
+ int actual_transfer;
+ svm_fifo_t *rx_fifo;
+
+ rx_fifo = s->server_rx_fifo;
+
+ do
+ {
+ actual_transfer =
+ svm_fifo_dequeue_nowait (rx_fifo, bsm->rcv_buffer_size,
+ bsm->rx_buf[my_thread_id]);
+ }
+ while (actual_transfer > 0);
+ return 0;
+}
+
int
builtin_server_rx_callback (stream_session_t * s)
{
@@ -143,8 +175,8 @@ builtin_server_rx_callback (stream_session_t * s)
static int serial_number = 0;
u32 my_thread_id = vlib_get_thread_index ();
- tx_fifo = s->server_tx_fifo;
rx_fifo = s->server_rx_fifo;
+ tx_fifo = s->server_tx_fifo;
max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
max_enqueue = svm_fifo_max_enqueue (s->server_tx_fifo);
@@ -164,19 +196,22 @@ builtin_server_rx_callback (stream_session_t * s)
/* Program self-tap to retry */
if (svm_fifo_set_event (rx_fifo))
{
+ unix_shared_memory_queue_t *q;
evt.fifo = rx_fifo;
evt.event_type = FIFO_EVENT_BUILTIN_RX;
evt.event_id = 0;
- unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index],
- (u8 *) & evt,
- 0 /* do wait for mutex */ );
+
+ q = bsm->vpp_queue[s->thread_index];
+ if (PREDICT_FALSE (q->cursize == q->maxsize))
+ clib_warning ("out of event queue space");
+ else
+ unix_shared_memory_queue_add (q, (u8 *) & evt,
+ 0 /* don't wait for mutex */ );
}
return 0;
}
- vec_validate (bsm->rx_buf, my_thread_id);
- vec_validate (bsm->rx_buf[my_thread_id], max_transfer - 1);
_vec_len (bsm->rx_buf[my_thread_id]) = max_transfer;
actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer,
@@ -281,14 +316,21 @@ server_attach ()
memset (a, 0, sizeof (*a));
memset (options, 0, sizeof (options));
+ if (bsm->no_echo)
+ builtin_session_cb_vft.builtin_server_rx_callback =
+ builtin_server_rx_callback_no_echo;
+ else
+ builtin_session_cb_vft.builtin_server_rx_callback =
+ builtin_server_rx_callback;
a->api_client_index = bsm->my_client_index;
a->session_cb_vft = &builtin_session_cb_vft;
a->options = options;
a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20;
- a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 64 << 10;
- a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 64 << 10;
+ a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = bsm->fifo_size;
+ a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = bsm->fifo_size;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
- a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = 8192;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
+ bsm->prealloc_fifos ? bsm->prealloc_fifos : 1;
a->segment_name = segment_name;
a->segment_name_length = ARRAY_LEN (segment_name);
@@ -316,17 +358,24 @@ static int
server_create (vlib_main_t * vm)
{
builtin_server_main_t *bsm = &builtin_server_main;
- u32 num_threads;
vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ int i;
if (bsm->my_client_index == (u32) ~ 0)
{
if (create_api_loopback (vm))
- return -1;
+ {
+ clib_warning ("failed to create api loopback");
+ return -1;
+ }
}
num_threads = 1 /* main thread */ + vtm->n_threads;
vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
+ vec_validate (bsm->rx_buf, num_threads - 1);
+ for (i = 0; i < num_threads; i++)
+ vec_validate (bsm->rx_buf[i], bsm->rcv_buffer_size);
if (server_attach ())
{
@@ -381,23 +430,35 @@ tcp_builtin_server_api_hookup (vlib_main_t * vm)
}
static clib_error_t *
-server_create_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
{
+ builtin_server_main_t *bsm = &builtin_server_main;
int rv;
-#if 0
+
+ bsm->no_echo = 0;
+ bsm->fifo_size = 64 << 10;
+ bsm->rcv_buffer_size = 128 << 10;
+ bsm->prealloc_fifos = 0;
+
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "whatever %d", &whatever))
+ if (unformat (input, "no-echo"))
+ bsm->no_echo = 1;
+ else if (unformat (input, "fifo-size %d", &bsm->fifo_size))
+ bsm->fifo_size <<= 10;
+ else if (unformat (input, "rcv-buf-size %d", &bsm->rcv_buffer_size))
+ ;
+ else if (unformat (input, "prealloc-fifos", &bsm->prealloc_fifos))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
-#endif
tcp_builtin_server_api_hookup (vm);
vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
rv = server_create (vm);
switch (rv)
{
@@ -406,6 +467,7 @@ server_create_command_fn (vlib_main_t * vm,
default:
return clib_error_return (0, "server_create returned %d", rv);
}
+
return 0;
}
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index e0b67a8e..5c554bac 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -726,15 +726,25 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
u32
tcp_snd_space (tcp_connection_t * tc)
{
- int snd_space;
+ int snd_space, snt_limited;
- /* If we haven't gotten dupacks or if we did and have gotten sacked bytes
- * then we can still send */
- if (PREDICT_TRUE (tcp_in_cong_recovery (tc) == 0
- && (tc->rcv_dupacks == 0
- || tc->sack_sb.last_sacked_bytes)))
+ if (PREDICT_TRUE (tcp_in_cong_recovery (tc) == 0))
{
snd_space = tcp_available_snd_space (tc);
+
+ /* If we haven't gotten dupacks or if we did and have gotten sacked
+ * bytes then we can still send as per Limited Transmit (RFC3042) */
+ if (PREDICT_FALSE (tc->rcv_dupacks != 0
+ && (tcp_opts_sack_permitted (tc)
+ && tc->sack_sb.last_sacked_bytes == 0)))
+ {
+ if (tc->rcv_dupacks == 1 && tc->limited_transmit != tc->snd_nxt)
+ tc->limited_transmit = tc->snd_nxt;
+ ASSERT (seq_leq (tc->limited_transmit, tc->snd_nxt));
+
+ snt_limited = tc->snd_nxt - tc->limited_transmit;
+ snd_space = clib_max (2 * tc->snd_mss - snt_limited, 0);
+ }
return tcp_round_snd_space (tc, snd_space);
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 071f1ab1..e8398718 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -31,9 +31,9 @@
#define TCP_MAX_OPTION_SPACE 40
#define TCP_DUPACK_THRESHOLD 3
-#define TCP_MAX_RX_FIFO_SIZE 2 << 20
+#define TCP_MAX_RX_FIFO_SIZE 4 << 20
#define TCP_IW_N_SEGMENTS 10
-#define TCP_ALWAYS_ACK 0 /**< If on, we always ack */
+#define TCP_ALWAYS_ACK 1 /**< On/off delayed acks */
#define TCP_USE_SACKS 1 /**< Disable only for testing */
/** TCP FSM state definitions as per RFC793. */
@@ -100,6 +100,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
#define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */
#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */
+#define TCP_RTO_MIN 0.2 * THZ /* Min RTO (200ms) - lower than standard */
#define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */
#define TCP_RTO_SYN_RETRIES 3 /* SYN retries without doubling RTO */
#define TCP_RTO_INIT 1 * THZ /* Initial retransmit timer */
@@ -149,7 +150,7 @@ enum
#undef _
};
-#define TCP_MAX_SACK_BLOCKS 5 /**< Max number of SACK blocks stored */
+#define TCP_MAX_SACK_BLOCKS 15 /**< Max number of SACK blocks stored */
#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0)
typedef struct _sack_scoreboard_hole
@@ -208,6 +209,7 @@ typedef struct _tcp_connection
u32 snd_wl1; /**< seq number used for last snd.wnd update */
u32 snd_wl2; /**< ack number used for last snd.wnd update */
u32 snd_nxt; /**< next seq number to be sent */
+ u16 snd_mss; /**< Effective send max seg (data) size */
/** Receive sequence variables RFC793 */
u32 rcv_nxt; /**< next sequence number expected */
@@ -252,8 +254,8 @@ typedef struct _tcp_connection
u32 rtt_ts; /**< Timestamp for tracked ACK */
u32 rtt_seq; /**< Sequence number for tracked ACK */
- u16 snd_mss; /**< Effective send max seg (data) size */
u16 mss; /**< Our max seg size that includes options */
+ u32 limited_transmit; /**< snd_nxt when limited transmit starts */
} tcp_connection_t;
struct _tcp_cc_algorithm
@@ -433,6 +435,7 @@ tcp_end_seq (tcp_header_t * th, u32 len)
#define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0)
#define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0)
#define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0)
+#define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2))
/* Modulo arithmetic for timestamps */
#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
@@ -719,6 +722,7 @@ scoreboard_clear (sack_scoreboard_t * sb)
{
scoreboard_remove_hole (sb, hole);
}
+ ASSERT (sb->head == sb->tail && sb->head == TCP_INVALID_SACK_HOLE_INDEX);
sb->sacked_bytes = 0;
sb->last_sacked_bytes = 0;
sb->last_bytes_delivered = 0;
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 3a16cf63..ae68ad1b 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -19,8 +19,10 @@
#include <vlib/vlib.h>
#define TCP_DEBUG (1)
+#define TCP_DEBUG_SM (0)
#define TCP_DEBUG_CC (1)
-#define TCP_DEBUG_VERBOSE (0)
+#define TCP_DEBUG_CC_STAT (1)
+#define TCP_DEBUG_SM_VERBOSE (0)
#define foreach_tcp_dbg_evt \
_(INIT, "") \
@@ -49,6 +51,8 @@
_(CC_RTX, "retransmit") \
_(CC_EVT, "cc event") \
_(CC_PACK, "cc partial ack") \
+ _(CC_STAT, "cc stats") \
+ _(CC_RTO_STAT, "cc rto stats") \
_(SEG_INVALID, "invalid segment") \
_(PAWS_FAIL, "failed paws check") \
_(ACK_RCV_ERR, "invalid ack") \
@@ -72,6 +76,10 @@ typedef enum _tcp_dbg_evt
#define TRANSPORT_DEBUG (1)
+/*
+ * Infra and evt track setup
+ */
+
#define TCP_DBG(_tc, _evt, _args...) \
{ \
u8 *_tmp = 0; \
@@ -158,6 +166,30 @@ typedef enum _tcp_dbg_evt
TCP_EVT_DEALLOC_HANDLER(_tc); \
}
+#define TCP_EVT_SYN_RCVD_HANDLER(_tc, ...) \
+{ \
+ TCP_EVT_INIT_HANDLER(_tc, "s%d%c"); \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "SYNrx: irs %u", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->irs; \
+}
+
+#define CONCAT_HELPER(_a, _b) _a##_b
+#define CC(_a, _b) CONCAT_HELPER(_a, _b)
+#define TCP_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
+#else
+#define TCP_EVT_DBG(_evt, _args...)
+#endif
+
+/*
+ * State machine
+ */
+#if TCP_DEBUG_SM
+
#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -234,18 +266,6 @@ typedef enum _tcp_dbg_evt
ed->data[1] = _tc->rcv_nxt - _tc->irs; \
}
-#define TCP_EVT_SYN_RCVD_HANDLER(_tc, ...) \
-{ \
- TCP_EVT_INIT_HANDLER(_tc, "s%d%c"); \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "SYNrx: irs %u", \
- .format_args = "i4", \
- }; \
- DECLARE_ETD(_tc, _e, 1); \
- ed->data[0] = _tc->irs; \
-}
-
#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...) \
{ \
ELOG_TYPE_DECLARE (_e) = \
@@ -418,6 +438,74 @@ typedef enum _tcp_dbg_evt
ed->data[4] = _tc->snd_una_max - _tc->iss; \
}
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...) \
+{ \
+if (_av > 0) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u", \
+ .format_args = "i4i4i4i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->rcv_wnd; \
+ ed->data[1] = _obs; \
+ ed->data[2] = _av; \
+ ed->data[3] = _tc->rcv_nxt - _tc->irs; \
+ ed->data[4] = _tc->rcv_las - _tc->irs; \
+} \
+}
+#else
+#define TCP_EVT_ACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_SYN_RTX_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_RST_SENT_HANDLER(_tc, ...)
+#define TCP_EVT_FIN_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_RST_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_ACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_DUPACK_RCVD_HANDLER(_tc, ...)
+#define TCP_EVT_PKTIZE_HANDLER(_tc, ...)
+#define TCP_EVT_INPUT_HANDLER(_tc, _type, _len, _written, ...)
+#define TCP_EVT_TIMER_POP_HANDLER(_tc_index, _timer_id, ...)
+#define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_PAWS_FAIL_HANDLER(_tc, _seq, _end, ...)
+#define TCP_EVT_ACK_RCV_ERR_HANDLER(_tc, _type, _ack, ...)
+#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...)
+#endif
+
+/*
+ * State machine verbose
+ */
+#if TCP_DBG_SM_VERBOSE
+#define TCP_EVT_SND_WND_HANDLER(_tc, ...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "snd_wnd update: %u ", \
+ .format_args = "i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 1); \
+ ed->data[0] = _tc->snd_wnd; \
+}
+
+#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...) \
+{ \
+ ELOG_TYPE_DECLARE (_e) = \
+ { \
+ .format = "out: flags %x, bytes %u", \
+ .format_args = "i4i4", \
+ }; \
+ DECLARE_ETD(_tc, _e, 2); \
+ ed->data[0] = flags; \
+ ed->data[1] = n_bytes; \
+}
+#else
+#define TCP_EVT_SND_WND_HANDLER(_tc, ...)
+#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...)
+#endif
+
/*
* Congestion Control
*/
@@ -471,67 +559,59 @@ typedef enum _tcp_dbg_evt
ed->data[1] = _tc->snd_una_max - _tc->iss; \
}
-#else
-#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
-#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, _snd_space, ...)
-#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
-#endif
+/*
+ * Congestion control stats
+ */
+#if TCP_DEBUG_CC_STAT
-#define TCP_EVT_RCV_WND_SHRUNK_HANDLER(_tc, _obs, _av, ...) \
+#define STATS_INTERVAL 1
+
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
{ \
-if (_av > 0) \
+if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "huh?: rcv_wnd %u obsd %u av %u rcv_nxt %u rcv_las %u", \
- .format_args = "i4i4i4i4i4", \
+ .format = "rto_stat: rto %u srtt %u rttvar %u ", \
+ .format_args = "i4i4i4", \
}; \
- DECLARE_ETD(_tc, _e, 5); \
- ed->data[0] = _tc->rcv_wnd; \
- ed->data[1] = _obs; \
- ed->data[2] = _av; \
- ed->data[3] = _tc->rcv_nxt - _tc->irs; \
- ed->data[4] = _tc->rcv_las - _tc->irs; \
+ DECLARE_ETD(_tc, _e, 3); \
+ ed->data[0] = _tc->rto; \
+ ed->data[1] = _tc->srtt; \
+ ed->data[2] = _tc->rttvar; \
} \
}
-#if TCP_DBG_VERBOSE
-#define TCP_EVT_SND_WND_HANDLER(_tc, ...) \
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "snd_wnd update: %u ", \
- .format_args = "i4", \
- }; \
- DECLARE_ETD(_tc, _e, 1); \
- ed->data[0] = _tc->snd_wnd; \
-}
-
-#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...) \
+if (_tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now()) \
{ \
ELOG_TYPE_DECLARE (_e) = \
{ \
- .format = "out: flags %x, bytes %u", \
- .format_args = "i4i4", \
+ .format = "cc_stat: cwnd %u flight %u space %u ssthresh %u snd_wnd %u",\
+ .format_args = "i4i4i4i4i4", \
}; \
- DECLARE_ETD(_tc, _e, 2); \
- ed->data[0] = flags; \
- ed->data[1] = n_bytes; \
+ DECLARE_ETD(_tc, _e, 5); \
+ ed->data[0] = _tc->cwnd; \
+ ed->data[1] = tcp_flight_size (_tc); \
+ ed->data[2] = tcp_snd_space (_tc); \
+ ed->data[3] = _tc->ssthresh; \
+ ed->data[4] = _tc->snd_wnd; \
+ TCP_EVT_CC_RTO_STAT_HANDLER (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now(); \
+} \
}
+
#else
-#define TCP_EVT_SND_WND_HANDLER(_tc, ...)
-#define TCP_EVT_OUTPUT_HANDLER(_tc, flags, n_bytes,...)
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#endif
-#define CONCAT_HELPER(_a, _b) _a##_b
-#define CC(_a, _b) CONCAT_HELPER(_a, _b)
-#define TCP_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
-
#else
-#define TCP_EVT_DBG(_evt, _args...)
+#define TCP_EVT_CC_RTX_HANDLER(_tc, offset, n_bytes, ...)
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#define TCP_EVT_CC_PACK_HANDLER(_tc, ...)
#endif
-
#endif /* SRC_VNET_TCP_TCP_DEBUG_H_ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index ff2229b3..a2e6dad1 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -378,16 +378,20 @@ tcp_rcv_ack_is_acceptable (tcp_connection_t * tc0, vlib_buffer_t * tb0)
static void
tcp_estimate_rtt (tcp_connection_t * tc, u32 mrtt)
{
- int err;
+ int err, diff;
if (tc->srtt != 0)
{
err = mrtt - tc->srtt;
- tc->srtt += err >> 3;
+// tc->srtt += err >> 3;
/* XXX Drop in RTT results in RTTVAR increase and bigger RTO.
* The increase should be bound */
- tc->rttvar += ((int) clib_abs (err) - (int) tc->rttvar) >> 2;
+// tc->rttvar += ((int) clib_abs (err) - (int) tc->rttvar) >> 2;
+
+ tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
+ diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
+ tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
}
else
{
@@ -401,6 +405,7 @@ void
tcp_update_rto (tcp_connection_t * tc)
{
tc->rto = clib_min (tc->srtt + (tc->rttvar << 2), TCP_RTO_MAX);
+ tc->rto = clib_max (tc->rto, TCP_RTO_MIN);
}
/** Update RTT estimate and RTO timer
@@ -417,8 +422,8 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack)
u32 mrtt = 0;
u8 rtx_acked;
- /* Determine if only rtx bytes are acked. TODO XXX fast retransmit */
- rtx_acked = tc->rto_boff && (tc->bytes_acked <= tc->snd_mss);
+ /* Determine if only rtx bytes are acked. */
+ rtx_acked = tcp_in_cong_recovery (tc) || !tc->bytes_acked;
/* Karn's rule, part 1. Don't use retransmitted segments to estimate
* RTT because they're ambiguous. */
@@ -428,8 +433,7 @@ tcp_update_rtt (tcp_connection_t * tc, u32 ack)
}
/* As per RFC7323 TSecr can be used for RTTM only if the segment advances
* snd_una, i.e., the left side of the send window:
- * seq_lt (tc->snd_una, ack). Note: last condition could be dropped, we don't
- * try to update rtt for dupacks */
+ * seq_lt (tc->snd_una, ack). */
else if (tcp_opts_tstamp (&tc->rcv_opts) && tc->rcv_opts.tsecr
&& tc->bytes_acked)
{
@@ -550,11 +554,13 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index,
prev = scoreboard_get_hole (sb, prev_index);
if (prev)
{
- hole->prev = prev - sb->holes;
+ hole->prev = prev_index;
hole->next = prev->next;
if ((next = scoreboard_next_hole (sb, hole)))
next->prev = hole_index;
+ else
+ sb->tail = hole_index;
prev->next = hole_index;
}
@@ -569,12 +575,13 @@ scoreboard_insert_hole (sack_scoreboard_t * sb, u32 prev_index,
}
void
-scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb)
+scoreboard_update_bytes (tcp_connection_t * tc, sack_scoreboard_t * sb)
{
sack_scoreboard_hole_t *hole, *prev;
u32 bytes = 0, blks = 0;
sb->lost_bytes = 0;
+ sb->sacked_bytes = 0;
hole = scoreboard_last_hole (sb);
if (!hole)
return;
@@ -594,13 +601,16 @@ scoreboard_update_lost (tcp_connection_t * tc, sack_scoreboard_t * sb)
hole = prev;
}
- hole = prev;
while (hole)
{
sb->lost_bytes += scoreboard_hole_bytes (hole);
hole->is_lost = 1;
+ prev = hole;
hole = scoreboard_prev_hole (sb, hole);
+ if (hole)
+ bytes += prev->start - hole->end;
}
+ sb->sacked_bytes = bytes;
}
/**
@@ -677,7 +687,7 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
{
sack_scoreboard_t *sb = &tc->sack_sb;
sack_block_t *blk, tmp;
- sack_scoreboard_hole_t *hole, *next_hole, *last_hole, *new_hole;
+ sack_scoreboard_hole_t *hole, *next_hole, *last_hole;
u32 blk_index = 0, old_sacked_bytes, hole_index;
int i, j;
@@ -743,6 +753,10 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
if (seq_gt (tc->snd_una_max, sb->high_sacked)
&& seq_gt (tc->snd_una_max, last_hole->end))
last_hole->end = tc->snd_una_max;
+ /* keep track of max byte sacked for when the last hole
+ * is acked */
+ if (seq_gt (tmp.end, sb->high_sacked))
+ sb->high_sacked = tmp.end;
}
/* Walk the holes with the SACK blocks */
@@ -758,45 +772,20 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
{
next_hole = scoreboard_next_hole (sb, hole);
- /* Byte accounting */
- if (seq_leq (hole->end, ack))
- {
- /* Bytes lost because snd_wnd left edge advances */
- if (next_hole && seq_leq (next_hole->start, ack))
- sb->last_bytes_delivered += next_hole->start - hole->end;
- else
- sb->last_bytes_delivered += ack - hole->end;
- }
- else
- {
- sb->sacked_bytes += scoreboard_hole_bytes (hole);
- }
-
- /* About to remove last hole */
- if (hole == last_hole)
- {
- sb->tail = hole->prev;
- last_hole = scoreboard_last_hole (sb);
- /* keep track of max byte sacked for when the last hole
- * is acked */
- if (seq_gt (hole->end, sb->high_sacked))
- sb->high_sacked = hole->end;
- }
-
- /* snd_una needs to be advanced */
- if (blk->end == ack && seq_geq (ack, hole->end))
+ /* Byte accounting: snd_una needs to be advanced */
+ if (blk->end == ack)
{
- if (next_hole && seq_lt (ack, next_hole->start))
+ if (next_hole)
{
- sb->snd_una_adv = next_hole->start - ack;
-
- /* all these can be delivered */
- sb->last_bytes_delivered += sb->snd_una_adv;
+ if (seq_lt (ack, next_hole->start))
+ sb->snd_una_adv = next_hole->start - ack;
+ sb->last_bytes_delivered +=
+ next_hole->start - hole->end;
}
else if (!next_hole)
{
sb->snd_una_adv = sb->high_sacked - ack;
- sb->last_bytes_delivered += sb->snd_una_adv;
+ sb->last_bytes_delivered += sb->high_sacked - hole->end;
}
}
@@ -808,7 +797,6 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
{
if (seq_gt (blk->end, hole->start))
{
- sb->sacked_bytes += blk->end - hole->start;
hole->start = blk->end;
}
blk_index++;
@@ -819,28 +807,16 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
/* Hole must be split */
if (seq_lt (blk->end, hole->end))
{
- sb->sacked_bytes += blk->end - blk->start;
hole_index = scoreboard_hole_index (sb, hole);
- new_hole = scoreboard_insert_hole (sb, hole_index, blk->end,
- hole->end);
+ scoreboard_insert_hole (sb, hole_index, blk->end, hole->end);
/* Pool might've moved */
hole = scoreboard_get_hole (sb, hole_index);
hole->end = blk->start;
-
- /* New or split of tail */
- if ((last_hole->end == new_hole->end)
- || seq_lt (last_hole->end, new_hole->start))
- {
- last_hole = new_hole;
- sb->tail = scoreboard_hole_index (sb, new_hole);
- }
-
blk_index++;
}
- else if (seq_leq (blk->start, hole->end))
+ else if (seq_lt (blk->start, hole->end))
{
- sb->sacked_bytes += hole->end - blk->start;
hole->end = blk->start;
}
@@ -848,9 +824,13 @@ tcp_rcv_sacks (tcp_connection_t * tc, u32 ack)
}
}
- sb->last_sacked_bytes = sb->sacked_bytes - old_sacked_bytes;
- sb->sacked_bytes -= sb->last_bytes_delivered;
- scoreboard_update_lost (tc, sb);
+ scoreboard_update_bytes (tc, sb);
+ sb->last_sacked_bytes = sb->sacked_bytes
+ - (old_sacked_bytes - sb->last_bytes_delivered);
+ ASSERT (sb->sacked_bytes == 0
+ || sb->sacked_bytes < tc->snd_una_max - seq_max (tc->snd_una, ack));
+ ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_una_max
+ - seq_max (tc->snd_una, ack));
}
/**
@@ -998,9 +978,14 @@ tcp_should_fastrecover (tcp_connection_t * tc)
|| tcp_should_fastrecover_sack (tc));
}
+/**
+ * One function to rule them all ... and in the darkness bind them
+ */
static void
tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
{
+ u32 rxt_delivered;
+
/*
* Duplicate ACK. Check if we should enter fast recovery, or if already in
* it account for the bytes that left the network.
@@ -1028,10 +1013,15 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
}
/* If of of the two conditions lower hold, reset dupacks
- * 1) Cumulative ack does not cover more than congestion threshold
+ * 1) Cumulative ack does not cover more than congestion threshold,
+ * and the following doesn't hold: the congestion window is
+ * greater than SMSS bytes and the difference between highest_ack
+ * and prev_highest_ack is at most 4*SMSS bytes (XXX)
* 2) RFC6582 heuristic to avoid multiple fast retransmits
*/
- if (seq_leq (tc->snd_una, tc->snd_congestion)
+ if ((seq_gt (tc->snd_una, tc->snd_congestion)
+ || !(tc->cwnd > tc->snd_mss
+ && tc->bytes_acked <= 4 * tc->snd_mss))
|| tc->rcv_opts.tsecr != tc->tsecr_last_ack)
{
tc->rcv_dupacks = 0;
@@ -1089,7 +1079,10 @@ partial_ack:
{
/* If spurious return, we've already updated everything */
if (tcp_cc_recover (tc))
- return;
+ {
+ tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ return;
+ }
tc->snd_nxt = tc->snd_una_max;
@@ -1115,12 +1108,16 @@ partial_ack:
return;
/* Remove retransmitted bytes that have been delivered */
- if (tc->sack_sb.last_bytes_delivered
- && seq_gt (tc->sack_sb.high_rxt, tc->snd_una))
+ ASSERT (tc->bytes_acked + tc->sack_sb.snd_una_adv
+ >= tc->sack_sb.last_bytes_delivered);
+ rxt_delivered = tc->bytes_acked + tc->sack_sb.snd_una_adv
+ - tc->sack_sb.last_bytes_delivered;
+ if (rxt_delivered && seq_gt (tc->sack_sb.high_rxt, tc->snd_una))
{
/* If we have sacks and we haven't gotten an ack beyond high_rxt,
* remove sacked bytes delivered */
- tc->snd_rxt_bytes -= tc->sack_sb.last_bytes_delivered;
+ ASSERT (tc->snd_rxt_bytes >= rxt_delivered);
+ tc->snd_rxt_bytes -= rxt_delivered;
}
else
{
@@ -1154,6 +1151,8 @@ tcp_rcv_ack (tcp_connection_t * tc, vlib_buffer_t * b,
u32 prev_snd_wnd, prev_snd_una;
u8 is_dack;
+ TCP_EVT_DBG (TCP_EVT_CC_STAT, tc);
+
/* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
@@ -1282,6 +1281,10 @@ tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end)
{
vec_add1 (new_list, tc->snd_sacks[i]);
}
+ else
+ {
+ clib_warning ("sack discarded");
+ }
}
ASSERT (vec_len (new_list) <= TCP_MAX_SACK_BLOCKS);
@@ -1358,16 +1361,18 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
stream_session_t *s0;
int rv;
+ ASSERT (seq_gt (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
+
/* Pure ACK. Do nothing */
if (PREDICT_FALSE (data_len == 0))
{
return TCP_ERROR_PURE_ACK;
}
- /* Enqueue out-of-order data with absolute offset */
+ /* Enqueue out-of-order data with relative offset */
rv = stream_session_enqueue_data (&tc->connection, b,
- vnet_buffer (b)->tcp.seq_number,
- 0 /* queue event */ , 0);
+ vnet_buffer (b)->tcp.seq_number -
+ tc->rcv_nxt, 0 /* queue event */ , 0);
/* Nothing written */
if (rv)
@@ -1388,10 +1393,15 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
/* Get the newest segment from the fifo */
newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
- start = ooo_segment_offset (s0->server_rx_fifo, newest);
- end = ooo_segment_end_offset (s0->server_rx_fifo, newest);
+ if (newest)
+ {
+ start =
+ tc->rcv_nxt + ooo_segment_offset (s0->server_rx_fifo, newest);
+ end = start + ooo_segment_length (s0->server_rx_fifo, newest);
+ tcp_update_sack_list (tc, start, end);
- tcp_update_sack_list (tc, start, end);
+ ASSERT (seq_gt (start, tc->rcv_nxt));
+ }
}
return TCP_ERROR_ENQUEUED;
@@ -1411,7 +1421,7 @@ tcp_can_delack (tcp_connection_t * tc)
/* constrained to send ack */
|| (tc->flags & TCP_CONN_SNDACK) != 0
/* we're almost out of tx wnd */
- || tcp_available_snd_space (tc) < 2 * tc->snd_mss)
+ || tcp_available_snd_space (tc) < 4 * tc->snd_mss)
return 0;
return 1;
@@ -1434,7 +1444,7 @@ tcp_segment_rcv (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b,
*next0 = TCP_NEXT_DROP;
/* Completely in the past (possible retransmit) */
- if (seq_lt (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
+ if (seq_leq (vnet_buffer (b)->tcp.seq_end, tc->rcv_nxt))
goto done;
/* Chop off the bytes in the past */
@@ -1873,8 +1883,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (tcp_opts_wscale (&new_tc0->rcv_opts))
new_tc0->snd_wscale = new_tc0->rcv_opts.wscale;
- /* No scaling */
- new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window);
+ new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
+ << new_tc0->snd_wscale;
new_tc0->snd_wl1 = seq0;
new_tc0->snd_wl2 = ack0;
@@ -1892,8 +1902,15 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Make sure las is initialized for the wnd computation */
new_tc0->rcv_las = new_tc0->rcv_nxt;
- /* Notify app that we have connection */
- stream_session_connect_notify (&new_tc0->connection, sst, 0);
+ /* Notify app that we have connection. If session layer can't
+ * allocate session send reset */
+ if (stream_session_connect_notify (&new_tc0->connection, sst,
+ 0))
+ {
+ tcp_connection_cleanup (new_tc0);
+ tcp_send_reset (b0, is_ip4);
+ goto drop;
+ }
stream_session_init_fifos_pointers (&new_tc0->connection,
new_tc0->irs + 1,
@@ -1907,7 +1924,14 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
new_tc0->state = TCP_STATE_SYN_RCVD;
/* Notify app that we have connection */
- stream_session_connect_notify (&new_tc0->connection, sst, 0);
+ if (stream_session_connect_notify
+ (&new_tc0->connection, sst, 0))
+ {
+ tcp_connection_cleanup (new_tc0);
+ tcp_send_reset (b0, is_ip4);
+ goto drop;
+ }
+
stream_session_init_fifos_pointers (&new_tc0->connection,
new_tc0->irs + 1,
new_tc0->iss + 1);
@@ -2508,8 +2532,8 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (tcp_opts_wscale (&child0->rcv_opts))
child0->snd_wscale = child0->rcv_opts.wscale;
- /* No scaling */
- child0->snd_wnd = clib_net_to_host_u16 (th0->window);
+ child0->snd_wnd = clib_net_to_host_u16 (th0->window)
+ << child0->snd_wscale;
child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
@@ -2892,6 +2916,9 @@ do { \
_(FIN_WAIT_2, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
+ _(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(LAST_ACK, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
index c66250e4..c825e952 100644
--- a/src/vnet/tcp/tcp_newreno.c
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -18,7 +18,6 @@
void
newreno_congestion (tcp_connection_t * tc)
{
- tc->prev_ssthresh = tc->ssthresh;
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
}
@@ -47,7 +46,8 @@ newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type)
{
if (ack_type == TCP_CC_DUPACK)
{
- tc->cwnd += tc->snd_mss;
+ if (!tcp_opts_sack_permitted (tc))
+ tc->cwnd += tc->snd_mss;
}
else if (ack_type == TCP_CC_PARTIALACK)
{
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 47c94e6d..554a981d 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1052,6 +1052,7 @@ tcp_rtx_timeout_cc (tcp_connection_t * tc)
tc->ssthresh = clib_max (tcp_flight_size (tc) / 2, 2 * tc->snd_mss);
tc->cwnd = tcp_loss_wnd (tc);
tc->snd_congestion = tc->snd_una_max;
+
tcp_recovery_on (tc);
}
@@ -1213,7 +1214,7 @@ tcp_timer_persist_handler (u32 index)
tc->timers[TCP_TIMER_PERSIST] = TCP_TIMER_HANDLE_INVALID;
/* Problem already solved or worse */
- if (tc->state == TCP_STATE_CLOSED
+ if (tc->state == TCP_STATE_CLOSED || tc->state > TCP_STATE_ESTABLISHED
|| tc->snd_wnd > tc->snd_mss || tcp_in_recovery (tc))
return;
@@ -1505,10 +1506,7 @@ tcp46_output_inline (vlib_main_t * vm,
/* Stop DELACK timer and fix flags */
tc0->flags &= ~(TCP_CONN_SNDACK);
- if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK))
- {
- tcp_timer_reset (tc0, TCP_TIMER_DELACK);
- }
+ tcp_timer_reset (tc0, TCP_TIMER_DELACK);
/* If not retransmitting
* 1) update snd_una_max (SYN, SYNACK, FIN)
@@ -1630,7 +1628,7 @@ tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
tc = (tcp_connection_t *) tconn;
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0);
- if (tc->rtt_ts == 0)
+ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
tc->rtt_ts = tcp_time_now ();
tc->rtt_seq = tc->snd_nxt;
diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c
index 3f8afa40..a461e3b8 100644
--- a/src/vnet/tcp/tcp_test.c
+++ b/src/vnet/tcp/tcp_test.c
@@ -190,11 +190,18 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input)
TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
TCP_TEST ((pool_elts (sb->holes) == 1),
"scoreboard has %d elements", pool_elts (sb->holes));
+ hole = scoreboard_first_hole (sb);
+ TCP_TEST ((hole->prev == TCP_INVALID_SACK_HOLE_INDEX
+ && hole->next == TCP_INVALID_SACK_HOLE_INDEX), "hole is valid");
+ TCP_TEST ((sb->last_bytes_delivered == 100), "last bytes delivered %d",
+ sb->last_bytes_delivered);
/*
* Add some more blocks and then remove all
*/
vec_reset_length (tc->rcv_opts.sacks);
+ tc->snd_una += sb->snd_una_adv;
+ tc->snd_una_max = 1900;
for (i = 0; i < 5; i++)
{
block.start = i * 100 + 1200;
@@ -242,6 +249,39 @@ tcp_test_sack_rx (vlib_main_t * vm, unformat_input_t * input)
TCP_TEST ((sb->last_sacked_bytes == 0),
"last sacked bytes %d", sb->last_sacked_bytes);
+ /*
+ * Inject one block, ack it and overlap hole
+ */
+
+ tc->snd_una = 0;
+ tc->snd_una_max = 1000;
+ tc->snd_nxt = 1000;
+
+ block.start = 100;
+ block.end = 500;
+ vec_add1 (tc->rcv_opts.sacks, block);
+ tc->rcv_opts.n_sack_blocks = vec_len (tc->rcv_opts.sacks);
+
+ tcp_rcv_sacks (tc, 0);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb added [100, 500]:\n%U",
+ format_tcp_scoreboard, sb);
+
+ tcp_rcv_sacks (tc, 800);
+
+ if (verbose)
+ vlib_cli_output (vm, "sb ack [0, 800]:\n%U", format_tcp_scoreboard, sb);
+
+ TCP_TEST ((pool_elts (sb->holes) == 1),
+ "scoreboard has %d elements", pool_elts (sb->holes));
+ TCP_TEST ((sb->snd_una_adv == 0), "snd_una_adv %u", sb->snd_una_adv);
+ TCP_TEST ((sb->sacked_bytes == 0), "sacked bytes %d", sb->sacked_bytes);
+ TCP_TEST ((sb->last_sacked_bytes == 0),
+ "last sacked bytes %d", sb->last_sacked_bytes);
+ TCP_TEST ((sb->last_bytes_delivered == 400),
+ "last bytes delivered %d", sb->last_bytes_delivered);
+
return 0;
}
@@ -571,7 +611,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
*/
for (i = 0; i < 3; i++)
{
- offset = (2 * i + 1) * sizeof (u32);
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
data = (u8 *) (test_data + (2 * i + 1));
if (i == 0)
{
@@ -600,7 +640,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
/*
* Try adding a completely overlapped segment
*/
- offset = 3 * sizeof (u32);
+ offset = 3 * sizeof (u32) - f->tail;
data = (u8 *) (test_data + 3);
rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
if (rv)
@@ -626,7 +666,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
*/
for (i = 3; i > 1; i--)
{
- offset = (2 * i + 0) * sizeof (u32);
+ offset = (2 * i + 0) * sizeof (u32) - f->tail;
data = (u8 *) (test_data + (2 * i + 0));
rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
if (verbose)
@@ -688,7 +728,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
for (i = 0; i < 4; i++)
{
- offset = (2 * i + 1) * sizeof (u32);
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
data = (u8 *) (test_data + (2 * i + 1));
rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
if (verbose)
@@ -701,7 +741,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
}
}
- rv = svm_fifo_enqueue_with_offset (f, 8, 21, data);
+ rv = svm_fifo_enqueue_with_offset (f, 8 - f->tail, 21, data);
TCP_TEST ((rv == 0), "ooo enqueued %u", rv);
TCP_TEST ((svm_fifo_number_ooo_segments (f) == 1),
"number of ooo segments %u", svm_fifo_number_ooo_segments (f));
@@ -722,7 +762,7 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
for (i = 0; i < 4; i++)
{
- offset = (2 * i + 1) * sizeof (u32);
+ offset = (2 * i + 1) * sizeof (u32) - f->tail;
data = (u8 *) (test_data + (2 * i + 1));
rv = svm_fifo_enqueue_with_offset (f, offset, sizeof (u32), data);
if (verbose)
@@ -735,7 +775,13 @@ tcp_test_fifo1 (vlib_main_t * vm, unformat_input_t * input)
}
}
+ if (verbose)
+ vlib_cli_output (vm, "fifo after enqueue: %U", format_svm_fifo, f, 1);
+
rv = svm_fifo_enqueue_nowait (f, 29, data);
+ if (verbose)
+ vlib_cli_output (vm, "fifo after enqueueing 29: %U", format_svm_fifo, f,
+ 1);
TCP_TEST ((rv == 32), "ooo enqueued %u", rv);
TCP_TEST ((svm_fifo_number_ooo_segments (f) == 0),
"number of ooo segments %u", svm_fifo_number_ooo_segments (f));
@@ -788,7 +834,8 @@ tcp_test_fifo2 (vlib_main_t * vm)
{
tp = vp + i;
data64 = tp->offset;
- svm_fifo_enqueue_with_offset (f, tp->offset, tp->len, (u8 *) & data64);
+ svm_fifo_enqueue_with_offset (f, tp->offset - f->tail, tp->len,
+ (u8 *) & data64);
}
/* Expected result: one big fat chunk at offset 4 */
@@ -817,7 +864,7 @@ tcp_test_fifo2 (vlib_main_t * vm)
{
tp = &test_data[i];
data64 = tp->offset;
- rv = svm_fifo_enqueue_with_offset (f, tp->offset, tp->len,
+ rv = svm_fifo_enqueue_with_offset (f, tp->offset - f->tail, tp->len,
(u8 *) & data64);
if (rv)
{
@@ -991,8 +1038,9 @@ tcp_test_fifo3 (vlib_main_t * vm, unformat_input_t * input)
for (i = !randomize; i < vec_len (generate); i++)
{
tp = generate + i;
- svm_fifo_enqueue_with_offset (f, fifo_initial_offset + tp->offset,
- tp->len,
+ svm_fifo_enqueue_with_offset (f,
+ fifo_initial_offset + tp->offset -
+ f->tail, tp->len,
(u8 *) data_pattern + tp->offset);
}
@@ -1107,7 +1155,7 @@ tcp_test_fifo4 (vlib_main_t * vm, unformat_input_t * input)
for (i = test_n_bytes - 1; i > 0; i--)
{
- rv = svm_fifo_enqueue_with_offset (f, fifo_initial_offset + i,
+ rv = svm_fifo_enqueue_with_offset (f, fifo_initial_offset + i - f->tail,
sizeof (u8), &test_data[i]);
if (verbose)
vlib_cli_output (vm, "add [%d] [%d, %d]", i, i, i + sizeof (u8));