aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2017-06-19 12:26:09 -0400
committerDave Barach <openvpp@barachs.net>2017-06-22 16:55:03 +0000
commitf6359c8cace5b73a813e5f4e3d1bc28f7752fcdf (patch)
tree38aa2a4dca15fb49c8c6ed967f1fd50d522926cc
parent5be2d073ceb695104fd84bce592cb588f8aaaa66 (diff)
Improve svm fifo and tcp tx path performance (VPP-846)
- multiarch on svm fifo - avoid ip lookup on tx Change-Id: Iab0d85204a710979417bca1d692cc47877131203 Signed-off-by: Florin Coras <fcoras@cisco.com> Signed-off-by: Dave Barach <dbarach@cisco.com>
-rw-r--r--src/svm/svm_fifo.c95
-rwxr-xr-xsrc/uri/uri_tcp_test.c10
-rw-r--r--src/vnet/session/node.c13
-rw-r--r--src/vnet/session/transport.h7
-rw-r--r--src/vnet/tcp/tcp.c100
-rw-r--r--src/vnet/tcp/tcp.h4
-rw-r--r--src/vnet/tcp/tcp_output.c29
-rw-r--r--src/vnet/tcp/tcp_packet.h9
8 files changed, 232 insertions, 35 deletions
diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c
index 6ca437cfd7b..aed5d6a7420 100644
--- a/src/svm/svm_fifo.c
+++ b/src/svm/svm_fifo.c
@@ -14,6 +14,7 @@
*/
#include <svm/svm_fifo.h>
+#include <vppinfra/cpu.h>
static inline u8
position_lt (svm_fifo_t * f, u32 a, u32 b)
@@ -417,10 +418,38 @@ svm_fifo_enqueue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
return (total_copy_bytes);
}
+/*
+ * Template for one per-microarchitecture clone of an enqueue function.
+ * The clone forwards its arguments to fn unchanged. It is declared to
+ * return int so that it matches both the function it wraps and the
+ * int-returning function pointer svm_fifo_enqueue_nowait calls it through.
+ */
+#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt)                       \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)  \
+  { return fn (f, max_bytes, copy_from_here); }
+
+/* Baseline enqueue implementation; a thin wrapper around
+ * svm_fifo_enqueue_internal that the per-arch clones are built from. */
+static int
+svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes,
+                            u8 * copy_from_here)
+{
+  int rv;
+
+  rv = svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+  return rv;
+}
+
+/* Presumably expands the clone template once per march variant and defines
+ * svm_fifo_enqueue_nowait_ma_multiarch_select () - confirm in vppinfra/cpu.h */
+foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE,
+                       svm_fifo_enqueue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma);
+
+/*
+ * Public enqueue entry point.
+ *
+ * When CLIB_DEBUG > 0 the baseline svm_fifo_enqueue_nowait_ma is called
+ * directly. Otherwise the function returned by the multiarch selector is
+ * fetched on first use, cached in a static pointer and called from then on.
+ * The return value is whatever the selected implementation returns.
+ */
int
svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
{
- return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+#if CLIB_DEBUG > 0
+ return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here);
+#else
+ /* Cached implementation pointer; zero until the first call */
+ static int (*fp) (svm_fifo_t *, u32, u8 *);
+
+ if (PREDICT_FALSE (fp == 0))
+ fp = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select ();
+
+ return (*fp) (f, max_bytes, copy_from_here);
+#endif
}
/**
@@ -541,15 +570,43 @@ svm_fifo_dequeue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
return (total_copy_bytes);
}
-int
-svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+/* Baseline dequeue implementation: forwards to svm_fifo_dequeue_internal.
+ * The per-arch clones and the public svm_fifo_dequeue_nowait wrap it. */
+static int
+svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
{
return svm_fifo_dequeue_internal (f, max_bytes, copy_here);
}
+/*
+ * Template for one per-microarchitecture clone of a dequeue function.
+ * The clone forwards its arguments to fn unchanged. It is declared to
+ * return int so that it matches both the function it wraps and the
+ * int-returning function pointer svm_fifo_dequeue_nowait calls it through.
+ */
+#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt)                  \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)       \
+  { return fn (f, max_bytes, copy_here); }
+
+/* Presumably expands the clone template once per march variant and defines
+ * svm_fifo_dequeue_nowait_ma_multiarch_select () - confirm in vppinfra/cpu.h */
+foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE,
+                       svm_fifo_dequeue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma);
+
+/*
+ * Public dequeue entry point.
+ *
+ * When CLIB_DEBUG > 0 the baseline svm_fifo_dequeue_nowait_ma is called
+ * directly. Otherwise the function returned by the multiarch selector is
+ * fetched on first use, cached in a static pointer and called from then on.
+ */
int
-svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
- u8 * copy_here)
+svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+ return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here);
+#else
+ /* Cached implementation pointer; zero until the first call */
+ static int (*fp) (svm_fifo_t *, u32, u8 *);
+
+ if (PREDICT_FALSE (fp == 0))
+ fp = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select ();
+
+ return (*fp) (f, max_bytes, copy_here);
+#endif
+}
+
+static int
+svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+ u8 * copy_here)
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
u32 cursize, nitems, real_head;
@@ -586,6 +643,34 @@ svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
return total_copy_bytes;
}
+/*
+ * Template for one per-microarchitecture clone of a peek function.
+ * The clone forwards its arguments to fn unchanged. It is declared to
+ * return int so that it matches both the function it wraps and the
+ * int-returning function pointer svm_fifo_peek calls it through.
+ */
+#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt)                     \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,  \
+                   u8 * copy_here)                                      \
+  { return fn (f, relative_offset, max_bytes, copy_here); }
+
+/* Presumably expands the clone template once per march variant and defines
+ * svm_fifo_peek_ma_multiarch_select () - confirm in vppinfra/cpu.h */
+foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma);
+
+/*
+ * Public peek entry point.
+ *
+ * Debug images (CLIB_DEBUG > 0) call svm_fifo_peek_ma directly. Otherwise
+ * the implementation returned by the multiarch selector is resolved on the
+ * first call, remembered in a static pointer and used for every call.
+ */
+int
+svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+               u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+  return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here);
+#else
+  typedef int (*peek_fn_t) (svm_fifo_t *, u32, u32, u8 *);
+  static peek_fn_t selected_fn;
+
+  if (PREDICT_FALSE (!selected_fn))
+    selected_fn = (peek_fn_t) svm_fifo_peek_ma_multiarch_select ();
+
+  return selected_fn (f, relative_offset, max_bytes, copy_here);
+#endif
+}
+
int
svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes)
{
diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c
index d1694cf4e73..80aab183b0a 100755
--- a/src/uri/uri_tcp_test.c
+++ b/src/uri/uri_tcp_test.c
@@ -398,7 +398,6 @@ static void
vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
{
uri_tcp_test_main_t *utm = &uri_tcp_test_main;
- session_t *session;
vl_api_reset_session_reply_t *rmp;
uword *p;
int rv = 0;
@@ -407,9 +406,8 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp)
if (p)
{
- session = pool_elt_at_index (utm->sessions, p[0]);
- hash_unset (utm->session_index_by_vpp_handles, mp->handle);
- pool_put (utm->sessions, session);
+ clib_warning ("got reset");
+ /* Cleanup later */
utm->time_to_stop = 1;
}
else
@@ -603,7 +601,7 @@ send_test_chunk (uri_tcp_test_main_t * utm, svm_fifo_t * tx_fifo, int mypid,
if (bytes_to_snd > vec_len (test_data))
bytes_to_snd = vec_len (test_data);
- while (bytes_to_snd > 0)
+ while (bytes_to_snd > 0 && !utm->time_to_stop)
{
actual_write = (bytes_to_snd > queue_max_chunk) ?
queue_max_chunk : bytes_to_snd;
@@ -652,6 +650,8 @@ client_send_data (uri_tcp_test_main_t * utm)
for (i = 0; i < n_iterations; i++)
{
send_test_chunk (utm, tx_fifo, mypid, 0);
+ if (utm->time_to_stop)
+ break;
}
leftover = utm->bytes_to_send % vec_len (test_data);
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index c0ab1bf096f..b24f5fd994d 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -248,6 +248,11 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
ASSERT (bi0);
_vec_len (smm->tx_buffers[thread_index]) = n_bufs;
+ /* usual speculation, or the enqueue_x1 macro will barf */
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+
b0 = vlib_get_buffer (vm, bi0);
b0->error = 0;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID
@@ -255,10 +260,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
b0->current_data = 0;
b0->total_length_not_including_first_buffer = 0;
- /* RX on the local interface. tx in default fib */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
len_to_deq0 = clib_min (left_to_snd0, deq_per_buf);
data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
@@ -307,10 +308,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
}));
/* *INDENT-ON* */
- /* usual speculation, or the enqueue_x1 macro will barf */
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
if (PREDICT_FALSE (n_trace > 0))
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 04bd5ca0f29..561a92575cb 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -30,13 +30,16 @@ typedef struct _transport_connection
ip46_address_t lcl_ip; /**< Local IP */
u16 lcl_port; /**< Local port */
u16 rmt_port; /**< Remote port */
- u8 proto; /**< Transport protocol id (also session type) */
+ u8 proto; /**< Protocol id (also session type) */
u32 s_index; /**< Parent session index */
u32 c_index; /**< Connection index in transport pool */
u8 is_ip4; /**< Flag if IP4 connection */
u32 thread_index; /**< Worker-thread index */
+ fib_node_index_t rmt_fei; /**< FIB entry index for rmt */
+ dpo_id_t rmt_dpo; /**< Forwarding DPO for rmt */
+
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
u32 cc_stat_tstamp; /**< CC stats timestamp */
@@ -59,6 +62,8 @@ typedef struct _transport_connection
#define c_thread_index connection.thread_index
#define c_elog_track connection.elog_track
#define c_cc_stat_tstamp connection.cc_stat_tstamp
+#define c_rmt_fei connection.rmt_fei
+#define c_rmt_dpo connection.rmt_dpo
} transport_connection_t;
/*
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 5c554bac5a9..4e85eb3fc93 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -16,6 +16,7 @@
#include <vnet/tcp/tcp.h>
#include <vnet/session/session.h>
#include <vnet/fib/fib.h>
+#include <vnet/dpo/load_balance.h>
#include <math.h>
tcp_main_t tcp_main;
@@ -342,6 +343,99 @@ tcp_connection_timers_reset (tcp_connection_t * tc)
}
}
+/* IPv4 header followed by a TCP header. tcp_connection_select_lb_bucket
+ * fills one of these from a connection's addresses and ports and passes
+ * its ip member to ip4_compute_flow_hash. */
+typedef struct ip4_tcp_hdr
+{
+ ip4_header_t ip;
+ tcp_header_t tcp;
+} ip4_tcp_hdr_t;
+
+/* IPv6 header followed by a TCP header. tcp_connection_select_lb_bucket
+ * fills one of these from a connection's addresses and ports and passes
+ * its ip member to ip6_compute_flow_hash. */
+typedef struct ip6_tcp_hdr
+{
+ ip6_header_t ip;
+ tcp_header_t tcp;
+} ip6_tcp_hdr_t;
+
+/**
+ * Pick the load-balance bucket a connection hashes to.
+ *
+ * A zeroed IP + TCP header is filled in with the connection's protocol,
+ * addresses and ports, hashed with the load-balance object's hash config,
+ * and the hash (masked with lb_n_buckets_minus_1) selects the bucket.
+ *
+ * @param tc      connection whose local/remote addresses and ports are used
+ * @param dpo     dpo whose dpoi_index identifies the load-balance object
+ * @param result  receives a copy (dpo_copy) of the selected bucket
+ */
+static void
+tcp_connection_select_lb_bucket (tcp_connection_t * tc, const dpo_id_t * dpo,
+                                 dpo_id_t * result)
+{
+  load_balance_t *lb = load_balance_get (dpo->dpoi_index);
+  const dpo_id_t *bucket;
+  int flow_hash;
+
+  if (!tc->c_is_ip4)
+    {
+      ip6_tcp_hdr_t hdr6;
+
+      memset (&hdr6, 0, sizeof (hdr6));
+      hdr6.tcp.src_port = tc->c_lcl_port;
+      hdr6.tcp.dst_port = tc->c_rmt_port;
+      hdr6.ip.protocol = IP_PROTOCOL_TCP;
+      clib_memcpy (&hdr6.ip.src_address, &tc->c_lcl_ip.ip6,
+                   sizeof (ip6_address_t));
+      clib_memcpy (&hdr6.ip.dst_address, &tc->c_rmt_ip.ip6,
+                   sizeof (ip6_address_t));
+      flow_hash = ip6_compute_flow_hash (&hdr6.ip, lb->lb_hash_config);
+    }
+  else
+    {
+      ip4_tcp_hdr_t hdr4;
+
+      memset (&hdr4, 0, sizeof (hdr4));
+      hdr4.tcp.src_port = tc->c_lcl_port;
+      hdr4.tcp.dst_port = tc->c_rmt_port;
+      hdr4.ip.protocol = IP_PROTOCOL_TCP;
+      hdr4.ip.address_pair.src.as_u32 = tc->c_lcl_ip.ip4.as_u32;
+      hdr4.ip.address_pair.dst.as_u32 = tc->c_rmt_ip.ip4.as_u32;
+      flow_hash = ip4_compute_flow_hash (&hdr4.ip, lb->lb_hash_config);
+    }
+
+  bucket =
+    load_balance_get_bucket_i (lb, flow_hash & lb->lb_n_buckets_minus_1);
+  dpo_copy (result, bucket);
+}
+
+/**
+ * Look up a connection's remote address in the FIB.
+ *
+ * Builds a host prefix (/32 for ip4, /128 for ip6) from the remote address
+ * and looks it up in fib table 0.
+ *
+ * @param tc  connection whose remote address is looked up
+ * @return    whatever fib_table_lookup returns for that prefix
+ */
+fib_node_index_t
+tcp_lookup_rmt_in_fib (tcp_connection_t * tc)
+{
+  fib_prefix_t rmt_pfx;
+
+  if (tc->c_is_ip4)
+    {
+      rmt_pfx.fp_proto = FIB_PROTOCOL_IP4;
+      rmt_pfx.fp_len = 32;
+    }
+  else
+    {
+      rmt_pfx.fp_proto = FIB_PROTOCOL_IP6;
+      rmt_pfx.fp_len = 128;
+    }
+  clib_memcpy (&rmt_pfx.fp_addr, &tc->c_rmt_ip, sizeof (rmt_pfx.fp_addr));
+
+  return fib_table_lookup (0, &rmt_pfx);
+}
+
+/**
+ * Stack the connection's forwarding dpo on its remote fib entry.
+ *
+ * Selects the connection's load-balance bucket from the fib entry's fe_lb
+ * and stacks c_rmt_dpo on it from the tcp4/tcp6 output node.
+ *
+ * @param tc  connection; c_rmt_fei must already hold the fib entry index
+ * @return    0 on success, -1 if the entry's dpo is not a load-balance
+ *
+ * NOTE(review): the local bucket dpo is never reset after stacking -
+ * confirm whether dpo_copy takes a reference that has to be released.
+ */
+static int
+tcp_connection_stack_on_fib_entry (tcp_connection_t * tc)
+{
+  fib_entry_t *rmt_fe = fib_entry_get (tc->c_rmt_fei);
+  dpo_id_t bucket = DPO_INVALID;
+  u32 parent_node_index;
+
+  if (rmt_fe->fe_lb.dpoi_type != DPO_LOAD_BALANCE)
+    return -1;
+
+  tcp_connection_select_lb_bucket (tc, &rmt_fe->fe_lb, &bucket);
+
+  if (tc->c_is_ip4)
+    parent_node_index = tcp4_output_node.index;
+  else
+    parent_node_index = tcp6_output_node.index;
+
+  dpo_stack_from_node (parent_node_index, &tc->c_rmt_dpo, &bucket);
+  return 0;
+}
+
+/** Stack tcp connection on peer's fib entry.
+ *
+ * Looks up the peer in the fib, stores the resulting entry index in the
+ * connection and stacks the connection's forwarding dpo on that entry.
+ * This ultimately populates the dpo the connection will use to send packets.
+ *
+ * Failures are reported with clib_warning and leave the connection's dpo
+ * untouched; the function itself has no return value.
+ */
+static void
+tcp_connection_fib_attach (tcp_connection_t * tc)
+{
+  tc->c_rmt_fei = tcp_lookup_rmt_in_fib (tc);
+
+  ASSERT (tc->c_rmt_fei != FIB_NODE_INDEX_INVALID);
+
+  /* ASSERT is presumably a no-op in non-debug images, so guard explicitly
+   * rather than hand an invalid index to fib_entry_get */
+  if (tc->c_rmt_fei == FIB_NODE_INDEX_INVALID)
+    {
+      clib_warning ("no fib entry for peer");
+      return;
+    }
+
+  /* Do not silently drop the stacking result */
+  if (tcp_connection_stack_on_fib_entry (tc) < 0)
+    clib_warning ("peer fib entry does not resolve via a load-balance dpo");
+}
+
/** Initialize tcp connection variables
*
* Should be called after having received a msg from the peer, i.e., a SYN or
@@ -353,6 +447,7 @@ tcp_connection_init_vars (tcp_connection_t * tc)
tcp_init_mss (tc);
scoreboard_init (&tc->sack_sb);
tcp_cc_init (tc);
+ tcp_connection_fib_attach (tc);
}
int
@@ -361,7 +456,8 @@ tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4)
tcp_main_t *tm = vnet_get_tcp_main ();
tcp_connection_t *tc;
fib_prefix_t prefix;
- u32 fei, sw_if_index;
+ fib_node_index_t fei;
+ u32 sw_if_index;
ip46_address_t lcl_addr;
u16 lcl_port;
@@ -985,8 +1081,6 @@ tcp_main_enable (vlib_main_t * vm)
vec_validate (tm->timer_wheels, num_threads - 1);
tcp_initialize_timer_wheels (tm);
-// vec_validate (tm->delack_connections, num_threads - 1);
-
/* Initialize clocks per tick for TCP timestamp. Used to compute
* monotonically increasing timestamps. */
tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index e83987182fa..12d804b82f6 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -28,6 +28,7 @@
#define THZ (u32) (1/TCP_TICK) /**< TCP tick frequency */
#define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */
#define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */
+#define TCP_FIB_RECHECK_PERIOD	(1 * THZ)	/**< Recheck every 1s; parenthesized so it is safe inside larger expressions */
#define TCP_MAX_OPTION_SPACE 40
#define TCP_DUPACK_THRESHOLD 3
@@ -256,6 +257,7 @@ typedef struct _tcp_connection
u16 mss; /**< Our max seg size that includes options */
u32 limited_transmit; /**< snd_nxt when limited transmit starts */
+ u32 last_fib_check; /**< Last time we checked fib route for peer */
} tcp_connection_t;
struct _tcp_cc_algorithm
@@ -528,6 +530,8 @@ void tcp_cc_init_congestion (tcp_connection_t * tc);
int tcp_cc_recover (tcp_connection_t * tc);
void tcp_cc_fastrecovery_exit (tcp_connection_t * tc);
+/** Look up the connection's remote address as a host prefix in fib table 0
+ * and return the result of that lookup. */
+fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
+
/* Made public for unit testing only */
void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end);
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 554a981d924..41bebcb34af 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -22,17 +22,14 @@ vlib_node_registration_t tcp6_output_node;
typedef enum _tcp_output_nect
{
TCP_OUTPUT_NEXT_DROP,
- TCP_OUTPUT_NEXT_IP_LOOKUP,
TCP_OUTPUT_N_NEXT
} tcp_output_next_t;
#define foreach_tcp4_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip4-lookup")
#define foreach_tcp6_output_next \
_ (DROP, "error-drop") \
- _ (IP_LOOKUP, "ip6-lookup")
static char *tcp_error_strings[] = {
#define tcp_error(n,s) s,
@@ -1451,7 +1448,7 @@ tcp46_output_inline (vlib_main_t * vm,
tcp_connection_t *tc0;
tcp_tx_trace_t *t0;
tcp_header_t *th0 = 0;
- u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
+ u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_DROP;
bi0 = from[0];
to_next[0] = bi0;
@@ -1530,10 +1527,26 @@ tcp46_output_inline (vlib_main_t * vm,
tc0->rto_boff = 0;
}
- /* set fib index to default and lookup node */
- /* XXX network virtualization (vrf/vni) */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+          /* Make sure we haven't lost route to our peer. The fib is only
+           * consulted once TCP_FIB_RECHECK_PERIOD ticks have elapsed since
+           * the last check; comparing the unsigned difference keeps the
+           * test correct when the timestamp wraps around. */
+          if (PREDICT_FALSE
+              ((u32) (tc0->snd_opts.tsval - tc0->last_fib_check)
+               >= (TCP_FIB_RECHECK_PERIOD)))
+            {
+              if (PREDICT_TRUE
+                  (tc0->c_rmt_fei == tcp_lookup_rmt_in_fib (tc0)))
+                {
+                  /* Route unchanged: restart the recheck interval */
+                  tc0->last_fib_check = tc0->snd_opts.tsval;
+                }
+              else
+                {
+                  /* Fib entry for the peer changed: reset the connection;
+                   * next0 is still the drop next at this point */
+                  clib_warning ("lost connection to peer");
+                  tcp_connection_reset (tc0);
+                  goto done;
+                }
+            }
+
+ /* Use pre-computed dpo to set next node */
+ next0 = tc0->c_rmt_dpo.dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
done:
diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h
index 4f28cf32708..a6f62ee16d5 100644
--- a/src/vnet/tcp/tcp_packet.h
+++ b/src/vnet/tcp/tcp_packet.h
@@ -144,12 +144,11 @@ typedef struct
{
u8 flags; /** Option flags, see above */
- /* Received options */
- u16 mss; /**< Maximum segment size advertised by peer */
- u8 wscale; /**< Window scale advertised by peer */
- u32 tsval; /**< Peer's timestamp value */
+ u16 mss; /**< Maximum segment size advertised */
+ u8 wscale; /**< Window scale advertised */
+ u32 tsval; /**< Timestamp value */
u32 tsecr; /**< Echoed/reflected time stamp */
- sack_block_t *sacks; /**< SACK blocks received */
+ sack_block_t *sacks; /**< SACK blocks */
u8 n_sack_blocks; /**< Number of SACKs blocks */
} tcp_options_t;