author	Florin Coras <fcoras@cisco.com>	2017-09-14 03:08:00 -0400
committer	Dave Barach <openvpp@barachs.net>	2017-09-19 19:10:29 +0000
commit	9d063047eb1a3738cb0fc9ebebb55793d155bb20 (patch)
tree	a5100d8f3224cde45c874c9f32a49434ed86a238
parent	caac350076e386e5caf6322a3439ea0c36d77cc5 (diff)
session/tcp: improve preallocated segment handling
- add preallocated segment flag
- don't remove pre-allocated segments except if application detaches
- when preallocating fifos in multiple segments, completely fill a segment before moving to the next
- detach server application from segment-managers when deleting app
- batch syn/syn-ack/fin (re)transmissions
- loosen up close-wait and time-wait times

Change-Id: I412f53ce601cc83b3acc26aeffd7fa2d52d73b03
Signed-off-by: Florin Coras <fcoras@cisco.com>
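To illustrate the segment-fill change: a minimal sketch (not part of the patch, all names hypothetical) of carving fifo pairs segment by segment, filling each to capacity and carrying the remainder over instead of splitting the request evenly across segments:

/* Hypothetical sketch of the allocation strategy this patch adopts:
 * fill each segment completely before moving to the next. */
static void
carve_all_pairs (u32 requested_pairs, u32 segment_count,
                 u32 segment_size, u32 pair_size)
{
  u32 i, remaining = requested_pairs;
  for (i = 0; i < segment_count && remaining > 0; i++)
    {
      u32 fits = segment_size / pair_size;   /* pairs one segment holds */
      u32 take = remaining < fits ? remaining : fits;
      /* carve 'take' fifo pairs in segment i here */
      remaining -= take;
    }
}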
-rw-r--r--	src/svm/svm_fifo_segment.c	33
-rw-r--r--	src/svm/svm_fifo_segment.h	1
-rw-r--r--	src/vnet/session/application.c	2
-rw-r--r--	src/vnet/session/segment_manager.c	73
-rw-r--r--	src/vnet/session/session.c	18
-rw-r--r--	src/vnet/session/session_node.c	21
-rw-r--r--	src/vnet/tcp/builtin_client.c	2
-rw-r--r--	src/vnet/tcp/tcp.c	7
-rw-r--r--	src/vnet/tcp/tcp.h	9
-rwxr-xr-x	src/vnet/tcp/tcp_debug.h	9
-rw-r--r--	src/vnet/tcp/tcp_input.c	81
-rw-r--r--	src/vnet/tcp/tcp_output.c	91
12 files changed, 209 insertions, 138 deletions
diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c
index 3bdd2b28ebb..da2b79351a5 100644
--- a/src/svm/svm_fifo_segment.c
+++ b/src/svm/svm_fifo_segment.c
@@ -57,11 +57,12 @@ allocate_new_fifo_chunk (svm_fifo_segment_header_t * fsh,
}
static void
-preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh,
+preallocate_fifo_pairs (svm_fifo_segment_private_t * s,
svm_fifo_segment_create_args_t * a)
{
- u32 rx_fifo_size, tx_fifo_size;
- u32 rx_rounded_data_size, tx_rounded_data_size;
+ svm_fifo_segment_header_t *fsh = s->h;
+ u32 rx_fifo_size, tx_fifo_size, pairs_to_allocate;
+ u32 rx_rounded_data_size, tx_rounded_data_size, pair_size;
svm_fifo_t *f;
u8 *rx_fifo_space, *tx_fifo_space;
int rx_freelist_index, tx_freelist_index;
@@ -97,10 +98,11 @@ preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh,
- max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
/* Calculate space requirements */
- rx_fifo_size = (sizeof (*f) + rx_rounded_data_size)
- * a->preallocated_fifo_pairs;
- tx_fifo_size = (sizeof (*f) + tx_rounded_data_size)
- * a->preallocated_fifo_pairs;
+ pair_size = 2 * sizeof (*f) + rx_rounded_data_size + tx_rounded_data_size;
+ pairs_to_allocate = clib_min (s->ssvm.ssvm_size / pair_size,
+ a->preallocated_fifo_pairs);
+ rx_fifo_size = (sizeof (*f) + rx_rounded_data_size) * pairs_to_allocate;
+ tx_fifo_size = (sizeof (*f) + tx_rounded_data_size) * pairs_to_allocate;
vec_validate_init_empty (fsh->free_fifos,
clib_max (rx_freelist_index, tx_freelist_index),
@@ -139,7 +141,7 @@ preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh,
/* Carve rx fifo space */
f = (svm_fifo_t *) rx_fifo_space;
- for (i = 0; i < a->preallocated_fifo_pairs; i++)
+ for (i = 0; i < pairs_to_allocate; i++)
{
f->freelist_index = rx_freelist_index;
f->next = fsh->free_fifos[rx_freelist_index];
@@ -149,7 +151,7 @@ preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh,
}
/* Carve tx fifo space */
f = (svm_fifo_t *) tx_fifo_space;
- for (i = 0; i < a->preallocated_fifo_pairs; i++)
+ for (i = 0; i < pairs_to_allocate; i++)
{
f->freelist_index = tx_freelist_index;
f->next = fsh->free_fifos[tx_freelist_index];
@@ -157,6 +159,9 @@ preallocate_fifo_pairs (svm_fifo_segment_header_t * fsh,
tx_fifo_space += sizeof (*f) + tx_rounded_data_size;
f = (svm_fifo_t *) tx_fifo_space;
}
+
+ /* Account for the pairs allocated */
+ a->preallocated_fifo_pairs -= pairs_to_allocate;
}
/** (master) create an svm fifo segment */
@@ -200,7 +205,7 @@ svm_fifo_segment_create (svm_fifo_segment_create_args_t * a)
sh->opaque[0] = fsh;
s->h = fsh;
fsh->segment_name = format (0, "%s%c", a->segment_name, 0);
- preallocate_fifo_pairs (fsh, a);
+ preallocate_fifo_pairs (s, a);
ssvm_pop_heap (oldheap);
@@ -245,10 +250,6 @@ svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a)
segment_count = a->private_segment_count;
}
- /* Spread preallocated fifo pairs across segments */
- a->preallocated_fifo_pairs =
- (a->preallocated_fifo_pairs + segment_count - 1) / segment_count;
-
/* Allocate segments */
for (i = 0; i < segment_count; i++)
{
@@ -280,9 +281,11 @@ svm_fifo_segment_create_process_private (svm_fifo_segment_create_args_t * a)
if (a->private_segment_count)
{
+ if (i != 0)
+ fsh->flags |= FIFO_SEGMENT_F_IS_PREALLOCATED;
oldheap = clib_mem_get_heap ();
clib_mem_set_heap (sh->heap);
- preallocate_fifo_pairs (fsh, a);
+ preallocate_fifo_pairs (s, a);
clib_mem_set_heap (oldheap);
}
sh->ready = 1;
diff --git a/src/svm/svm_fifo_segment.h b/src/svm/svm_fifo_segment.h
index 7c97e9b489f..5b771328c6c 100644
--- a/src/svm/svm_fifo_segment.h
+++ b/src/svm/svm_fifo_segment.h
@@ -33,6 +33,7 @@ typedef enum
#define FIFO_SEGMENT_F_IS_PRIVATE 1 << 0 /* Private segment */
#define FIFO_SEGMENT_F_IS_MAIN_HEAP 1 << 1 /* Segment is main heap */
+#define FIFO_SEGMENT_F_IS_PREALLOCATED 1 << 2 /* Segment is preallocated */
typedef struct
{
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index d105119c557..2b789c5f420 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -111,6 +111,8 @@ application_del (application_t * app)
hash_foreach (handle, index, app->listeners_table,
({
vec_add1 (handles, handle);
+ sm = segment_manager_get (index);
+ sm->app_index = SEGMENT_MANAGER_INVALID_APP_INDEX;
}));
/* *INDENT-ON* */
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index c23e4c0237c..48d027553b1 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -197,27 +197,24 @@ u8
segment_manager_has_fifos (segment_manager_t * sm)
{
svm_fifo_segment_private_t *segment;
- /* Weird, but handle it */
- if (vec_len (sm->segment_indices) == 0)
- return 0;
- if (vec_len (sm->segment_indices) == 1)
- {
- segment = svm_fifo_segment_get_segment (sm->segment_indices[0]);
- if (svm_fifo_segment_num_fifos (segment) == 0)
- return 0;
- }
- if (CLIB_DEBUG)
+ int i;
+
+ for (i = 0; i < vec_len (sm->segment_indices); i++)
{
- svm_fifo_segment_private_t *segment;
- int i;
- for (i = 1; i < vec_len (sm->segment_indices); i++)
- {
- segment = svm_fifo_segment_get_segment (sm->segment_indices[i]);
- if (!svm_fifo_segment_has_fifos (segment))
- clib_warning ("segment has no fifos!");
- }
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[i]);
+ if (CLIB_DEBUG && i && !svm_fifo_segment_has_fifos (segment)
+ && !(segment->h->flags & FIFO_SEGMENT_F_IS_PREALLOCATED))
+ clib_warning ("segment %d has no fifos!", sm->segment_indices[i]);
+ if (svm_fifo_segment_has_fifos (segment))
+ return 1;
}
- return 1;
+ return 0;
+}
+
+static u8
+segment_manager_app_detached (segment_manager_t * sm)
+{
+ return (sm->app_index == SEGMENT_MANAGER_INVALID_APP_INDEX);
}
static void
@@ -228,6 +225,13 @@ segment_manager_del_segment (segment_manager_t * sm, u32 segment_index)
clib_spinlock_lock (&sm->lockp);
svm_segment_index = sm->segment_indices[segment_index];
fifo_segment = svm_fifo_segment_get_segment (svm_segment_index);
+ if (!fifo_segment
+ || ((fifo_segment->h->flags & FIFO_SEGMENT_F_IS_PREALLOCATED)
+ && !segment_manager_app_detached (sm)))
+ {
+ clib_spinlock_unlock (&sm->lockp);
+ return;
+ }
svm_fifo_segment_delete (fifo_segment);
vec_del1 (sm->segment_indices, segment_index);
clib_spinlock_unlock (&sm->lockp);
@@ -288,26 +292,29 @@ segment_manager_del_sessions (segment_manager_t * sm)
*
* Since the fifos allocated in the segment keep backpointers to the sessions
* prior to removing the segment, we call session disconnect. This
- * subsequently propages into transport.
+ * subsequently propagates into transport.
*/
void
segment_manager_del (segment_manager_t * sm)
{
+ int i;
- ASSERT (vec_len (sm->segment_indices) <= 1);
- if (vec_len (sm->segment_indices))
+ ASSERT (!segment_manager_has_fifos (sm)
+ && segment_manager_app_detached (sm));
+
+ /* If we have empty preallocated segments that haven't been removed, remove
+ * them now. Apart from that, the first segment in the first segment manager
+ * is not removed when all fifos are removed. It can only be removed when
+ * the manager is explicitly deleted/detached by the app. */
+ for (i = vec_len (sm->segment_indices) - 1; i >= 0; i--)
{
- /* The first segment in the first segment manager is not removed when
- * all fifos are removed. It can only be removed when the manager is
- * explicitly deleted/detached by the app. */
if (CLIB_DEBUG)
{
- svm_fifo_segment_private_t *fifo_segment;
- fifo_segment =
- svm_fifo_segment_get_segment (sm->segment_indices[0]);
- ASSERT (!svm_fifo_segment_has_fifos (fifo_segment));
+ svm_fifo_segment_private_t *segment;
+ segment = svm_fifo_segment_get_segment (sm->segment_indices[i]);
+ ASSERT (!svm_fifo_segment_has_fifos (segment));
}
- segment_manager_del_segment (sm, 0);
+ segment_manager_del_segment (sm, i);
}
clib_spinlock_free (&sm->lockp);
if (CLIB_DEBUG)
@@ -322,8 +329,7 @@ segment_manager_init_del (segment_manager_t * sm)
segment_manager_del_sessions (sm);
else
{
- ASSERT (!sm->first_is_protected
- || sm->app_index == SEGMENT_MANAGER_INVALID_APP_INDEX);
+ ASSERT (!sm->first_is_protected || segment_manager_app_detached (sm));
segment_manager_del (sm);
}
}
@@ -478,7 +484,8 @@ segment_manager_dealloc_fifos (u32 svm_segment_index, svm_fifo_t * rx_fifo,
}
/* Remove segment manager if no sessions and detached from app */
- if (sm->app_index == SEGMENT_MANAGER_INVALID_APP_INDEX && is_first)
+ if (segment_manager_app_detached (sm)
+ && !segment_manager_has_fifos (sm))
segment_manager_del (sm);
}
}
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 792e6612dc1..dc930ce87d3 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -453,7 +453,7 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
st);
if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
{
- clib_warning ("half-open was removed!");
+ TCP_DBG ("half-open was removed!");
return -1;
}
@@ -732,6 +732,7 @@ session_send_session_evt_to_thread (u64 session_handle,
u32 thread_index)
{
static u16 serial_number = 0;
+ u32 tries = 0;
session_fifo_event_t evt;
unix_shared_memory_queue_t *q;
@@ -741,21 +742,14 @@ session_send_session_evt_to_thread (u64 session_handle,
evt.event_id = serial_number++;
q = session_manager_get_vpp_event_queue (thread_index);
-
- /* Based on request block (or not) for lack of space */
- if (PREDICT_TRUE (q->cursize < q->maxsize))
+ while (unix_shared_memory_queue_add (q, (u8 *) & evt, 1))
{
- if (unix_shared_memory_queue_add (q, (u8 *) & evt,
- 0 /* do wait for mutex */ ))
+ if (tries++ == 3)
{
- clib_warning ("failed to enqueue evt");
+ TCP_DBG ("failed to enqueue evt");
+ break;
}
}
- else
- {
- clib_warning ("queue full");
- return;
- }
}
/**
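The enqueue path above no longer blocks on a full event queue: judging by the removed "do wait for mutex" comment, the third argument of unix_shared_memory_queue_add selects a non-blocking add, and the call is retried a bounded number of times. A condensed sketch, assuming the function returns non-zero on failure:

/* Bounded non-blocking retry: attempt the add up to four times, then
 * drop the event instead of stalling the thread. */
u32 tries = 0;
while (unix_shared_memory_queue_add (q, (u8 *) & evt, 1 /* nowait */))
  {
    if (tries++ == 3)
      break;                     /* give up; event is dropped */
  }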
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index 09687687189..d015584990e 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -168,15 +168,19 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
+ /* Check how much we can pull. */
+ max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo);
+
if (peek_data)
{
- /* Offset in rx fifo from where to peek data */
+ /* Offset in rx fifo from where to peek data */
tx_offset = transport_vft->tx_fifo_offset (tc0);
+ if (PREDICT_FALSE (tx_offset >= max_dequeue0))
+ max_dequeue0 = 0;
+ else
+ max_dequeue0 -= tx_offset;
}
- /* Check how much we can pull. If buffering, subtract the offset */
- max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - tx_offset;
-
/* Nothing to read return */
if (max_dequeue0 == 0)
{
@@ -277,6 +281,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
{
n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, tx_offset,
len_to_deq0, data0);
+ if (n_bytes_read <= 0)
+ goto dequeue_fail;
/* Keep track of progress locally, transport is also supposed to
* increment it independently when pushing the header */
tx_offset += n_bytes_read;
@@ -285,11 +291,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
{
n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo,
len_to_deq0, data0);
+ if (n_bytes_read <= 0)
+ goto dequeue_fail;
}
- if (n_bytes_read <= 0)
- goto dequeue_fail;
-
b0->current_length = n_bytes_read;
left_to_snd0 -= n_bytes_read;
@@ -616,7 +621,7 @@ skip_dequeue:
case FIFO_EVENT_APP_TX:
s0 = session_event_get_session (e0, my_thread_index);
- if (CLIB_DEBUG && !s0)
+ if (PREDICT_FALSE (!s0))
{
clib_warning ("It's dead, Jim!");
continue;
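The reordering above matters for unsigned arithmetic: when peeking, the transport's fifo offset can reach or exceed the enqueued byte count, and the old unconditional subtraction could wrap. A condensed sketch of the saturating form now used:

/* Saturate at zero instead of letting u32 subtraction wrap when the
 * peek offset already covers everything currently in the fifo. */
max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo);
if (peek_data)
  {
    tx_offset = transport_vft->tx_fifo_offset (tc0);
    max_dequeue0 = tx_offset >= max_dequeue0 ? 0 : max_dequeue0 - tx_offset;
  }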
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index 5b4c8679970..527b3289924 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -510,7 +510,7 @@ clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients)
if ((i % 4) == 0)
vlib_process_suspend (vm, 10e-6);
ASSERT (i + 1 >= tm->ready_connections);
- while (i + 1 - tm->ready_connections > 8000)
+ while (i + 1 - tm->ready_connections > 1000)
{
vlib_process_suspend (vm, 100e-6);
}
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 04f1e068b9d..f779428fbaf 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1035,7 +1035,7 @@ tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
/* If not snd_wnd constrained and we can't write at least a segment,
* don't try at all */
if (PREDICT_FALSE (snd_space < tc->snd_mss))
- return 0;
+ return snd_space < tc->cwnd ? 0 : snd_space;
/* round down to mss multiple */
return snd_space - (snd_space % tc->snd_mss);
@@ -1167,6 +1167,7 @@ tcp_timer_establish_handler (u32 conn_index)
{
ASSERT (tc->state == TCP_STATE_SYN_SENT);
stream_session_connect_notify (&tc->connection, 1 /* fail */ );
+ TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
}
else
{
@@ -1174,7 +1175,7 @@ tcp_timer_establish_handler (u32 conn_index)
/* note: the connection may have already disappeared */
if (PREDICT_FALSE (tc == 0))
return;
-
+ TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);
ASSERT (tc->state == TCP_STATE_SYN_RCVD);
/* Start cleanup. App wasn't notified yet so use delete notify as
* opposed to delete to cleanup session layer state. */
@@ -1369,6 +1370,8 @@ tcp_main_enable (vlib_main_t * vm)
vec_validate (tm->tx_frames[0], num_threads - 1);
vec_validate (tm->tx_frames[1], num_threads - 1);
+ vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1);
+ vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1);
tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size
(vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 6020a3debbe..bb8091af84f 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -99,8 +99,9 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
#define TCP_ESTABLISH_TIME 750 /* 75s */
#define TCP_SYN_RCVD_TIME 600 /* 60s */
#define TCP_2MSL_TIME 300 /* 30s */
-#define TCP_CLOSEWAIT_TIME 20 /* 0.1s */
-#define TCP_CLEANUP_TIME 5 /* 0.5s Time to wait before cleanup */
+#define TCP_CLOSEWAIT_TIME 20 /* 2s */
+#define TCP_TIMEWAIT_TIME 20 /* 2s */
+#define TCP_CLEANUP_TIME 10 /* 1s Time to wait before cleanup */
#define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */
#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */
@@ -372,8 +373,10 @@ typedef struct _tcp_main
/** per-worker tx buffer free lists */
u32 **tx_buffers;
- /** per-worker tx frames to 4/6 output nodes */
+ /** per-worker tx frames to tcp 4/6 output nodes */
vlib_frame_t **tx_frames[2];
+ /** per-worker tx frames to ip 4/6 lookup nodes */
+ vlib_frame_t **ip_lookup_tx_frames[2];
/* Per worker-thread timer wheel for connections timers */
tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
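The corrected comments imply the timer constants above are expressed in 100ms timer-wheel ticks (20 ticks = 2s, 10 ticks = 1s); the old 0.1s/0.5s annotations were wrong. A hypothetical conversion helper, assuming that tick size:

/* Assumes one tcp timer-wheel tick == 100ms, consistent with the
 * updated comments above (hypothetical helper, not in the patch). */
static inline f64
tcp_timer_ticks_to_seconds (u32 ticks)
{
  return ticks * 0.1;
}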
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index cf77e6e6682..4bc6b42e297 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -82,13 +82,7 @@ typedef enum _tcp_dbg_evt
* Infra and evt track setup
*/
-#define TCP_DBG(_tc, _evt, _args...) \
-{ \
- u8 *_tmp = 0; \
- _tmp = format(_tmp, "%U", format_tcp_connection_verbose, _tc); \
- clib_warning("%s", _tmp); \
- vec_free(_tmp); \
-}
+#define TCP_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
#define DECLARE_ETD(_tc, _e, _size) \
struct \
@@ -240,6 +234,7 @@ typedef enum _tcp_dbg_evt
#define TCP_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args)
#else
#define TCP_EVT_DBG(_evt, _args...)
+#define TCP_DBG(_fmt, _args...)
#endif
/*
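After this change TCP_DBG is a printf-style wrapper over clib_warning in debug builds and expands to nothing otherwise. Call sites elsewhere in this same patch use it like:

/* Example call site, taken from the tcp.c hunk of this patch. */
TCP_DBG ("establish pop: %U", format_tcp_connection, tc, 2);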
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 841e72a503e..64a07070ec2 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -351,12 +351,17 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0,
if (tcp_syn (th0))
{
/* TODO implement RFC 5961 */
- if (tc0->state != TCP_STATE_SYN_RCVD)
- tcp_make_ack (tc0, b0);
+ if (tc0->state == TCP_STATE_SYN_RCVD)
+ {
+ tcp_make_synack (tc0, b0);
+ TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0, 0);
+ }
else
- tcp_make_synack (tc0, b0);
+ {
+ tcp_make_ack (tc0, b0);
+ TCP_EVT_DBG (TCP_EVT_SYNACK_RCVD, tc0);
+ }
*next0 = tcp_next_output (tc0->c_is_ip4);
- TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0, 0);
return -1;
}
@@ -1747,18 +1752,17 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* 8: check the FIN bit */
if (PREDICT_FALSE (is_fin))
{
- /* Enter CLOSE-WAIT and notify session. Don't send ACK, instead
- * wait for session to call close. To avoid lingering
+ /* Enter CLOSE-WAIT and notify session. To avoid lingering
* in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
- tc0->state = TCP_STATE_CLOSE_WAIT;
- TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
+ /* Account for the FIN if nothing else was received */
if (vnet_buffer (b0)->tcp.data_len == 0)
- {
- tc0->rcv_nxt += 1;
- next0 = TCP_ESTABLISHED_NEXT_DROP;
- }
+ tc0->rcv_nxt += 1;
+ tcp_make_ack (tc0, b0);
+ next0 = tcp_next_output (tc0->c_is_ip4);
+ tc0->state = TCP_STATE_CLOSE_WAIT;
stream_session_disconnect_notify (&tc0->connection);
tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
}
done:
@@ -1973,6 +1977,12 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
seq0 = vnet_buffer (b0)->tcp.seq_number;
tcp0 = tcp_buffer_hdr (b0);
+ /* Crude check to see if the connection handle does not match
+ * the packet. Probably connection just switched to established */
+ if (PREDICT_FALSE (tcp0->dst_port != tc0->c_lcl_port
+ || tcp0->src_port != tc0->c_rmt_port))
+ goto drop;
+
if (PREDICT_FALSE
(!tcp_ack (tcp0) && !tcp_rst (tcp0) && !tcp_syn (tcp0)))
goto drop;
@@ -2265,6 +2275,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_header_t *tcp0 = 0;
tcp_connection_t *tc0;
u32 next0 = TCP_RCV_PROCESS_NEXT_DROP, error0 = TCP_ERROR_ENQUEUED;
+ u8 is_fin0;
bi0 = from[0];
to_next[0] = bi0;
@@ -2283,11 +2294,11 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
tcp0 = tcp_buffer_hdr (b0);
+ is_fin0 = tcp_is_fin (tcp0);
/* SYNs, FINs and data consume sequence numbers */
vnet_buffer (b0)->tcp.seq_end = vnet_buffer (b0)->tcp.seq_number
- + tcp_is_syn (tcp0) + tcp_is_fin (tcp0)
- + vnet_buffer (b0)->tcp.data_len;
+ + tcp_is_syn (tcp0) + is_fin0 + vnet_buffer (b0)->tcp.data_len;
if (CLIB_DEBUG)
{
@@ -2384,21 +2395,14 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* If FIN is ACKed */
else if (tc0->snd_una == tc0->snd_una_max)
{
- tc0->rcv_nxt += 1;
tc0->state = TCP_STATE_FIN_WAIT_2;
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
- if (tcp_fin (tcp0))
- {
- /* Stop all timers, 2MSL will be set lower */
- tcp_connection_timers_reset (tc0);
- }
- else
- {
- /* Wait for peer to finish sending its data */
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
- TCP_2MSL_TIME);
- }
+ /* Stop all retransmit timers because we have nothing more
+ * to send. Enable waitclose though because we're willing to
+ * wait for peer's FIN but not indefinitely. */
+ tcp_connection_timers_reset (tc0);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
}
break;
case TCP_STATE_FIN_WAIT_2:
@@ -2434,10 +2438,10 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (!tcp_rcv_ack_is_acceptable (tc0, b0))
goto drop;
+ tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
/* Apparently our FIN was lost */
- if (tcp_fin (tcp0))
+ if (is_fin0)
{
- /* Don't "make" fin since that increments snd_nxt */
tcp_send_fin (tc0);
goto drop;
}
@@ -2450,8 +2454,6 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* particular, this makes sure that we won't have dead sessions
* when processing events on the tx path */
tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
-
- /* Stop retransmit */
tcp_retransmit_timer_reset (tc0);
goto drop;
@@ -2466,8 +2468,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto drop;
tcp_make_ack (tc0, b0);
- tcp_timer_reset (tc0, TCP_TIMER_WAITCLOSE);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
goto drop;
@@ -2486,6 +2487,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
case TCP_STATE_FIN_WAIT_2:
if (vnet_buffer (b0)->tcp.data_len)
error0 = tcp_segment_rcv (tm, tc0, b0, &next0);
+ else if (is_fin0)
+ tc0->rcv_nxt += 1;
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2497,7 +2500,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* 8: check the FIN bit */
- if (!tcp_fin (tcp0))
+ if (!is_fin0)
goto drop;
switch (tc0->state)
@@ -2527,19 +2530,19 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
break;
case TCP_STATE_FIN_WAIT_2:
- /* Got FIN, send ACK! */
+ /* Got FIN, send ACK! Be more aggressive with resource cleanup */
tc0->state = TCP_STATE_TIME_WAIT;
tcp_connection_timers_reset (tc0);
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
tcp_make_ack (tc0, b0);
next0 = tcp_next_output (is_ip4);
TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
break;
case TCP_STATE_TIME_WAIT:
- /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait
+ /* Remain in the TIME-WAIT state. Restart the time-wait
* timeout.
*/
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
break;
}
TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc0);
@@ -3162,9 +3165,9 @@ do { \
TCP_ERROR_NONE);
_(TIME_WAIT, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(TIME_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
- _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
+ _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
- _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
+ _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_CONNECTION_CLOSED);
#undef _
}
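As the seq_end computation above encodes, SYN and FIN each consume one octet of sequence space (per RFC 793), which is why a pure FIN advances rcv_nxt by exactly one. A minimal restatement:

/* Sequence space consumed by a segment: SYN and FIN each count as one
 * octet, plus the payload length. */
static inline u32
tcp_seq_space_consumed (u8 is_syn, u8 is_fin, u32 data_len)
{
  return is_syn + is_fin + data_len;
}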
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index b843c926afe..be29f05f65c 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -629,9 +629,11 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
}
always_inline void
-tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
- u8 is_ip4)
+tcp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4, u8 flush)
{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ u32 thread_index = vlib_get_thread_index ();
u32 *to_next, next_index;
vlib_frame_t *f;
@@ -643,13 +645,42 @@ tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
/* Send to IP lookup */
next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
- f = vlib_get_frame_to_node (vm, next_index);
+ if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+ {
+ b->pre_data[0] = 2;
+ b->pre_data[1] = next_index;
+ }
+
+ f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, next_index);
+ ASSERT (f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f;
+ }
- /* Enqueue the packet */
to_next = vlib_frame_vector_args (f);
- to_next[0] = bi;
- f->n_vectors = 1;
- vlib_put_frame_to_node (vm, next_index, f);
+ to_next[f->n_vectors] = bi;
+ f->n_vectors += 1;
+ if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+ {
+ vlib_put_frame_to_node (vm, next_index, f);
+ tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+always_inline void
+tcp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1);
+}
+
+always_inline void
+tcp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+ u8 is_ip4)
+{
+ tcp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
}
always_inline void
@@ -666,8 +697,6 @@ tcp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
/* Decide where to send the packet */
next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
-
- /* Initialize the trajectory trace, if configured */
if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
{
b->pre_data[0] = 1;
@@ -856,7 +885,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, u8 is_ip4)
ASSERT (!bogus);
}
- tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
+ tcp_enqueue_to_ip_lookup_now (vm, b, bi, is_ip4);
TCP_EVT_DBG (TCP_EVT_RST_SENT, tc);
}
@@ -968,7 +997,24 @@ tcp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
}
/**
- * Flush both v4 and v6 tx frames for thread index
+ * Flush ip lookup tx frames populated by timer pops
+ */
+always_inline void
+tcp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
+ if (tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index])
+ {
+ u32 next_index;
+ next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+ vlib_put_frame_to_node (vm, next_index,
+ tcp_main.ip_lookup_tx_frames[!is_ip4]
+ [thread_index]);
+ tcp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+ }
+}
+
+/**
+ * Flush v4 and v6 tcp and ip-lookup tx frames for thread index
*/
void
tcp_flush_frames_to_output (u8 thread_index)
@@ -976,6 +1022,8 @@ tcp_flush_frames_to_output (u8 thread_index)
vlib_main_t *vm = vlib_get_main ();
tcp_flush_frame_to_output (vm, thread_index, 1);
tcp_flush_frame_to_output (vm, thread_index, 0);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 1);
+ tcp_flush_frame_to_ip_lookup (vm, thread_index, 0);
}
/**
@@ -984,22 +1032,28 @@ tcp_flush_frames_to_output (u8 thread_index)
void
tcp_send_fin (tcp_connection_t * tc)
{
- vlib_buffer_t *b;
- u32 bi;
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_main_t *vm = vlib_get_main ();
+ vlib_buffer_t *b;
+ u32 bi;
+ u8 fin_snt = 0;
+
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
return;
b = vlib_get_buffer (vm, bi);
- /* buffer will be initialized by in tcp_make_fin */
+ fin_snt = tc->flags & TCP_CONN_FINSNT;
+ if (fin_snt)
+ tc->snd_nxt = tc->snd_una;
tcp_make_fin (tc, b);
tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4);
- if (!(tc->flags & TCP_CONN_FINSNT))
+ if (!fin_snt)
{
tc->flags |= TCP_CONN_FINSNT;
tc->flags &= ~TCP_CONN_FINPNDG;
- tc->snd_nxt += 1;
+ /* Account for the FIN */
+ tc->snd_una_max += 1;
+ tc->snd_nxt = tc->snd_una_max;
}
tcp_retransmit_timer_force_update (tc);
TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
@@ -1398,7 +1452,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
else if (tc->state == TCP_STATE_SYN_RCVD)
{
tc->rto_boff += 1;
- tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
+ if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
+ tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
tc->rtt_ts = 0;
if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
@@ -1414,7 +1469,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
else
{
ASSERT (tc->state == TCP_STATE_CLOSED);
- clib_warning ("connection closed ...");
+ TCP_DBG ("connection state: %d", tc->state);
return;
}
}
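Usage summary for the two enqueue flavors introduced above: the plain variant appends to a cached per-thread frame that is handed to ip4/ip6-lookup only when it fills or when tcp_flush_frames_to_output() runs at the end of dispatch, while the _now variant flushes immediately, as the reset path does:

/* Batched: buffer rides the per-thread ip-lookup frame. */
tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);

/* Immediate: frame is put to the lookup node right away, e.g. for
 * resets built from an incoming packet (tcp_send_reset_w_pkt). */
tcp_enqueue_to_ip_lookup_now (vm, b, bi, tc->c_is_ip4);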
ass="p">; goto out; } p.sa_id = ntohl (mp->entry.sa_id); rv = ipsec_policy_mk_type (mp->entry.is_outbound, p.is_ipv6, p.policy, &p.type); if (rv) goto out; rv = ipsec_add_del_policy (vm, &p, mp->is_add, &stat_index); if (rv) goto out; #else rv = VNET_API_ERROR_UNIMPLEMENTED; goto out; #endif out: /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY, ({ rmp->stat_index = ntohl(stat_index); })); /* *INDENT-ON* */ } static int ipsec_proto_decode (vl_api_ipsec_proto_t in, ipsec_protocol_t * out) { in = clib_net_to_host_u32 (in); switch (in) { case IPSEC_API_PROTO_ESP: *out = IPSEC_PROTOCOL_ESP; return (0); case IPSEC_API_PROTO_AH: *out = IPSEC_PROTOCOL_AH; return (0); } return (VNET_API_ERROR_INVALID_PROTOCOL); } static vl_api_ipsec_proto_t ipsec_proto_encode (ipsec_protocol_t p) { switch (p) { case IPSEC_PROTOCOL_ESP: return clib_host_to_net_u32 (IPSEC_API_PROTO_ESP); case IPSEC_PROTOCOL_AH: return clib_host_to_net_u32 (IPSEC_API_PROTO_AH); } return (VNET_API_ERROR_UNIMPLEMENTED); } static int ipsec_crypto_algo_decode (vl_api_ipsec_crypto_alg_t in, ipsec_crypto_alg_t * out) { in = clib_net_to_host_u32 (in); switch (in) { #define _(v,f,s) case IPSEC_API_CRYPTO_ALG_##f: \ *out = IPSEC_CRYPTO_ALG_##f; \ return (0); foreach_ipsec_crypto_alg #undef _ } return (VNET_API_ERROR_INVALID_ALGORITHM); } static vl_api_ipsec_crypto_alg_t ipsec_crypto_algo_encode (ipsec_crypto_alg_t c) { switch (c) { #define _(v,f,s) case IPSEC_CRYPTO_ALG_##f: \ return clib_host_to_net_u32(IPSEC_API_CRYPTO_ALG_##f); foreach_ipsec_crypto_alg #undef _ case IPSEC_CRYPTO_N_ALG: break; } ASSERT (0); return (VNET_API_ERROR_UNIMPLEMENTED); } static int ipsec_integ_algo_decode (vl_api_ipsec_integ_alg_t in, ipsec_integ_alg_t * out) { in = clib_net_to_host_u32 (in); switch (in) { #define _(v,f,s) case IPSEC_API_INTEG_ALG_##f: \ *out = IPSEC_INTEG_ALG_##f; \ return (0); foreach_ipsec_integ_alg #undef _ } return (VNET_API_ERROR_INVALID_ALGORITHM); } static vl_api_ipsec_integ_alg_t ipsec_integ_algo_encode (ipsec_integ_alg_t i) { switch (i) { #define _(v,f,s) case IPSEC_INTEG_ALG_##f: \ return (clib_host_to_net_u32(IPSEC_API_INTEG_ALG_##f)); foreach_ipsec_integ_alg #undef _ case IPSEC_INTEG_N_ALG: break; } ASSERT (0); return (VNET_API_ERROR_UNIMPLEMENTED); } static void ipsec_key_decode (const vl_api_key_t * key, ipsec_key_t * out) { ipsec_mk_key (out, key->data, key->length); } static void ipsec_key_encode (const ipsec_key_t * in, vl_api_key_t * out) { out->length = in->len; clib_memcpy (out->data, in->data, out->length); } static ipsec_sa_flags_t ipsec_sa_flags_decode (vl_api_ipsec_sad_flags_t in) { ipsec_sa_flags_t flags = IPSEC_SA_FLAG_NONE; in = clib_net_to_host_u32 (in); #define _(v,f,s) if (in & IPSEC_API_SAD_FLAG_##f) \ flags |= IPSEC_SA_FLAG_##f; foreach_ipsec_sa_flags #undef _ return (flags); } static vl_api_ipsec_sad_flags_t ipsec_sad_flags_encode (const ipsec_sa_t * sa) { vl_api_ipsec_sad_flags_t flags = IPSEC_API_SAD_FLAG_NONE; if (ipsec_sa_is_set_USE_EXTENDED_SEQ_NUM (sa)) flags |= IPSEC_API_SAD_FLAG_USE_EXTENDED_SEQ_NUM; if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) flags |= IPSEC_API_SAD_FLAG_USE_ANTI_REPLAY; if (ipsec_sa_is_set_IS_TUNNEL (sa)) flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL; if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa)) flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6; if (ipsec_sa_is_set_UDP_ENCAP (sa)) flags |= IPSEC_API_SAD_FLAG_UDP_ENCAP; return clib_host_to_net_u32 (flags); } static void vl_api_ipsec_sad_entry_add_del_t_handler (vl_api_ipsec_sad_entry_add_del_t * mp) { vlib_main_t *vm __attribute__ ((unused)) = 
vlib_get_main (); vl_api_ipsec_sad_entry_add_del_reply_t *rmp; ip46_address_t tun_src = { }, tun_dst = { }; ipsec_key_t crypto_key, integ_key; ipsec_crypto_alg_t crypto_alg; ipsec_integ_alg_t integ_alg; ipsec_protocol_t proto; ipsec_sa_flags_t flags; u32 id, spi, sa_index = ~0; int rv; #if WITH_LIBSSL > 0 id = ntohl (mp->entry.sad_id); spi = ntohl (mp->entry.spi); rv = ipsec_proto_decode (mp->entry.protocol, &proto); if (rv) goto out; rv = ipsec_crypto_algo_decode (mp->entry.crypto_algorithm, &crypto_alg); if (rv) goto out; rv = ipsec_integ_algo_decode (mp->entry.integrity_algorithm, &integ_alg); if (rv) goto out; ipsec_key_decode (&mp->entry.crypto_key, &crypto_key); ipsec_key_decode (&mp->entry.integrity_key, &integ_key); flags = ipsec_sa_flags_decode (mp->entry.flags); ip_address_decode (&mp->entry.tunnel_src, &tun_src); ip_address_decode (&mp->entry.tunnel_dst, &tun_dst); if (mp->is_add) rv = ipsec_sa_add (id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags, 0, &tun_src, &tun_dst, &sa_index); else rv = ipsec_sa_del (id); #else rv = VNET_API_ERROR_UNIMPLEMENTED; #endif out: /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY, { rmp->stat_index = htonl (sa_index); }); /* *INDENT-ON* */ } static void send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg, u32 context) { vl_api_ipsec_spds_details_t *mp; u32 n_policies = 0; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPDS_DETAILS); mp->context = context; mp->spd_id = htonl (spd->id); #define _(s, n) n_policies += vec_len (spd->policies[IPSEC_SPD_POLICY_##s]); foreach_ipsec_spd_policy_type #undef _ mp->npolicies = htonl (n_policies); vl_api_send_msg (reg, (u8 *) mp); } static void vl_api_ipsec_spds_dump_t_handler (vl_api_ipsec_spds_dump_t * mp) { vl_api_registration_t *reg; ipsec_main_t *im = &ipsec_main; ipsec_spd_t *spd; #if WITH_LIBSSL > 0 reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) return; /* *INDENT-OFF* */ pool_foreach (spd, im->spds, ({ send_ipsec_spds_details (spd, reg, mp->context); })); /* *INDENT-ON* */ #else clib_warning ("unimplemented"); #endif } vl_api_ipsec_spd_action_t ipsec_spd_action_encode (ipsec_policy_action_t in) { vl_api_ipsec_spd_action_t out = IPSEC_API_SPD_ACTION_BYPASS; switch (in) { #define _(v,f,s) case IPSEC_POLICY_ACTION_##f: \ out = IPSEC_API_SPD_ACTION_##f; \ break; foreach_ipsec_policy_action #undef _ } return (clib_host_to_net_u32 (out)); } static void send_ipsec_spd_details (ipsec_policy_t * p, vl_api_registration_t * reg, u32 context) { vl_api_ipsec_spd_details_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPD_DETAILS); mp->context = context; mp->entry.spd_id = htonl (p->id); mp->entry.priority = htonl (p->priority); mp->entry.is_outbound = ((p->type == IPSEC_SPD_POLICY_IP6_OUTBOUND) || (p->type == IPSEC_SPD_POLICY_IP4_OUTBOUND)); ip_address_encode (&p->laddr.start, IP46_TYPE_ANY, &mp->entry.local_address_start); ip_address_encode (&p->laddr.stop, IP46_TYPE_ANY, &mp->entry.local_address_stop); ip_address_encode (&p->raddr.start, IP46_TYPE_ANY, &mp->entry.remote_address_start); ip_address_encode (&p->raddr.stop, IP46_TYPE_ANY, &mp->entry.remote_address_stop); mp->entry.local_port_start = p->lport.start; mp->entry.local_port_stop = p->lport.stop; mp->entry.remote_port_start = p->rport.start; mp->entry.remote_port_stop = p->rport.stop; mp->entry.protocol = p->protocol; mp->entry.policy = 
ipsec_spd_action_encode (p->policy); mp->entry.sa_id = htonl (p->sa_id); vl_api_send_msg (reg, (u8 *) mp); } static void vl_api_ipsec_spd_dump_t_handler (vl_api_ipsec_spd_dump_t * mp) { vl_api_registration_t *reg; ipsec_main_t *im = &ipsec_main; ipsec_spd_policy_type_t ptype; ipsec_policy_t *policy; ipsec_spd_t *spd; uword *p; u32 spd_index, *ii; #if WITH_LIBSSL > 0 reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) return; p = hash_get (im->spd_index_by_spd_id, ntohl (mp->spd_id)); if (!p) return; spd_index = p[0]; spd = pool_elt_at_index (im->spds, spd_index); /* *INDENT-OFF* */ FOR_EACH_IPSEC_SPD_POLICY_TYPE(ptype) { vec_foreach(ii, spd->policies[ptype]) { policy = pool_elt_at_index(im->policies, *ii); if (mp->sa_id == ~(0) || ntohl (mp->sa_id) == policy->sa_id) send_ipsec_spd_details (policy, reg, mp->context); } } /* *INDENT-ON* */ #else clib_warning ("unimplemented"); #endif } static void send_ipsec_spd_interface_details (vl_api_registration_t * reg, u32 spd_index, u32 sw_if_index, u32 context) { vl_api_ipsec_spd_interface_details_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_SPD_INTERFACE_DETAILS); mp->context = context; mp->spd_index = htonl (spd_index); mp->sw_if_index = htonl (sw_if_index); vl_api_send_msg (reg, (u8 *) mp); } static void vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t * mp) { ipsec_main_t *im = &ipsec_main; vl_api_registration_t *reg; u32 k, v, spd_index; #if WITH_LIBSSL > 0 reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) return; if (mp->spd_index_valid) { spd_index = ntohl (mp->spd_index); /* *INDENT-OFF* */ hash_foreach(k, v, im->spd_index_by_sw_if_index, ({ if (v == spd_index) send_ipsec_spd_interface_details(reg, v, k, mp->context); })); /* *INDENT-ON* */ } else { /* *INDENT-OFF* */ hash_foreach(k, v, im->spd_index_by_sw_if_index, ({ send_ipsec_spd_interface_details(reg, v, k, mp->context); })); /* *INDENT-ON* */ } #else clib_warning ("unimplemented"); #endif } static void vl_api_ipsec_sa_set_key_t_handler (vl_api_ipsec_sa_set_key_t * mp) { vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main (); vl_api_ipsec_sa_set_key_reply_t *rmp; ipsec_key_t ck, ik; u32 id; int rv; #if WITH_LIBSSL > 0 id = ntohl (mp->sa_id); ipsec_key_decode (&mp->crypto_key, &ck); ipsec_key_decode (&mp->integrity_key, &ik); rv = ipsec_set_sa_key (id, &ck, &ik); #else rv = VNET_API_ERROR_UNIMPLEMENTED; #endif REPLY_MACRO (VL_API_IPSEC_SA_SET_KEY_REPLY); } static void vl_api_ipsec_tunnel_if_add_del_t_handler (vl_api_ipsec_tunnel_if_add_del_t * mp) { vl_api_ipsec_tunnel_if_add_del_reply_t *rmp; ipsec_main_t *im = &ipsec_main; vnet_main_t *vnm = im->vnet_main; u32 sw_if_index = ~0; ip46_type_t itype; int rv; #if WITH_LIBSSL > 0 ipsec_add_del_tunnel_args_t tun; clib_memset (&tun, 0, sizeof (ipsec_add_del_tunnel_args_t)); tun.is_add = mp->is_add; tun.esn = mp->esn; tun.anti_replay = mp->anti_replay; tun.local_spi = ntohl (mp->local_spi); tun.remote_spi = ntohl (mp->remote_spi); tun.crypto_alg = mp->crypto_alg; tun.local_crypto_key_len = mp->local_crypto_key_len; tun.remote_crypto_key_len = mp->remote_crypto_key_len; tun.integ_alg = mp->integ_alg; tun.local_integ_key_len = mp->local_integ_key_len; tun.remote_integ_key_len = mp->remote_integ_key_len; tun.udp_encap = mp->udp_encap; tun.tx_table_id = ntohl (mp->tx_table_id); itype = ip_address_decode (&mp->local_ip, &tun.local_ip); itype = ip_address_decode (&mp->remote_ip, &tun.remote_ip); tun.is_ip6 
= (IP46_TYPE_IP6 == itype); memcpy (&tun.local_crypto_key, &mp->local_crypto_key, mp->local_crypto_key_len); memcpy (&tun.remote_crypto_key, &mp->remote_crypto_key, mp->remote_crypto_key_len); memcpy (&tun.local_integ_key, &mp->local_integ_key, mp->local_integ_key_len); memcpy (&tun.remote_integ_key, &mp->remote_integ_key, mp->remote_integ_key_len); tun.renumber = mp->renumber; tun.show_instance = ntohl (mp->show_instance); rv = ipsec_add_del_tunnel_if_internal (vnm, &tun, &sw_if_index); #else rv = VNET_API_ERROR_UNIMPLEMENTED; #endif /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_IPSEC_TUNNEL_IF_ADD_DEL_REPLY, ({ rmp->sw_if_index = htonl (sw_if_index); })); /* *INDENT-ON* */ } static void send_ipsec_sa_details (ipsec_sa_t * sa, vl_api_registration_t * reg, u32 context, u32 sw_if_index) { vl_api_ipsec_sa_details_t *mp; mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_SA_DETAILS); mp->context = context; mp->entry.sad_id = htonl (sa->id); mp->entry.spi = htonl (sa->spi); mp->entry.protocol = ipsec_proto_encode (sa->protocol); mp->entry.tx_table_id = htonl (fib_table_get_table_id (sa->tx_fib_index, FIB_PROTOCOL_IP4)); mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg); ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key); mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg); ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key); mp->entry.flags = ipsec_sad_flags_encode (sa); if (ipsec_sa_is_set_IS_TUNNEL (sa)) { ip_address_encode (&sa->tunnel_src_addr, IP46_TYPE_ANY, &mp->entry.tunnel_src); ip_address_encode (&sa->tunnel_dst_addr, IP46_TYPE_ANY, &mp->entry.tunnel_dst); } mp->sw_if_index = htonl (sw_if_index); mp->salt = clib_host_to_net_u32 (sa->salt); mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq)); mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->last_seq)); if (ipsec_sa_is_set_USE_EXTENDED_SEQ_NUM (sa)) { mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi)); mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->last_seq_hi)); } if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa)) mp->replay_window = clib_host_to_net_u64 (sa->replay_window); vl_api_send_msg (reg, (u8 *) mp); } static void vl_api_ipsec_sa_dump_t_handler (vl_api_ipsec_sa_dump_t * mp) { vl_api_registration_t *reg; ipsec_main_t *im = &ipsec_main; vnet_main_t *vnm = im->vnet_main; ipsec_sa_t *sa; ipsec_tunnel_if_t *t; u32 *sa_index_to_tun_if_index = 0; #if WITH_LIBSSL > 0 reg = vl_api_client_index_to_registration (mp->client_index); if (!reg || pool_elts (im->sad) == 0) return; vec_validate_init_empty (sa_index_to_tun_if_index, vec_len (im->sad) - 1, ~0); /* *INDENT-OFF* */ pool_foreach (t, im->tunnel_interfaces, ({ vnet_hw_interface_t *hi; u32 sw_if_index = ~0; hi = vnet_get_hw_interface (vnm, t->hw_if_index); sw_if_index = hi->sw_if_index; sa_index_to_tun_if_index[t->input_sa_index] = sw_if_index; sa_index_to_tun_if_index[t->output_sa_index] = sw_if_index; })); pool_foreach (sa, im->sad, ({ if (mp->sa_id == ~(0) || ntohl (mp->sa_id) == sa->id) send_ipsec_sa_details (sa, reg, mp->context, sa_index_to_tun_if_index[sa - im->sad]); })); /* *INDENT-ON* */ vec_free (sa_index_to_tun_if_index); #else clib_warning ("unimplemented"); #endif } static void vl_api_ipsec_tunnel_if_set_key_t_handler (vl_api_ipsec_tunnel_if_set_key_t * mp) { vl_api_ipsec_tunnel_if_set_key_reply_t *rmp; ipsec_main_t *im = &ipsec_main; vnet_main_t *vnm = im->vnet_main; vnet_sw_interface_t *sw; u8 *key = 0; int rv; #if WITH_LIBSSL > 0 sw = 
vnet_get_sw_interface (vnm, ntohl (mp->sw_if_index)); switch (mp->key_type) { case IPSEC_IF_SET_KEY_TYPE_LOCAL_CRYPTO: case IPSEC_IF_SET_KEY_TYPE_REMOTE_CRYPTO: if (mp->alg < IPSEC_CRYPTO_ALG_AES_CBC_128 || mp->alg >= IPSEC_CRYPTO_N_ALG) { rv = VNET_API_ERROR_INVALID_ALGORITHM; goto out; } break; case IPSEC_IF_SET_KEY_TYPE_LOCAL_INTEG: case IPSEC_IF_SET_KEY_TYPE_REMOTE_INTEG: if (mp->alg >= IPSEC_INTEG_N_ALG) { rv = VNET_API_ERROR_INVALID_ALGORITHM; goto out; } break; case IPSEC_IF_SET_KEY_TYPE_NONE: default: rv = VNET_API_ERROR_UNIMPLEMENTED; goto out; break; } key = vec_new (u8, mp->key_len); clib_memcpy (key, mp->key, mp->key_len); rv = ipsec_set_interface_key (vnm, sw->hw_if_index, mp->key_type, mp->alg, key); vec_free (key); #else clib_warning ("unimplemented"); #endif out: REPLY_MACRO (VL_API_IPSEC_TUNNEL_IF_SET_KEY_REPLY); } static void vl_api_ipsec_tunnel_if_set_sa_t_handler (vl_api_ipsec_tunnel_if_set_sa_t * mp) { vl_api_ipsec_tunnel_if_set_sa_reply_t *rmp; ipsec_main_t *im = &ipsec_main; vnet_main_t *vnm = im->vnet_main; vnet_sw_interface_t *sw; int rv; #if WITH_LIBSSL > 0 sw = vnet_get_sw_interface (vnm, ntohl (mp->sw_if_index)); rv = ipsec_set_interface_sa (vnm, sw->hw_if_index, ntohl (mp->sa_id), mp->is_outbound); #else clib_warning ("unimplemented"); #endif REPLY_MACRO (VL_API_IPSEC_TUNNEL_IF_SET_SA_REPLY); } static void vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp) { vl_api_registration_t *rp; ipsec_main_t *im = &ipsec_main; u32 context = mp->context; rp = vl_api_client_index_to_registration (mp->client_index); if (rp == 0) { clib_warning ("Client %d AWOL", mp->client_index); return; } ipsec_ah_backend_t *ab; ipsec_esp_backend_t *eb; /* *INDENT-OFF* */ pool_foreach (ab, im->ah_backends, { vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_BACKEND_DETAILS); mp->context = context; snprintf ((char *)mp->name, sizeof (mp->name), "%.*s", vec_len (ab->name), ab->name); mp->protocol = ntohl (IPSEC_API_PROTO_AH); mp->index = ab - im->ah_backends; mp->active = mp->index == im->ah_current_backend ? 1 : 0; vl_api_send_msg (rp, (u8 *)mp); }); pool_foreach (eb, im->esp_backends, { vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp)); clib_memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_IPSEC_BACKEND_DETAILS); mp->context = context; snprintf ((char *)mp->name, sizeof (mp->name), "%.*s", vec_len (eb->name), eb->name); mp->protocol = ntohl (IPSEC_API_PROTO_ESP); mp->index = eb - im->esp_backends; mp->active = mp->index == im->esp_current_backend ? 
1 : 0; vl_api_send_msg (rp, (u8 *)mp); }); /* *INDENT-ON* */ } static void vl_api_ipsec_select_backend_t_handler (vl_api_ipsec_select_backend_t * mp) { ipsec_main_t *im = &ipsec_main; vl_api_ipsec_select_backend_reply_t *rmp; ipsec_protocol_t protocol; int rv = 0; if (pool_elts (im->sad) > 0) { rv = VNET_API_ERROR_INSTANCE_IN_USE; goto done; } rv = ipsec_proto_decode (mp->protocol, &protocol); if (rv) goto done; #if WITH_LIBSSL > 0 switch (protocol) { case IPSEC_PROTOCOL_ESP: if (pool_is_free_index (im->esp_backends, mp->index)) { rv = VNET_API_ERROR_INVALID_VALUE; break; } ipsec_select_esp_backend (im, mp->index); break; case IPSEC_PROTOCOL_AH: if (pool_is_free_index (im->ah_backends, mp->index)) { rv = VNET_API_ERROR_INVALID_VALUE; break; } ipsec_select_ah_backend (im, mp->index); break; default: rv = VNET_API_ERROR_INVALID_VALUE; break; } #else clib_warning ("unimplemented"); /* FIXME */ #endif done: REPLY_MACRO (VL_API_IPSEC_SELECT_BACKEND_REPLY); } /* * ipsec_api_hookup * Add vpe's API message handlers to the table. * vlib has already mapped shared memory and * added the client registration handlers. * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() */ #define vl_msg_name_crc_list #include <vnet/vnet_all_api_h.h> #undef vl_msg_name_crc_list static void setup_message_id_table (api_main_t * am) { #define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); foreach_vl_msg_name_crc_ipsec; #undef _ } static clib_error_t * ipsec_api_hookup (vlib_main_t * vm) { api_main_t *am = &api_main; #define _(N,n) \ vl_msg_api_set_handlers(VL_API_##N, #n, \ vl_api_##n##_t_handler, \ vl_noop_handler, \ vl_api_##n##_t_endian, \ vl_api_##n##_t_print, \ sizeof(vl_api_##n##_t), 1); foreach_vpe_api_msg; #undef _ /* * Set up the (msg_name, crc, message-id) table */ setup_message_id_table (am); return 0; } VLIB_API_INIT_FUNCTION (ipsec_api_hookup); /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */