summary | refs | log | tree | commit | diff | stats
path: root/src/vnet/session
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/session')
-rw-r--r-- src/vnet/session/application.c 7
-rw-r--r-- src/vnet/session/application_interface.c 32
-rw-r--r-- src/vnet/session/application_interface.h 21
-rw-r--r-- src/vnet/session/segment_manager.c 2
-rw-r--r-- src/vnet/session/session.c 461
-rw-r--r-- src/vnet/session/session.h 149
-rwxr-xr-x src/vnet/session/session_api.c 23
-rwxr-xr-x src/vnet/session/session_cli.c 21
-rw-r--r-- src/vnet/session/session_lookup.c 96
-rw-r--r-- src/vnet/session/session_lookup.h 16
-rw-r--r-- src/vnet/session/session_node.c 20
-rw-r--r-- src/vnet/session/session_table.h 1
-rw-r--r-- src/vnet/session/session_test.c 25
-rw-r--r-- src/vnet/session/stream_session.h 1
-rw-r--r-- src/vnet/session/transport.c 306
-rw-r--r-- src/vnet/session/transport.h 34
-rw-r--r-- src/vnet/session/transport_interface.c 109
-rw-r--r-- src/vnet/session/transport_interface.h 18
18 files changed, 951 insertions, 391 deletions
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 75d3cfb2e33..c6fd1197304 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -415,7 +415,6 @@ application_open_session (application_t * app, session_endpoint_t * sep,
u32 api_context)
{
segment_manager_t *sm;
- transport_connection_t *tc = 0;
int rv;
/* Make sure we have a segment manager for connects */
@@ -427,13 +426,9 @@ application_open_session (application_t * app, session_endpoint_t * sep,
app->connects_seg_manager = segment_manager_index (sm);
}
- if ((rv = stream_session_open (app->index, sep, &tc)))
+ if ((rv = session_open (app->index, sep, api_context)))
return rv;
- /* Store api_context for when the reply comes. Not the nicest thing
- * but better than allocating a separate half-open pool. */
- tc->s_index = api_context;
-
return 0;
}
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index a0dff90565a..8599c74fe46 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -92,7 +92,8 @@ static int
vnet_bind_i (u32 app_index, session_endpoint_t * sep, u64 * handle)
{
application_t *app;
- u32 table_index, listener_index;
+ u32 table_index;
+ u64 listener;
int rv, have_local = 0;
app = application_get_if_valid (app_index);
@@ -108,8 +109,8 @@ vnet_bind_i (u32 app_index, session_endpoint_t * sep, u64 * handle)
table_index = application_session_table (app,
session_endpoint_fib_proto (sep));
- listener_index = session_lookup_session_endpoint (table_index, sep);
- if (listener_index != SESSION_INVALID_INDEX)
+ listener = session_lookup_session_endpoint (table_index, sep);
+ if (listener != SESSION_INVALID_HANDLE)
return VNET_API_ERROR_ADDRESS_IN_USE;
/*
@@ -119,8 +120,8 @@ vnet_bind_i (u32 app_index, session_endpoint_t * sep, u64 * handle)
if (application_has_local_scope (app) && session_endpoint_is_zero (sep))
{
table_index = application_local_session_table (app);
- listener_index = session_lookup_session_endpoint (table_index, sep);
- if (listener_index != SESSION_INVALID_INDEX)
+ listener = session_lookup_session_endpoint (table_index, sep);
+ if (listener != SESSION_INVALID_HANDLE)
return VNET_API_ERROR_ADDRESS_IN_USE;
session_lookup_add_session_endpoint (table_index, sep, app->index);
*handle = session_lookup_local_listener_make_handle (sep);
@@ -206,6 +207,7 @@ vnet_connect_i (u32 app_index, u32 api_context, session_endpoint_t * sep,
{
application_t *server, *app;
u32 table_index;
+ stream_session_t *listener;
if (session_endpoint_is_zero (sep))
return VNET_API_ERROR_INVALID_VALUE;
@@ -243,10 +245,13 @@ vnet_connect_i (u32 app_index, u32 api_context, session_endpoint_t * sep,
table_index = application_session_table (app,
session_endpoint_fib_proto (sep));
- app_index = session_lookup_session_endpoint (table_index, sep);
- server = application_get (app_index);
- if (server && (server->flags & APP_OPTIONS_FLAGS_ACCEPT_REDIRECT))
- return app_connect_redirect (server, mp);
+ listener = session_lookup_listener (table_index, sep);
+ if (listener)
+ {
+ server = application_get (listener->app_index);
+ if (server && (server->flags & APP_OPTIONS_FLAGS_ACCEPT_REDIRECT))
+ return app_connect_redirect (server, mp);
+ }
/*
* Not connecting to a local server, propagate to transport
@@ -470,14 +475,15 @@ vnet_unbind_uri (vnet_unbind_args_t * a)
clib_error_t *
vnet_connect_uri (vnet_connect_args_t * a)
{
- session_endpoint_t sep = SESSION_ENDPOINT_NULL;
+ session_endpoint_t sep_null = SESSION_ENDPOINT_NULL;
int rv;
/* Parse uri */
- rv = parse_uri (a->uri, &sep);
+ a->sep = sep_null;
+ rv = parse_uri (a->uri, &a->sep);
if (rv)
return clib_error_return_code (0, rv, 0, "app init: %d", rv);
- if ((rv = vnet_connect_i (a->app_index, a->api_context, &sep, a->mp)))
+ if ((rv = vnet_connect_i (a->app_index, a->api_context, &a->sep, a->mp)))
return clib_error_return_code (0, rv, 0, "connect failed");
return 0;
}
@@ -489,7 +495,7 @@ vnet_disconnect_session (vnet_disconnect_args_t * a)
stream_session_t *s;
session_parse_handle (a->handle, &index, &thread_index);
- s = stream_session_get_if_valid (index, thread_index);
+ s = session_get_if_valid (index, thread_index);
if (!s || s->app_index != a->app_index)
return VNET_API_ERROR_INVALID_VALUE;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 5e1fe8ee528..0251c3bc311 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -56,11 +56,7 @@ typedef struct _vnet_bind_args_t
union
{
char *uri;
- struct
- {
- session_endpoint_t sep;
- transport_proto_t proto;
- };
+ session_endpoint_t sep;
};
u32 app_index;
@@ -86,23 +82,14 @@ typedef struct _vnet_unbind_args_t
typedef struct _vnet_connect_args
{
- union
- {
- char *uri;
- struct
- {
- session_endpoint_t sep;
- transport_proto_t proto;
- };
- };
+ char *uri;
+ session_endpoint_t sep;
u32 app_index;
u32 api_context;
/* Used for redirects */
void *mp;
-
- /* used for proxy connections */
- u64 server_handle;
+ u64 session_handle;
} vnet_connect_args_t;
typedef struct _vnet_disconnect_args_t
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index f35dec72d88..cb83d8e53bc 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -273,7 +273,7 @@ segment_manager_del_sessions (segment_manager_t * sm)
if (session->session_state != SESSION_STATE_CLOSED)
{
session->session_state = SESSION_STATE_CLOSED;
- session_send_session_evt_to_thread (stream_session_handle
+ session_send_session_evt_to_thread (session_handle
(session),
FIFO_EVENT_DISCONNECT,
thread_index);
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 88b38f15a61..7f28a3992ed 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -28,67 +28,151 @@
session_manager_main_t session_manager_main;
extern transport_proto_vft_t *tp_vfts;
-int
-stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc,
- u8 alloc_fifos, stream_session_t ** ret_s)
+/**
+ * Build a fifo event and post it to the vpp event queue of a thread.
+ *
+ * For FIFO_EVENT_RPC the event carries @a fp and @a rpc_args and
+ * @a session_handle is not used; for every other event type the event
+ * carries @a session_handle and the rpc arguments are not used.
+ *
+ * The enqueue is attempted at most four times. If every attempt fails the
+ * event is dropped: the failure is only logged through SESSION_DBG and is
+ * not reported to the caller.
+ *
+ * @param session_handle Handle stored in the event for non-RPC events.
+ * @param evt_type Type of event to post.
+ * @param thread_index Thread whose vpp event queue receives the event.
+ * @param fp Function pointer stored for FIFO_EVENT_RPC events.
+ * @param rpc_args Argument stored alongside @a fp for RPC events.
+ */
+static void
+session_send_evt_to_thread (u64 session_handle, fifo_event_type_t evt_type,
+ u32 thread_index, void *fp, void *rpc_args)
+{
+ u32 tries = 0;
+ /* Start from a zeroed event so fields not set below are not garbage */
+ session_fifo_event_t evt = { {0}, };
+ unix_shared_memory_queue_t *q;
+
+ evt.event_type = evt_type;
+ if (evt_type == FIFO_EVENT_RPC)
+ {
+ evt.rpc_args.fp = fp;
+ evt.rpc_args.arg = rpc_args;
+ }
+ else
+ evt.session_handle = session_handle;
+
+ q = session_manager_get_vpp_event_queue (thread_index);
+ /* Retry while the queue refuses the event; give up on the 4th failure.
+ * NOTE(review): the trailing 1 is presumably the "nowait" flag -- confirm */
+ while (unix_shared_memory_queue_add (q, (u8 *) & evt, 1))
+ {
+ if (tries++ == 3)
+ {
+ SESSION_DBG ("failed to enqueue evt");
+ break;
+ }
+ }
+}
+
+/**
+ * Post a session event (identified by session handle) to a thread.
+ *
+ * Thin wrapper over session_send_evt_to_thread () that passes no rpc
+ * function and no rpc arguments.
+ *
+ * @param session_handle Handle of the session the event refers to.
+ * @param evt_type Type of event to post.
+ * @param thread_index Thread whose vpp event queue receives the event.
+ */
+void
+session_send_session_evt_to_thread (u64 session_handle,
+ fifo_event_type_t evt_type,
+ u32 thread_index)
+{
+ session_send_evt_to_thread (session_handle, evt_type, thread_index, 0, 0);
+}
+
+/**
+ * Run a function on a given thread.
+ *
+ * If @a thread_index is not the calling thread, a FIFO_EVENT_RPC event
+ * carrying @a fp and @a rpc_args is posted to that thread's vpp event
+ * queue. If it is the calling thread, @a fp is invoked directly, before
+ * this function returns.
+ *
+ * @param thread_index Thread that should execute the function.
+ * @param fp Function to call; invoked as void (*) (void *).
+ * @param rpc_args Argument handed to @a fp.
+ */
+void
+session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args)
+{
+ if (thread_index != vlib_get_thread_index ())
+ session_send_evt_to_thread (0, FIFO_EVENT_RPC, thread_index, fp,
+ rpc_args);
+ else
+ {
+ /* Already on the target thread: no need to go through the queue */
+ void (*fnp) (void *) = fp;
+ fnp (rpc_args);
+ }
+}
+
+/**
+ * Allocate a zeroed session from a thread's session pool.
+ *
+ * If the allocation would make the pool expand, the pool_get is done
+ * while holding that thread's peekers write lock, so the expansion waits
+ * for threads currently peeking into the pool.
+ *
+ * @param thread_index Index of the per-thread pool to allocate from.
+ * @return Pointer to the new session with session_index and thread_index
+ * set and every other field zero.
+ */
+stream_session_t *
+session_alloc (u32 thread_index)
{
session_manager_main_t *smm = &session_manager_main;
+ stream_session_t *s;
+ u8 will_expand = 0;
+ pool_get_aligned_will_expand (smm->sessions[thread_index], will_expand,
+ CLIB_CACHE_LINE_BYTES);
+ /* If we have peekers, let them finish */
+ if (PREDICT_FALSE (will_expand))
+ {
+ clib_spinlock_lock_if_init (&smm->peekers_write_locks[thread_index]);
+ pool_get_aligned (session_manager_main.sessions[thread_index], s,
+ CLIB_CACHE_LINE_BYTES);
+ clib_spinlock_unlock_if_init (&smm->peekers_write_locks[thread_index]);
+ }
+ else
+ {
+ pool_get_aligned (session_manager_main.sessions[thread_index], s,
+ CLIB_CACHE_LINE_BYTES);
+ }
+ memset (s, 0, sizeof (*s));
+ /* The session index is the element's position in the per-thread pool */
+ s->session_index = s - session_manager_main.sessions[thread_index];
+ s->thread_index = thread_index;
+ return s;
+}
+
+/**
+ * Return a session to the pool of the thread recorded in the session.
+ *
+ * @param s Session to release; it must not be used after this call.
+ */
+static void
+session_free (stream_session_t * s)
+{
+ pool_put (session_manager_main.sessions[s->thread_index], s);
+ /* Debug builds overwrite the freed element with 0xFA, presumably so
+ * that stale uses are easy to spot */
+ if (CLIB_DEBUG)
+ memset (s, 0xFA, sizeof (*s));
+}
+
+static int
+session_alloc_fifos (segment_manager_t * sm, stream_session_t * s)
+{
svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0;
u32 fifo_segment_index;
- u32 pool_index;
- stream_session_t *s;
- u64 value;
- u32 thread_index = tc->thread_index;
int rv;
- ASSERT (thread_index == vlib_get_thread_index ());
+ if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo,
+ &server_tx_fifo,
+ &fifo_segment_index)))
+ return rv;
+ /* Initialize backpointers */
+ server_rx_fifo->master_session_index = s->session_index;
+ server_rx_fifo->master_thread_index = s->thread_index;
- /* Create the session */
- pool_get_aligned (smm->sessions[thread_index], s, CLIB_CACHE_LINE_BYTES);
- memset (s, 0, sizeof (*s));
- pool_index = s - smm->sessions[thread_index];
+ server_tx_fifo->master_session_index = s->session_index;
+ server_tx_fifo->master_thread_index = s->thread_index;
- /* Allocate fifos */
- if (alloc_fifos)
- {
- if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo,
- &server_tx_fifo,
- &fifo_segment_index)))
- {
- pool_put (smm->sessions[thread_index], s);
- return rv;
- }
- /* Initialize backpointers */
- server_rx_fifo->master_session_index = pool_index;
- server_rx_fifo->master_thread_index = thread_index;
+ s->server_rx_fifo = server_rx_fifo;
+ s->server_tx_fifo = server_tx_fifo;
+ s->svm_segment_index = fifo_segment_index;
+ return 0;
+}
- server_tx_fifo->master_session_index = pool_index;
- server_tx_fifo->master_thread_index = thread_index;
+static stream_session_t *
+session_alloc_for_connection (transport_connection_t * tc)
+{
+ stream_session_t *s;
+ u32 thread_index = tc->thread_index;
- s->server_rx_fifo = server_rx_fifo;
- s->server_tx_fifo = server_tx_fifo;
- s->svm_segment_index = fifo_segment_index;
- }
+ ASSERT (thread_index == vlib_get_thread_index ());
- /* Initialize state machine, such as it is... */
- s->session_type = session_type_from_proto_and_ip (tc->transport_proto,
- tc->is_ip4);
+ s = session_alloc (thread_index);
+ s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
s->session_state = SESSION_STATE_CONNECTING;
s->thread_index = thread_index;
- s->session_index = pool_index;
- /* Attach transport to session */
+ /* Attach transport to session and vice versa */
s->connection_index = tc->c_index;
-
- /* Attach session to transport */
tc->s_index = s->session_index;
+ return s;
+}
+
+static int
+session_alloc_and_init (segment_manager_t * sm, transport_connection_t * tc,
+ u8 alloc_fifos, stream_session_t ** ret_s)
+{
+ stream_session_t *s;
+ int rv;
+
+ s = session_alloc_for_connection (tc);
+ if (alloc_fifos && (rv = session_alloc_fifos (sm, s)))
+ {
+ session_free (s);
+ return rv;
+ }
/* Add to the main lookup table */
- value = stream_session_handle (s);
- session_lookup_add_connection (tc, value);
+ session_lookup_add_connection (tc, session_handle (s));
*ret_s = s;
-
return 0;
}
@@ -217,8 +301,9 @@ session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b,
* @return Number of bytes enqueued or a negative value if enqueueing failed.
*/
int
-stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
- u32 offset, u8 queue_event, u8 is_in_order)
+session_enqueue_stream_connection (transport_connection_t * tc,
+ vlib_buffer_t * b, u32 offset,
+ u8 queue_event, u8 is_in_order)
{
stream_session_t *s;
int enqueued = 0, rv, in_order_off;
@@ -257,12 +342,12 @@ stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
* by calling stream_server_flush_enqueue_events () */
session_manager_main_t *smm = vnet_get_session_manager_main ();
u32 thread_index = s->thread_index;
- u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index];
+ u32 enqueue_epoch = smm->current_enqueue_epoch[tc->proto][thread_index];
- if (s->enqueue_epoch != my_enqueue_epoch)
+ if (s->enqueue_epoch != enqueue_epoch)
{
- s->enqueue_epoch = my_enqueue_epoch;
- vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index],
+ s->enqueue_epoch = enqueue_epoch;
+ vec_add1 (smm->session_to_enqueue[tc->proto][thread_index],
s - smm->sessions[thread_index]);
}
}
@@ -270,6 +355,41 @@ stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
return enqueued;
}
+/**
+ * Enqueue a buffer (and its chain, if any) into a session's rx fifo.
+ *
+ * Nothing is enqueued, and -1 is returned, when the rx fifo cannot take
+ * b->current_length bytes. When @a queue_event is set the session is
+ * recorded, once per enqueue epoch, in the per-proto, per-thread list of
+ * sessions with pending rx notifications; this happens regardless of the
+ * value returned by the fifo enqueue.
+ *
+ * @param s Session whose rx fifo receives the data.
+ * @param b First buffer of the data to enqueue.
+ * @param proto Transport proto used to pick the epoch and pending list.
+ * @param queue_event Non-zero to mark the session for rx notification.
+ * @return -1 if the fifo lacks space, otherwise the value returned by
+ * svm_fifo_enqueue_nowait () plus any bytes enqueued from the
+ * buffer chain.
+ */
+int
+session_enqueue_dgram_connection (stream_session_t * s, vlib_buffer_t * b,
+ u8 proto, u8 queue_event)
+{
+ int enqueued = 0, rv, in_order_off;
+
+ if (svm_fifo_max_enqueue (s->server_rx_fifo) < b->current_length)
+ return -1;
+ enqueued = svm_fifo_enqueue_nowait (s->server_rx_fifo, b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && enqueued >= 0))
+ {
+ /* NOTE(review): only b->current_length bytes were requested above, so
+ * this condition presumably never holds and the offset is always 0 --
+ * confirm against svm_fifo_enqueue_nowait () */
+ in_order_off = enqueued > b->current_length ? enqueued : 0;
+ rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
+ if (rv > 0)
+ enqueued += rv;
+ }
+ if (queue_event)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be flushed
+ * by calling session_manager_flush_enqueue_events () for this proto */
+ session_manager_main_t *smm = vnet_get_session_manager_main ();
+ u32 thread_index = s->thread_index;
+ u32 enqueue_epoch = smm->current_enqueue_epoch[proto][thread_index];
+
+ /* Add the session to the pending list only once per epoch */
+ if (s->enqueue_epoch != enqueue_epoch)
+ {
+ s->enqueue_epoch = enqueue_epoch;
+ vec_add1 (smm->session_to_enqueue[proto][thread_index],
+ s - smm->sessions[thread_index]);
+ }
+ }
+ return enqueued;
+}
+
/** Check if we have space in rx fifo to push more bytes */
u8
stream_session_no_space (transport_connection_t * tc, u32 thread_index,
@@ -319,12 +439,11 @@ stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
* @return 0 on success or negative number if failed to send notification.
*/
static int
-stream_session_enqueue_notify (stream_session_t * s, u8 block)
+session_enqueue_notify (stream_session_t * s, u8 block)
{
application_t *app;
session_fifo_event_t evt;
unix_shared_memory_queue_t *q;
- static u32 serial_number;
if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED))
{
@@ -354,7 +473,6 @@ stream_session_enqueue_notify (stream_session_t * s, u8 block)
/* Fabricate event */
evt.fifo = s->server_rx_fifo;
evt.event_type = FIFO_EVENT_APP_RX;
- evt.event_id = serial_number++;
/* Add event to server's event queue */
q = app->event_queue;
@@ -389,35 +507,25 @@ stream_session_enqueue_notify (stream_session_t * s, u8 block)
* failures due to API queue being full.
*/
int
-session_manager_flush_enqueue_events (u32 thread_index)
+session_manager_flush_enqueue_events (u8 transport_proto, u32 thread_index)
{
session_manager_main_t *smm = &session_manager_main;
- u32 *session_indices_to_enqueue;
+ u32 *indices;
+ stream_session_t *s;
int i, errors = 0;
- session_indices_to_enqueue =
- smm->session_indices_to_enqueue_by_thread[thread_index];
+ indices = smm->session_to_enqueue[transport_proto][thread_index];
- for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
+ for (i = 0; i < vec_len (indices); i++)
{
- stream_session_t *s0;
-
- /* Get session */
- s0 = stream_session_get_if_valid (session_indices_to_enqueue[i],
- thread_index);
- if (s0 == 0 || stream_session_enqueue_notify (s0, 0 /* don't block */ ))
- {
- errors++;
- }
+ s = session_get_if_valid (indices[i], thread_index);
+ if (s == 0 || session_enqueue_notify (s, 0 /* don't block */ ))
+ errors++;
}
- vec_reset_length (session_indices_to_enqueue);
-
- smm->session_indices_to_enqueue_by_thread[thread_index] =
- session_indices_to_enqueue;
-
- /* Increment enqueue epoch for next round */
- smm->current_enqueue_epoch[thread_index]++;
+ vec_reset_length (indices);
+ smm->session_to_enqueue[transport_proto][thread_index] = indices;
+ smm->current_enqueue_epoch[transport_proto][thread_index]++;
return errors;
}
@@ -438,22 +546,25 @@ stream_session_init_fifos_pointers (transport_connection_t * tc,
}
int
-stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
+session_stream_connect_notify (transport_connection_t * tc, u8 is_fail)
{
application_t *app;
stream_session_t *new_s = 0;
u64 handle;
u32 opaque = 0;
int error = 0;
+ segment_manager_t *sm;
+ u8 alloc_fifos;
+ /*
+ * Find connection handle and cleanup half-open table
+ */
handle = session_lookup_half_open_handle (tc);
if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
{
SESSION_DBG ("half-open was removed!");
return -1;
}
-
- /* Cleanup half-open table */
session_lookup_del_half_open (tc);
/* Get the app's index from the handle we stored when opening connection
@@ -462,17 +573,16 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
app = application_get_if_valid (handle >> 32);
if (!app)
return -1;
-
opaque = tc->s_index;
+ /*
+ * Allocate new session with fifos (svm segments are allocated if needed)
+ */
if (!is_fail)
{
- segment_manager_t *sm;
- u8 alloc_fifos;
sm = application_get_connect_segment_manager (app);
alloc_fifos = application_is_proxy (app);
- /* Create new session (svm segments are allocated if needed) */
- if (stream_session_create_i (sm, tc, alloc_fifos, &new_s))
+ if (session_alloc_and_init (sm, tc, alloc_fifos, &new_s))
{
is_fail = 1;
error = -1;
@@ -481,7 +591,9 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
new_s->app_index = app->index;
}
- /* Notify client application */
+ /*
+ * Notify client application
+ */
if (app->cb_fns.session_connected_callback (app->index, opaque, new_s,
is_fail))
{
@@ -498,6 +610,67 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
return error;
}
+/**
+ * Arguments handed to session_switch_pool () when a session is moved from
+ * one thread's pool to another.
+ */
+typedef struct _session_switch_pool_args
+{
+ /* Index of the old session in the pool of thread_index */
+ u32 session_index;
+ /* Thread that owns the old session and must run the cleanup */
+ u32 thread_index;
+ /* Thread that owns the new session */
+ u32 new_thread_index;
+ /* Index of the new session in the pool of new_thread_index */
+ u32 new_session_index;
+} session_switch_pool_args_t;
+
+/**
+ * Finish moving a session to another thread's pool.
+ *
+ * Must run on the thread that owns the old session (asserted). Repoints
+ * the tx fifo's master session/thread at the new session, asks the
+ * transport to clean up the old connection, frees the old session and
+ * finally frees @a cb_args.
+ *
+ * @param cb_args Pointer to a session_switch_pool_args_t; released here
+ * with clib_mem_free ().
+ */
+static void
+session_switch_pool (void *cb_args)
+{
+ session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args;
+ stream_session_t *s;
+ ASSERT (args->thread_index == vlib_get_thread_index ());
+ s = session_get (args->session_index, args->thread_index);
+ /* Only the tx fifo is handed over here; session_dgram_connect_notify ()
+ * sets the rx fifo back pointers itself */
+ s->server_tx_fifo->master_session_index = args->new_session_index;
+ s->server_tx_fifo->master_thread_index = args->new_thread_index;
+ tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index);
+ session_free (s);
+ clib_mem_free (cb_args);
+}
+
+/**
+ * Move dgram session to the right thread
+ *
+ * Clones the session found at tc->s_index in the pool of
+ * @a old_thread_index, attaches the clone to @a tc, marks it
+ * SESSION_STATE_READY and adds it to the main lookup table. The thread
+ * owning the old session is then asked, through an rpc event, to hand
+ * over the tx fifo and free the old session (see session_switch_pool ()).
+ *
+ * @param tc Transport connection the session belongs to; its s_index is
+ * updated to the index of the cloned session.
+ * @param old_thread_index Thread whose pool holds the session to clone.
+ * @param new_session Set to the cloned session.
+ * @return Always 0.
+ */
+int
+session_dgram_connect_notify (transport_connection_t * tc,
+ u32 old_thread_index,
+ stream_session_t ** new_session)
+{
+ stream_session_t *new_s;
+ session_switch_pool_args_t *rpc_args;
+
+ /*
+ * Clone half-open session to the right thread.
+ */
+ new_s = session_clone_safe (tc->s_index, old_thread_index);
+ new_s->connection_index = tc->c_index;
+ /* The rx fifo is taken over right away; the tx fifo is handed over by
+ * the old thread in session_switch_pool () */
+ new_s->server_rx_fifo->master_session_index = new_s->session_index;
+ new_s->server_rx_fifo->master_thread_index = new_s->thread_index;
+ new_s->session_state = SESSION_STATE_READY;
+ session_lookup_add_connection (tc, session_handle (new_s));
+
+ /*
+ * Ask thread owning the old session to clean it up and make us the tx
+ * fifo owner
+ */
+ /* rpc_args is released by session_switch_pool () once it has run */
+ rpc_args = clib_mem_alloc (sizeof (*rpc_args));
+ rpc_args->new_session_index = new_s->session_index;
+ rpc_args->new_thread_index = new_s->thread_index;
+ /* tc->s_index still refers to the old session at this point */
+ rpc_args->session_index = tc->s_index;
+ rpc_args->thread_index = old_thread_index;
+ session_send_rpc_evt_to_thread (rpc_args->thread_index, session_switch_pool,
+ rpc_args);
+
+ /* Only now point the transport at the cloned session */
+ tc->s_index = new_s->session_index;
+ *new_session = new_s;
+ return 0;
+}
+
void
stream_session_accept_notify (transport_connection_t * tc)
{
@@ -533,7 +706,6 @@ stream_session_disconnect_notify (transport_connection_t * tc)
void
stream_session_delete (stream_session_t * s)
{
- session_manager_main_t *smm = vnet_get_session_manager_main ();
int rv;
/* Delete from the main lookup table. */
@@ -543,10 +715,7 @@ stream_session_delete (stream_session_t * s)
/* Cleanup fifo segments */
segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo,
s->server_tx_fifo);
-
- pool_put (smm->sessions[s->thread_index], s);
- if (CLIB_DEBUG)
- memset (s, 0xFA, sizeof (*s));
+ session_free (s);
}
/**
@@ -563,7 +732,7 @@ stream_session_delete_notify (transport_connection_t * tc)
stream_session_t *s;
/* App might've been removed already */
- s = stream_session_get_if_valid (tc->s_index, tc->thread_index);
+ s = session_get_if_valid (tc->s_index, tc->thread_index);
if (!s)
return;
stream_session_delete (s);
@@ -596,14 +765,14 @@ stream_session_accept (transport_connection_t * tc, u32 listener_index,
session_type_t sst;
int rv;
- sst = session_type_from_proto_and_ip (tc->transport_proto, tc->is_ip4);
+ sst = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
/* Find the server */
listener = listen_session_get (sst, listener_index);
server = application_get (listener->app_index);
sm = application_get_listen_segment_manager (server, listener);
- if ((rv = stream_session_create_i (sm, tc, 1, &s)))
+ if ((rv = session_alloc_and_init (sm, tc, 1, &s)))
return rv;
s->app_index = server->index;
@@ -629,14 +798,17 @@ stream_session_accept (transport_connection_t * tc, u32 listener_index,
* @param app_index Index of the application requesting the connect
* @param st Session type requested.
* @param tep Remote transport endpoint
- * @param res Resulting transport connection .
+ * @param opaque Opaque data (typically, api_context) the application expects
+ * on open completion.
*/
int
-stream_session_open (u32 app_index, session_endpoint_t * rmt,
- transport_connection_t ** res)
+session_open (u32 app_index, session_endpoint_t * rmt, u32 opaque)
{
transport_connection_t *tc;
session_type_t sst;
+ segment_manager_t *sm;
+ stream_session_t *s;
+ application_t *app;
int rv;
u64 handle;
@@ -644,22 +816,45 @@ stream_session_open (u32 app_index, session_endpoint_t * rmt,
rv = tp_vfts[sst].open (session_endpoint_to_transport (rmt));
if (rv < 0)
{
- clib_warning ("Transport failed to open connection.");
+ SESSION_DBG ("Transport failed to open connection.");
return VNET_API_ERROR_SESSION_CONNECT;
}
tc = tp_vfts[sst].get_half_open ((u32) rv);
- /* Save app and tc index. The latter is needed to help establish the
- * connection while the former is needed when the connect notify comes
- * and we have to notify the external app */
- handle = (((u64) app_index) << 32) | (u64) tc->c_index;
-
- /* Add to the half-open lookup table */
- session_lookup_add_half_open (tc, handle);
+ /* If transport offers a stream service, only allocate session once the
+ * connection has been established.
+ */
+ if (transport_is_stream (rmt->transport_proto))
+ {
+ /* Add connection to half-open table and save app and tc index. The
+ * latter is needed to help establish the connection while the former
+ * is needed when the connect notify comes and we have to notify the
+ * external app
+ */
+ handle = (((u64) app_index) << 32) | (u64) tc->c_index;
+ session_lookup_add_half_open (tc, handle);
+
+ /* Store api_context (opaque) for when the reply comes. Not the nicest
+ * thing but better than allocating a separate half-open pool.
+ */
+ tc->s_index = opaque;
+ }
+ /* For dgram type of service, allocate session and fifos now.
+ */
+ else
+ {
+ app = application_get (app_index);
+ sm = application_get_connect_segment_manager (app);
- *res = tc;
+ if (session_alloc_and_init (sm, tc, 1, &s))
+ return -1;
+ s->app_index = app->index;
+ s->session_state = SESSION_STATE_CONNECTING_READY;
+ /* Tell the app about the new event fifo for this session */
+ app->cb_fns.session_connected_callback (app->index, opaque, s, 0);
+ }
return 0;
}
@@ -672,14 +867,14 @@ stream_session_open (u32 app_index, session_endpoint_t * rmt,
* @param tep Local endpoint to be listened on.
*/
int
-stream_session_listen (stream_session_t * s, session_endpoint_t * tep)
+stream_session_listen (stream_session_t * s, session_endpoint_t * sep)
{
transport_connection_t *tc;
u32 tci;
/* Transport bind/listen */
tci = tp_vfts[s->session_type].bind (s->session_index,
- session_endpoint_to_transport (tep));
+ session_endpoint_to_transport (sep));
if (tci == (u32) ~ 0)
return -1;
@@ -694,7 +889,6 @@ stream_session_listen (stream_session_t * s, session_endpoint_t * tep)
/* Add to the main lookup table */
session_lookup_add_connection (tc, s->session_index);
-
return 0;
}
@@ -726,32 +920,6 @@ stream_session_stop_listen (stream_session_t * s)
return 0;
}
-void
-session_send_session_evt_to_thread (u64 session_handle,
- fifo_event_type_t evt_type,
- u32 thread_index)
-{
- static u16 serial_number = 0;
- u32 tries = 0;
- session_fifo_event_t evt;
- unix_shared_memory_queue_t *q;
-
- /* Fabricate event */
- evt.session_handle = session_handle;
- evt.event_type = evt_type;
- evt.event_id = serial_number++;
-
- q = session_manager_get_vpp_event_queue (thread_index);
- while (unix_shared_memory_queue_add (q, (u8 *) & evt, 1))
- {
- if (tries++ == 3)
- {
- TCP_DBG ("failed to enqueue evt");
- break;
- }
- }
-}
-
/**
* Disconnect session and propagate to transport. This should eventually
* result in a delete notification that allows us to cleanup session state.
@@ -837,6 +1005,21 @@ session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4)
return SESSION_N_TYPES;
}
+/**
+ * Get the transport connection attached to a session.
+ *
+ * @param s Session to look up the transport for.
+ * @return The connection returned by the session type's get_connection
+ * callback, or 0 if the session state is below
+ * SESSION_STATE_READY.
+ */
+transport_connection_t *
+session_get_transport (stream_session_t * s)
+{
+ if (s->session_state >= SESSION_STATE_READY)
+ return tp_vfts[s->session_type].get_connection (s->connection_index,
+ s->thread_index);
+ return 0;
+}
+
+/**
+ * Get the transport listener attached to a listen session.
+ *
+ * @param s Listen session to look up the transport for.
+ * @return Whatever the session type's get_listener callback returns for
+ * the session's connection_index.
+ */
+transport_connection_t *
+listen_session_get_transport (stream_session_t * s)
+{
+ return tp_vfts[s->session_type].get_listener (s->connection_index);
+}
+
int
listen_session_get_local_session_endpoint (stream_session_t * listener,
session_endpoint_t * sep)
@@ -852,7 +1035,7 @@ listen_session_get_local_session_endpoint (stream_session_t * listener,
/* N.B. The ip should not be copied because this is the local endpoint */
sep->port = tc->lcl_port;
- sep->transport_proto = tc->transport_proto;
+ sep->transport_proto = tc->proto;
sep->is_ip4 = tc->is_ip4;
return 0;
}
@@ -864,7 +1047,7 @@ session_manager_main_enable (vlib_main_t * vm)
vlib_thread_main_t *vtm = vlib_get_thread_main ();
u32 num_threads;
u32 preallocated_sessions_per_worker;
- int i;
+ int i, j;
num_threads = 1 /* main thread */ + vtm->n_threads;
@@ -877,12 +1060,21 @@ session_manager_main_enable (vlib_main_t * vm)
/* configure per-thread ** vectors */
vec_validate (smm->sessions, num_threads - 1);
- vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1);
vec_validate (smm->tx_buffers, num_threads - 1);
vec_validate (smm->pending_event_vector, num_threads - 1);
+ vec_validate (smm->pending_disconnects, num_threads - 1);
vec_validate (smm->free_event_vector, num_threads - 1);
- vec_validate (smm->current_enqueue_epoch, num_threads - 1);
vec_validate (smm->vpp_event_queues, num_threads - 1);
+ vec_validate (smm->session_peekers, num_threads - 1);
+ vec_validate (smm->peekers_readers_locks, num_threads - 1);
+ vec_validate (smm->peekers_write_locks, num_threads - 1);
+
+ /* Size the per-proto vectors so they can be indexed by thread.
+ * NOTE(review): the loop body does not depend on j, so the inner loop
+ * just repeats the same two vec_validate calls num_threads times per
+ * proto; a single pass per proto looks sufficient -- confirm and drop
+ * the j loop together with the j declaration. */
+ for (i = 0; i < TRANSPORT_N_PROTO; i++)
+ for (j = 0; j < num_threads; j++)
+ {
+ vec_validate (smm->session_to_enqueue[i], num_threads - 1);
+ vec_validate (smm->current_enqueue_epoch[i], num_threads - 1);
+ }
for (i = 0; i < num_threads; i++)
{
@@ -890,6 +1082,8 @@ session_manager_main_enable (vlib_main_t * vm)
_vec_len (smm->free_event_vector[i]) = 0;
vec_validate (smm->pending_event_vector[i], 0);
_vec_len (smm->pending_event_vector[i]) = 0;
+ vec_validate (smm->pending_disconnects[i], 0);
+ _vec_len (smm->pending_disconnects[i]) = 0;
}
#if SESSION_DBG
@@ -924,6 +1118,7 @@ session_manager_main_enable (vlib_main_t * vm)
session_lookup_init ();
app_namespaces_init ();
+ transport_init ();
smm->is_enabled = 1;
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index b1a03d213e9..bd854d4b4c5 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -105,7 +105,7 @@ typedef CLIB_PACKED (struct {
rpc_args_t rpc_args;
};
u8 event_type;
- u16 event_id;
+ u8 postponed;
}) session_fifo_event_t;
/* *INDENT-ON* */
@@ -128,17 +128,21 @@ struct _session_manager_main
/** Per worker thread session pools */
stream_session_t **sessions;
+ /** Per worker-thread count of threads peeking into the session pool */
+ u32 *session_peekers;
+
+ /** Per worker-thread rw peekers locks */
+ clib_spinlock_t *peekers_readers_locks;
+ clib_spinlock_t *peekers_write_locks;
+
/** Pool of listen sessions. Same type as stream sessions to ease lookups */
stream_session_t *listen_sessions[SESSION_N_TYPES];
- /** Sparse vector to map dst port to stream server */
- u16 *stream_server_by_dst_port[SESSION_N_TYPES];
-
- /** per-worker enqueue epoch counters */
- u8 *current_enqueue_epoch;
+ /** Per-proto, per-worker enqueue epoch counters */
+ u8 *current_enqueue_epoch[TRANSPORT_N_PROTO];
- /** Per-worker thread vector of sessions to enqueue */
- u32 **session_indices_to_enqueue_by_thread;
+ /** Per-proto, per-worker thread vector of sessions to enqueue */
+ u32 **session_to_enqueue[TRANSPORT_N_PROTO];
/** per-worker tx buffer free lists */
u32 **tx_buffers;
@@ -149,6 +153,9 @@ struct _session_manager_main
/** per-worker active event vectors */
session_fifo_event_t **pending_event_vector;
+ /** per-worker postponed disconnects */
+ session_fifo_event_t **pending_disconnects;
+
/** vpp fifo event queue */
unix_shared_memory_queue_t **vpp_event_queues;
@@ -213,6 +220,8 @@ stream_session_is_valid (u32 si, u8 thread_index)
return 1;
}
+stream_session_t *session_alloc (u32 thread_index);
+
always_inline stream_session_t *
session_get (u32 si, u32 thread_index)
{
@@ -221,7 +230,7 @@ session_get (u32 si, u32 thread_index)
}
always_inline stream_session_t *
-stream_session_get_if_valid (u64 si, u32 thread_index)
+session_get_if_valid (u64 si, u32 thread_index)
{
if (thread_index >= vec_len (session_manager_main.sessions))
return 0;
@@ -234,7 +243,7 @@ stream_session_get_if_valid (u64 si, u32 thread_index)
}
always_inline u64
-stream_session_handle (stream_session_t * s)
+session_handle (stream_session_t * s)
{
return ((u64) s->thread_index << 32) | (u64) s->session_index;
}
@@ -267,6 +276,66 @@ session_get_from_handle (u64 handle)
session_index_from_handle (handle));
}
+/**
+ * Acquires a lock that blocks a session pool from expanding.
+ *
+ * Does nothing when @c thread_index is the calling thread. Otherwise the
+ * per-thread peeker count is bumped under the readers lock and the first
+ * peeker also takes the pool's write lock. Typically used for safely
+ * peeking into other threads' pools in order to clone elements. Drop the
+ * lock as soon as possible with @ref session_pool_remove_peeker.
+ *
+ * NOTE: Avoid using pool_elt_at_index while the lock is held because
+ * it may lead to free elt bitmap expansion/contraction!
+ */
+always_inline void
+session_pool_add_peeker (u32 thread_index)
+{
+  session_manager_main_t *smm = &session_manager_main;
+  clib_spinlock_t *readers_lock, *write_lock;
+  u32 *n_peekers;
+
+  if (thread_index == vlib_get_thread_index ())
+    return;
+
+  readers_lock = &smm->peekers_readers_locks[thread_index];
+  write_lock = &smm->peekers_write_locks[thread_index];
+  n_peekers = &smm->session_peekers[thread_index];
+
+  clib_spinlock_lock_if_init (readers_lock);
+  /* First peeker in is the one that grabs the write lock */
+  if (++(*n_peekers) == 1)
+    clib_spinlock_lock_if_init (write_lock);
+  clib_spinlock_unlock_if_init (readers_lock);
+}
+
+/**
+ * Drops the peek 'lock' taken with session_pool_add_peeker.
+ *
+ * Does nothing when @c thread_index is the calling thread. Otherwise the
+ * per-thread peeker count is decremented under the readers lock and the
+ * last peeker out releases the pool's write lock.
+ */
+always_inline void
+session_pool_remove_peeker (u32 thread_index)
+{
+  session_manager_main_t *smm = &session_manager_main;
+  clib_spinlock_t *readers_lock;
+  u32 *n_peekers;
+
+  if (thread_index == vlib_get_thread_index ())
+    return;
+
+  readers_lock = &smm->peekers_readers_locks[thread_index];
+  n_peekers = &smm->session_peekers[thread_index];
+
+  ASSERT (session_manager_main.session_peekers[thread_index] > 0);
+  clib_spinlock_lock_if_init (readers_lock);
+  if (--(*n_peekers) == 0)
+    clib_spinlock_unlock_if_init (&smm->peekers_write_locks[thread_index]);
+  clib_spinlock_unlock_if_init (readers_lock);
+}
+
+/**
+ * Get session from handle and 'lock' pool resize if not in same thread
+ *
+ * When the handle's thread is not the calling thread, the caller is
+ * registered as a pool peeker and should drop the peek 'lock' as soon as
+ * possible.
+ */
+always_inline stream_session_t *
+session_get_from_handle_safe (u64 handle)
+{
+  session_manager_main_t *smm = &session_manager_main;
+  u32 owner_thread = session_thread_from_handle (handle);
+
+  if (owner_thread != vlib_get_thread_index ())
+    {
+      session_pool_add_peeker (owner_thread);
+      /* Don't use pool_elt_at_index. See @ref session_pool_add_peeker */
+      return smm->sessions[owner_thread] + session_index_from_handle (handle);
+    }
+
+  /* Same thread, regular pool access is fine */
+  return pool_elt_at_index (smm->sessions[owner_thread],
+                            session_index_from_handle (handle));
+}
+
always_inline stream_session_t *
stream_session_listener_get (u8 sst, u64 si)
{
@@ -296,17 +365,52 @@ stream_session_rx_fifo_size (transport_connection_t * tc)
return s->server_rx_fifo->nitems;
}
+/** Offset of session @c s from the start of its thread's session pool */
+always_inline u32
+session_get_index (stream_session_t * s)
+{
+  stream_session_t *pool_base;
+
+  pool_base = session_manager_main.sessions[s->thread_index];
+  return (u32) (s - pool_base);
+}
+
+/**
+ * Copy session @c session_index of thread @c thread_index into a session
+ * newly allocated on the calling thread.
+ *
+ * The copy's thread_index and session_index are rewritten to match its new
+ * pool slot; everything else is copied as is.
+ */
+always_inline stream_session_t *
+session_clone_safe (u32 session_index, u32 thread_index)
+{
+  u32 my_thread_index = vlib_get_thread_index ();
+  stream_session_t *src, *clone;
+
+  /* If during the memcpy pool is reallocated AND the memory allocator
+   * decides to give the old chunk of memory to somebody in a hurry to
+   * scribble something on it, we have a problem. So add this thread as
+   * a session pool peeker.
+   */
+  session_pool_add_peeker (thread_index);
+  clone = session_alloc (my_thread_index);
+  src = session_manager_main.sessions[thread_index] + session_index;
+  clib_memcpy (clone, src, sizeof (*clone));
+  session_pool_remove_peeker (thread_index);
+
+  /* Re-home the copy on this thread */
+  clone->thread_index = my_thread_index;
+  clone->session_index = session_get_index (clone);
+  return clone;
+}
+
+transport_connection_t *session_get_transport (stream_session_t * s);
+
u32 stream_session_tx_fifo_max_dequeue (transport_connection_t * tc);
+stream_session_t *session_alloc (u32 thread_index);
int
-stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
- u32 offset, u8 queue_event, u8 is_in_order);
-int
-stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
- u32 offset, u32 max_bytes);
+session_enqueue_stream_connection (transport_connection_t * tc,
+ vlib_buffer_t * b, u32 offset,
+ u8 queue_event, u8 is_in_order);
+int session_enqueue_dgram_connection (stream_session_t * s, vlib_buffer_t * b,
+ u8 proto, u8 queue_event);
+int stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
+ u32 offset, u32 max_bytes);
u32 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
-int stream_session_connect_notify (transport_connection_t * tc, u8 is_fail);
+int session_stream_connect_notify (transport_connection_t * tc, u8 is_fail);
+int session_dgram_connect_notify (transport_connection_t * tc,
+ u32 old_thread_index,
+ stream_session_t ** new_session);
void stream_session_init_fifos_pointers (transport_connection_t * tc,
u32 rx_pointer, u32 tx_pointer);
@@ -314,12 +418,9 @@ void stream_session_accept_notify (transport_connection_t * tc);
void stream_session_disconnect_notify (transport_connection_t * tc);
void stream_session_delete_notify (transport_connection_t * tc);
void stream_session_reset_notify (transport_connection_t * tc);
-int
-stream_session_accept (transport_connection_t * tc, u32 listener_index,
- u8 notify);
-int
-stream_session_open (u32 app_index, session_endpoint_t * tep,
- transport_connection_t ** tc);
+int stream_session_accept (transport_connection_t * tc, u32 listener_index,
+ u8 notify);
+int session_open (u32 app_index, session_endpoint_t * tep, u32 opaque);
int stream_session_listen (stream_session_t * s, session_endpoint_t * tep);
int stream_session_stop_listen (stream_session_t * s);
void stream_session_disconnect (stream_session_t * s);
@@ -346,7 +447,7 @@ session_manager_get_vpp_event_queue (u32 thread_index)
return session_manager_main.vpp_event_queues[thread_index];
}
-int session_manager_flush_enqueue_events (u32 thread_index);
+int session_manager_flush_enqueue_events (u8 proto, u32 thread_index);
always_inline u64
listen_session_get_handle (stream_session_t * s)
@@ -400,6 +501,8 @@ listen_session_del (stream_session_t * s)
pool_put (session_manager_main.listen_sessions[s->session_type], s);
}
+transport_connection_t *listen_session_get_transport (stream_session_t * s);
+
int
listen_session_get_local_session_endpoint (stream_session_t * listener,
session_endpoint_t * sep);
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 5bfca7be6fc..432c7ba6013 100755
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -99,10 +99,10 @@ send_session_accept_callback (stream_session_t * s)
mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_ACCEPT_SESSION);
mp->context = server->index;
listener = listen_session_get (s->session_type, s->listener_index);
- tp_vft = session_get_transport_vft (s->session_type);
+ tp_vft = transport_protocol_get_vft (s->session_type);
tc = tp_vft->get_connection (s->connection_index, s->thread_index);
mp->listener_handle = listen_session_get_handle (listener);
- mp->handle = stream_session_handle (s);
+ mp->handle = session_handle (s);
mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
@@ -129,7 +129,7 @@ send_session_disconnect_callback (stream_session_t * s)
mp = vl_msg_api_alloc (sizeof (*mp));
memset (mp, 0, sizeof (*mp));
mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION);
- mp->handle = stream_session_handle (s);
+ mp->handle = session_handle (s);
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
@@ -148,7 +148,7 @@ send_session_reset_callback (stream_session_t * s)
mp = vl_msg_api_alloc (sizeof (*mp));
memset (mp, 0, sizeof (*mp));
mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SESSION);
- mp->handle = stream_session_handle (s);
+ mp->handle = session_handle (s);
vl_msg_api_send_shmem (q, (u8 *) & mp);
}
@@ -175,7 +175,7 @@ send_session_connected_callback (u32 app_index, u32 api_context,
vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
mp->server_rx_fifo = pointer_to_uword (s->server_rx_fifo);
mp->server_tx_fifo = pointer_to_uword (s->server_tx_fifo);
- mp->handle = stream_session_handle (s);
+ mp->handle = session_handle (s);
mp->vpp_event_queue_address = pointer_to_uword (vpp_queue);
mp->retval = 0;
}
@@ -463,11 +463,14 @@ vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
rv = VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
}
+ /*
+ * Don't reply to stream (tcp) connects. The reply will come once
+ * the connection is established. In case of the redirects, the reply
+ * will come from the server app.
+ */
if (rv == 0 || rv == VNET_API_ERROR_SESSION_REDIRECT)
return;
- /* Got some error, relay it */
-
done:
/* *INDENT-OFF* */
REPLY_MACRO (VL_API_CONNECT_SESSION_REPLY);
@@ -540,7 +543,7 @@ vl_api_reset_session_reply_t_handler (vl_api_reset_session_reply_t * mp)
return;
session_parse_handle (mp->handle, &index, &thread_index);
- s = stream_session_get_if_valid (index, thread_index);
+ s = session_get_if_valid (index, thread_index);
if (s == 0 || app->index != s->app_index)
{
clib_warning ("Invalid session!");
@@ -576,7 +579,7 @@ vl_api_accept_session_reply_t_handler (vl_api_accept_session_reply_t * mp)
else
{
session_parse_handle (mp->handle, &session_index, &thread_index);
- s = stream_session_get_if_valid (session_index, thread_index);
+ s = session_get_if_valid (session_index, thread_index);
if (!s)
{
clib_warning ("session doesn't exist");
@@ -623,8 +626,8 @@ vl_api_bind_sock_t_handler (vl_api_bind_sock_t * mp)
a->sep.port = mp->port;
a->sep.fib_index = mp->vrf;
a->sep.sw_if_index = ENDPOINT_INVALID_INDEX;
+ a->sep.transport_proto = mp->proto;
a->app_index = app->index;
- a->proto = mp->proto;
if ((error = vnet_bind (a)))
{
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 588cb603d39..f0f490daa21 100755
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -55,7 +55,7 @@ format_stream_session (u8 * s, va_list * args)
int verbose = va_arg (*args, int);
transport_proto_vft_t *tp_vft;
u8 *str = 0;
- tp_vft = session_get_transport_vft (ss->session_type);
+ tp_vft = transport_protocol_get_vft (ss->session_type);
if (verbose == 1 && ss->session_state >= SESSION_STATE_ACCEPTING)
str = format (0, "%-10u%-10u%-10lld",
@@ -63,9 +63,7 @@ format_stream_session (u8 * s, va_list * args)
svm_fifo_max_enqueue (ss->server_tx_fifo),
stream_session_get_index (ss));
- if (ss->session_state == SESSION_STATE_READY
- || ss->session_state == SESSION_STATE_ACCEPTING
- || ss->session_state == SESSION_STATE_CLOSED)
+ if (ss->session_state >= SESSION_STATE_ACCEPTING)
{
s = format (s, "%U", tp_vft->format_connection, ss->connection_index,
ss->thread_index, verbose);
@@ -146,16 +144,17 @@ unformat_stream_session (unformat_input_t * input, va_list * args)
return 0;
if (is_ip4)
- s = session_lookup4 (fib_index, &lcl.ip4, &rmt.ip4,
- clib_host_to_net_u16 (lcl_port),
- clib_host_to_net_u16 (rmt_port), proto);
+ s = session_lookup_safe4 (fib_index, &lcl.ip4, &rmt.ip4,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port), proto);
else
- s = session_lookup6 (fib_index, &lcl.ip6, &rmt.ip6,
- clib_host_to_net_u16 (lcl_port),
- clib_host_to_net_u16 (rmt_port), proto);
+ s = session_lookup_safe6 (fib_index, &lcl.ip6, &rmt.ip6,
+ clib_host_to_net_u16 (lcl_port),
+ clib_host_to_net_u16 (rmt_port), proto);
if (s)
{
*result = s;
+ session_pool_remove_peeker (s->thread_index);
return 1;
}
return 0;
@@ -324,7 +323,7 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (session_index != ~0)
{
- session = stream_session_get_if_valid (session_index, thread_index);
+ session = session_get_if_valid (session_index, thread_index);
if (!session)
return clib_error_return (0, "no session %d on thread %d",
session_index, thread_index);
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 796d93ec33e..740c5a6d533 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -116,7 +116,7 @@ always_inline void
make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t)
{
make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, t->rmt_port,
- session_type_from_proto_and_ip (t->transport_proto, 1));
+ session_type_from_proto_and_ip (t->proto, 1));
}
always_inline void
@@ -159,7 +159,7 @@ always_inline void
make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t)
{
make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, t->rmt_port,
- session_type_from_proto_and_ip (t->transport_proto, 0));
+ session_type_from_proto_and_ip (t->proto, 0));
}
@@ -339,7 +339,7 @@ session_lookup_del_session (stream_session_t * s)
return session_lookup_del_connection (ts);
}
-u32
+u64
session_lookup_session_endpoint (u32 table_index, session_endpoint_t * sep)
{
session_table_t *st;
@@ -349,14 +349,14 @@ session_lookup_session_endpoint (u32 table_index, session_endpoint_t * sep)
st = session_table_get (table_index);
if (!st)
- return SESSION_INVALID_INDEX;
+ return SESSION_INVALID_HANDLE;
if (sep->is_ip4)
{
make_v4_listener_kv (&kv4, &sep->ip.ip4, sep->port,
sep->transport_proto);
rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4);
if (rv == 0)
- return (u32) kv4.value;
+ return kv4.value;
}
else
{
@@ -364,9 +364,43 @@ session_lookup_session_endpoint (u32 table_index, session_endpoint_t * sep)
sep->transport_proto);
rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
if (rv == 0)
- return (u32) kv6.value;
+ return kv6.value;
}
- return SESSION_INVALID_INDEX;
+ return SESSION_INVALID_HANDLE;
+}
+
+/**
+ * Look up the session stored for an endpoint in the session table selected
+ * by the endpoint's fib protocol and fib index.
+ *
+ * @return session for the stored handle, or 0 if the table or the entry
+ *         does not exist
+ */
+stream_session_t *
+session_lookup_global_session_endpoint (session_endpoint_t * sep)
+{
+  session_table_t *st;
+  session_kv4_t kv4;
+  session_kv6_t kv6;
+  u32 table_index;
+  u8 fib_proto;
+
+  fib_proto = session_endpoint_fib_proto (sep);
+  table_index = session_lookup_get_index_for_fib (fib_proto, sep->fib_index);
+  st = session_table_get (table_index);
+  if (!st)
+    return 0;
+
+  if (sep->is_ip4)
+    {
+      make_v4_listener_kv (&kv4, &sep->ip.ip4, sep->port,
+                           sep->transport_proto);
+      if (clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4) != 0)
+        return 0;
+      return session_get_from_handle (kv4.value);
+    }
+
+  make_v6_listener_kv (&kv6, &sep->ip.ip6, sep->port, sep->transport_proto);
+  if (clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6) != 0)
+    return 0;
+  return session_get_from_handle (kv6.value);
}
u32
@@ -562,7 +596,7 @@ session_lookup_half_open_handle (transport_connection_t * tc)
if (tc->is_ip4)
{
make_v4_ss_kv (&kv4, &tc->lcl_ip.ip4, &tc->rmt_ip.ip4, tc->lcl_port,
- tc->rmt_port, tc->transport_proto);
+ tc->rmt_port, tc->proto);
rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4);
if (rv == 0)
return kv4.value;
@@ -570,7 +604,7 @@ session_lookup_half_open_handle (transport_connection_t * tc)
else
{
make_v6_ss_kv (&kv6, &tc->lcl_ip.ip6, &tc->rmt_ip.ip6, tc->lcl_port,
- tc->rmt_port, tc->transport_proto);
+ tc->rmt_port, tc->proto);
rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6);
if (rv == 0)
return kv6.value;
@@ -713,12 +747,19 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl,
/**
* Lookup session with ip4 and transport layer information
*
- * Lookup logic is identical to that of @ref session_lookup_connection_wt4 but
- * this returns a session as opposed to a transport connection;
+ * Important note: this may look into another thread's pool table and
+ * register as 'peeker'. If it does, the caller should call
+ * @ref session_pool_remove_peeker as soon as possible.
+ *
+ * Lookup logic is similar to that of @ref session_lookup_connection_wt4 but
+ * this returns a session as opposed to a transport connection and it does not
+ * try to lookup half-open sessions.
+ *
+ * Typically used by dgram connections
*/
stream_session_t *
-session_lookup4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt,
- u16 lcl_port, u16 rmt_port, u8 proto)
+session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
{
session_table_t *st;
session_kv4_t kv4;
@@ -733,16 +774,11 @@ session_lookup4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt,
make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto);
rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4);
if (rv == 0)
- return session_get_from_handle (kv4.value);
+ return session_get_from_handle_safe (kv4.value);
/* If nothing is found, check if any listener is available */
if ((s = session_lookup_listener4_i (st, lcl, lcl_port, proto)))
return s;
-
- /* Finally, try half-open connections */
- rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4);
- if (rv == 0)
- return session_get_from_handle (kv4.value);
return 0;
}
@@ -868,12 +904,19 @@ session_lookup_connection6 (u32 fib_index, ip6_address_t * lcl,
/**
* Lookup session with ip6 and transport layer information
*
- * Lookup logic is identical to that of @ref session_lookup_connection_wt6 but
- * this returns a session as opposed to a transport connection;
+ * Important note: this may look into another thread's pool table and
+ * register as 'peeker'. If it does, the caller should call
+ * @ref session_pool_remove_peeker as soon as possible.
+ *
+ * Lookup logic is similar to that of @ref session_lookup_connection_wt6 but
+ * this returns a session as opposed to a transport connection and it does not
+ * try to lookup half-open sessions.
+ *
+ * Typically used by dgram connections
*/
stream_session_t *
-session_lookup6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt,
- u16 lcl_port, u16 rmt_port, u8 proto)
+session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto)
{
session_table_t *st;
session_kv6_t kv6;
@@ -887,16 +930,11 @@ session_lookup6 (u32 fib_index, ip6_address_t * lcl, ip6_address_t * rmt,
make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto);
rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
if (rv == 0)
- return session_get_from_handle (kv6.value);
+ return session_get_from_handle_safe (kv6.value);
/* If nothing is found, check if any listener is available */
if ((s = session_lookup_listener6_i (st, lcl, lcl_port, proto)))
return s;
-
- /* Finally, try half-open connections */
- rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6);
- if (rv == 0)
- return session_get_from_handle (kv6.value);
return 0;
}
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index 20cbaf2acd6..449f8f4e2d2 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -20,12 +20,12 @@
#include <vnet/session/stream_session.h>
#include <vnet/session/transport.h>
-stream_session_t *session_lookup4 (u32 fib_index, ip4_address_t * lcl,
- ip4_address_t * rmt, u16 lcl_port,
- u16 rmt_port, u8 proto);
-stream_session_t *session_lookup6 (u32 fib_index, ip6_address_t * lcl,
- ip6_address_t * rmt, u16 lcl_port,
- u16 rmt_port, u8 proto);
+stream_session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
+ ip4_address_t * rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto);
+stream_session_t *session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl,
+ ip6_address_t * rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto);
transport_connection_t *session_lookup_connection_wt4 (u32 fib_index,
ip4_address_t * lcl,
ip4_address_t * rmt,
@@ -58,10 +58,12 @@ stream_session_t *session_lookup_listener (u32 table_index,
session_endpoint_t * sep);
int session_lookup_add_connection (transport_connection_t * tc, u64 value);
int session_lookup_del_connection (transport_connection_t * tc);
-u32 session_lookup_session_endpoint (u32 table_index,
+u64 session_lookup_session_endpoint (u32 table_index,
session_endpoint_t * sep);
u32 session_lookup_local_session_endpoint (u32 table_index,
session_endpoint_t * sep);
+stream_session_t *session_lookup_global_session_endpoint (session_endpoint_t
+ *);
int session_lookup_add_session_endpoint (u32 table_index,
session_endpoint_t * sep, u64 value);
int session_lookup_del_session_endpoint (u32 table_index,
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index d2291fa38de..cbe936ccd86 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -154,7 +154,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
next_index = next0 = session_type_to_next[s0->session_type];
- transport_vft = session_get_transport_vft (s0->session_type);
+ transport_vft = transport_protocol_get_vft (s0->session_type);
tc0 = transport_vft->get_connection (s0->connection_index, thread_index);
/* Make sure we have space to send and there's something to dequeue */
@@ -401,8 +401,7 @@ session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
always_inline stream_session_t *
session_event_get_session (session_fifo_event_t * e, u8 thread_index)
{
- return stream_session_get_if_valid (e->fifo->master_session_index,
- thread_index);
+ return session_get_if_valid (e->fifo->master_session_index, thread_index);
}
void
@@ -540,7 +539,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
session_manager_main_t *smm = vnet_get_session_manager_main ();
- session_fifo_event_t *my_pending_event_vector, *e;
+ session_fifo_event_t *my_pending_event_vector, *pending_disconnects, *e;
session_fifo_event_t *my_fifo_events;
u32 n_to_dequeue, n_events;
unix_shared_memory_queue_t *q;
@@ -570,8 +569,10 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/* min number of events we can dequeue without blocking */
n_to_dequeue = q->cursize;
my_pending_event_vector = smm->pending_event_vector[my_thread_index];
+ pending_disconnects = smm->pending_disconnects[my_thread_index];
- if (n_to_dequeue == 0 && vec_len (my_pending_event_vector) == 0)
+ if (!n_to_dequeue && !vec_len (my_pending_event_vector)
+ && !vec_len (pending_disconnects))
return 0;
SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 0);
@@ -603,9 +604,11 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
pthread_mutex_unlock (&q->mutex);
vec_append (my_fifo_events, my_pending_event_vector);
+ vec_append (my_fifo_events, smm->pending_disconnects[my_thread_index]);
_vec_len (my_pending_event_vector) = 0;
smm->pending_event_vector[my_thread_index] = my_pending_event_vector;
+ _vec_len (smm->pending_disconnects[my_thread_index]) = 0;
skip_dequeue:
n_events = vec_len (my_fifo_events);
@@ -644,6 +647,13 @@ skip_dequeue:
}
break;
case FIFO_EVENT_DISCONNECT:
+ /* Make sure disconnects run after the pending list is drained */
+ if (!e0->postponed)
+ {
+ e0->postponed = 1;
+ vec_add1 (smm->pending_disconnects[my_thread_index], *e0);
+ continue;
+ }
s0 = session_get_from_handle (e0->session_handle);
stream_session_disconnect (s0);
break;
diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h
index ce0b4a2ff25..5e0564043f3 100644
--- a/src/vnet/session/session_table.h
+++ b/src/vnet/session/session_table.h
@@ -37,6 +37,7 @@ typedef struct _session_lookup_table
#define SESSION_TABLE_INVALID_INDEX ((u32)~0)
#define SESSION_LOCAL_TABLE_PREFIX ((u32)~0)
#define SESSION_INVALID_INDEX ((u32)~0)
+#define SESSION_INVALID_HANDLE ((u64)~0)
typedef int (*ip4_session_table_walk_fn_t) (clib_bihash_kv_16_8_t * kvp,
void *ctx);
diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c
index b46b33d1396..433c20e5e1d 100644
--- a/src/vnet/session/session_test.c
+++ b/src/vnet/session/session_test.c
@@ -260,8 +260,9 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((s->app_index == server_index), "app_index should be that of "
"the server");
server_local_st_index = application_local_session_table (server);
- local_listener = session_lookup_session_endpoint (server_local_st_index,
- &server_sep);
+ local_listener =
+ session_lookup_local_session_endpoint (server_local_st_index,
+ &server_sep);
SESSION_TEST ((local_listener != SESSION_INVALID_INDEX),
"listener should exist in local table");
@@ -312,8 +313,9 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
s = session_lookup_listener (server_st_index, &server_sep);
SESSION_TEST ((s == 0), "listener should not exist in global table");
- local_listener = session_lookup_session_endpoint (server_local_st_index,
- &server_sep);
+ local_listener =
+ session_lookup_local_session_endpoint (server_local_st_index,
+ &server_sep);
SESSION_TEST ((s == 0), "listener should not exist in local table");
detach_args.app_index = server_index;
@@ -337,8 +339,9 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
s = session_lookup_listener (server_st_index, &server_sep);
SESSION_TEST ((s == 0), "listener should not exist in global table");
server_local_st_index = application_local_session_table (server);
- local_listener = session_lookup_session_endpoint (server_local_st_index,
- &server_sep);
+ local_listener =
+ session_lookup_local_session_endpoint (server_local_st_index,
+ &server_sep);
SESSION_TEST ((local_listener != SESSION_INVALID_INDEX),
"listener should exist in local table");
@@ -346,8 +349,9 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
error = vnet_unbind (&unbind_args);
SESSION_TEST ((error == 0), "unbind should work");
- local_listener = session_lookup_session_endpoint (server_local_st_index,
- &server_sep);
+ local_listener =
+ session_lookup_local_session_endpoint (server_local_st_index,
+ &server_sep);
SESSION_TEST ((local_listener == SESSION_INVALID_INDEX),
"listener should not exist in local table");
@@ -417,8 +421,9 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((s->app_index == server_index), "app_index should be that of "
"the server");
server_local_st_index = application_local_session_table (server);
- local_listener = session_lookup_session_endpoint (server_local_st_index,
- &server_sep);
+ local_listener =
+ session_lookup_local_session_endpoint (server_local_st_index,
+ &server_sep);
SESSION_TEST ((local_listener != SESSION_INVALID_INDEX),
"zero listener should exist in local table");
detach_args.app_index = server_index;
diff --git a/src/vnet/session/stream_session.h b/src/vnet/session/stream_session.h
index 1ed6e0b9eec..51d5065059b 100644
--- a/src/vnet/session/stream_session.h
+++ b/src/vnet/session/stream_session.h
@@ -43,6 +43,7 @@ typedef enum
SESSION_STATE_CONNECTING,
SESSION_STATE_ACCEPTING,
SESSION_STATE_READY,
+ SESSION_STATE_CONNECTING_READY,
SESSION_STATE_CLOSED,
SESSION_STATE_N_STATES,
} stream_session_state_t;
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
new file mode 100644
index 00000000000..fc722e45668
--- /dev/null
+++ b/src/vnet/session/transport.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/transport_interface.h>
+#include <vnet/session/session.h>
+#include <vnet/fib/fib.h>
+
+/**
+ * Per-type vector of transport protocol virtual function tables
+ */
+transport_proto_vft_t *tp_vfts;
+
+/*
+ * Port allocator seed
+ */
+static u32 port_allocator_seed;
+
+/*
+ * Local endpoints table
+ */
+static transport_endpoint_table_t local_endpoints_table;
+
+/*
+ * Pool of local endpoints
+ */
+static transport_endpoint_t *local_endpoints;
+
+/*
+ * Local endpoints pool lock
+ */
+static clib_spinlock_t local_endpoints_lock;
+
+
+/**
+ * Look up an (ip, port, proto) tuple in a transport endpoint table.
+ *
+ * @return value stored for the tuple or ENDPOINT_INVALID_INDEX if the
+ *         search finds nothing
+ */
+u32
+transport_endpoint_lookup (transport_endpoint_table_t * ht, u8 proto,
+                           ip46_address_t * ip, u16 port)
+{
+  clib_bihash_kv_24_8_t kv;
+  u64 port_and_proto = (u64) port << 8 | (u64) proto;
+
+  kv.key[0] = ip->as_u64[0];
+  kv.key[1] = ip->as_u64[1];
+  kv.key[2] = port_and_proto;
+
+  if (clib_bihash_search_inline_24_8 (ht, &kv) != 0)
+    return ENDPOINT_INVALID_INDEX;
+
+  return kv.value;
+}
+
+/**
+ * Add an entry mapping the endpoint's (ip, port) and @c proto to @c value
+ * in a transport endpoint table.
+ */
+void
+transport_endpoint_table_add (transport_endpoint_table_t * ht, u8 proto,
+                              transport_endpoint_t * te, u32 value)
+{
+  clib_bihash_kv_24_8_t kv;
+  u64 port_and_proto = (u64) te->port << 8 | (u64) proto;
+
+  kv.key[0] = te->ip.as_u64[0];
+  kv.key[1] = te->ip.as_u64[1];
+  kv.key[2] = port_and_proto;
+  kv.value = value;
+
+  clib_bihash_add_del_24_8 (ht, &kv, 1 /* is_add */ );
+}
+
+/**
+ * Delete the entry keyed by the endpoint's (ip, port) and @c proto from a
+ * transport endpoint table.
+ */
+void
+transport_endpoint_table_del (transport_endpoint_table_t * ht, u8 proto,
+                              transport_endpoint_t * te)
+{
+  clib_bihash_kv_24_8_t kv;
+  u64 port_and_proto = (u64) te->port << 8 | (u64) proto;
+
+  kv.key[0] = te->ip.as_u64[0];
+  kv.key[1] = te->ip.as_u64[1];
+  kv.key[2] = port_and_proto;
+
+  clib_bihash_add_del_24_8 (ht, &kv, 0 /* is_add */ );
+}
+
+/**
+ * Register transport virtual function table.
+ *
+ * The session type is derived from @c transport_proto and @c is_ip4 and
+ * the table is copied into the per session type vector.
+ *
+ * @param transport_proto - transport protocol
+ * @param is_ip4 - ip version flag, combined with the protocol to obtain
+ *                 the session type
+ * @param vft - virtual function table
+ */
+void
+transport_register_protocol (transport_proto_t transport_proto, u8 is_ip4,
+                             const transport_proto_vft_t * vft)
+{
+  u8 session_type = session_type_from_proto_and_ip (transport_proto, is_ip4);
+  u8 has_tx_offset_fn = vft->tx_fifo_offset != 0;
+
+  vec_validate (tp_vfts, session_type);
+  tp_vfts[session_type] = *vft;
+
+  /* If an offset function is provided, then peek instead of dequeue */
+  session_manager_set_transport_rx_fn (session_type, has_tx_offset_fn);
+}
+
+/**
+ * Get transport virtual function table
+ *
+ * @param session_type - session type (not protocol type)
+ * @return pointer to the registered table or 0 if @c session_type is
+ *         beyond the end of the vft vector
+ */
+transport_proto_vft_t *
+transport_protocol_get_vft (u8 session_type)
+{
+  return session_type < vec_len (tp_vfts) ? &tp_vfts[session_type] : 0;
+}
+
+#define PORT_MASK ((1 << 16)- 1)
+
+/**
+ * Return local endpoint @c tepi to the local endpoints pool, holding the
+ * pool lock (if initialized) for the duration.
+ */
+void
+transport_endpoint_del (u32 tepi)
+{
+  clib_spinlock_lock_if_init (&local_endpoints_lock);
+  pool_put (local_endpoints, local_endpoints + tepi);
+  clib_spinlock_unlock_if_init (&local_endpoints_lock);
+}
+
+/**
+ * Get a new element from the local endpoints pool.
+ *
+ * Takes no lock itself; transport_alloc_local_port calls it with the
+ * local endpoints lock held.
+ */
+always_inline transport_endpoint_t *
+transport_endpoint_new (void)
+{
+  transport_endpoint_t *new_tep;
+
+  pool_get (local_endpoints, new_tep);
+  return new_tep;
+}
+
+/**
+ * Remove the local endpoint recorded for (proto, lcl_ip, port), if any.
+ *
+ * @c port is converted with clib_net_to_host_u16 before the table lookup.
+ * When an entry is found it is deleted from the local endpoints table and
+ * its pool element is freed.
+ */
+void
+transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port)
+{
+  transport_endpoint_t *tep;
+  u32 tepi;
+
+  /* Cleanup local endpoint if this was an active connect */
+  tepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
+                                    clib_net_to_host_u16 (port));
+  if (tepi == ENDPOINT_INVALID_INDEX)
+    return;
+
+  tep = pool_elt_at_index (local_endpoints, tepi);
+  transport_endpoint_table_del (&local_endpoints_table, proto, tep);
+  transport_endpoint_del (tepi);
+}
+
+/**
+ * Allocate a local port and, if successful, add an entry to the local
+ * endpoint table to mark the (ip, port, proto) tuple as used.
+ *
+ * Random ports in [1024, 65535) are tried until one is not yet present in
+ * the local endpoints table, for at most 65535 - 1024 attempts.
+ *
+ * @return the allocated port or -1 if all attempts hit a used port
+ */
+int
+transport_alloc_local_port (u8 proto, ip46_address_t * ip)
+{
+  u16 min = 1024, max = 65535;  /* XXX configurable ? */
+  transport_endpoint_t *tep;
+  int attempts_left;
+  u16 port;
+  u32 tei;
+
+  /* Only support active opens from thread 0 */
+  ASSERT (vlib_get_thread_index () == 0);
+
+  /* Search for first free slot */
+  for (attempts_left = max - min; attempts_left > 0; attempts_left--)
+    {
+      /* Draw until the port falls in the specified range */
+      do
+        port = random_u32 (&port_allocator_seed) & PORT_MASK;
+      while (!PREDICT_TRUE (port >= min && port < max));
+
+      /* Look it up. If found, the port is taken so try another one */
+      tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip,
+                                       port);
+      if (tei != ENDPOINT_INVALID_INDEX)
+        continue;
+
+      clib_spinlock_lock_if_init (&local_endpoints_lock);
+      tep = transport_endpoint_new ();
+      clib_memcpy (&tep->ip, ip, sizeof (*ip));
+      tep->port = port;
+      transport_endpoint_table_add (&local_endpoints_table, proto, tep,
+                                    tep - local_endpoints);
+      clib_spinlock_unlock_if_init (&local_endpoints_lock);
+
+      return tep->port;
+    }
+  return -1;
+}
+
+/**
+ * Pick a local address and allocate a local port for reaching @c rmt.
+ *
+ * Looks up the FIB entry for the remote address, selects the interface
+ * (rmt->sw_if_index if set, otherwise the entry's resolving interface),
+ * takes that interface's first ip4/ip6 address as local address and
+ * allocates a local port for it.
+ *
+ * @param proto - transport protocol the port is allocated for
+ * @param rmt - remote endpoint; its fib_index must be valid
+ * @param lcl_addr - [out] local address, zeroed before it is filled in
+ * @param lcl_port - [out] allocated local port
+ * @return 0 on success, -1 if there is no route, no resolving interface,
+ *         no address on the interface or no port could be allocated
+ */
+int
+transport_alloc_local_endpoint (u8 proto, transport_endpoint_t * rmt,
+                                ip46_address_t * lcl_addr, u16 * lcl_port)
+{
+  fib_prefix_t prefix;
+  fib_node_index_t fei;
+  u32 sw_if_index;
+  int port;
+
+  /*
+   * Find the local address and allocate port
+   */
+
+  /* Find a FIB path to the destination */
+  clib_memcpy (&prefix.fp_addr, &rmt->ip, sizeof (rmt->ip));
+  prefix.fp_proto = rmt->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+  prefix.fp_len = rmt->is_ip4 ? 32 : 128;
+
+  ASSERT (rmt->fib_index != ENDPOINT_INVALID_INDEX);
+  fei = fib_table_lookup (rmt->fib_index, &prefix);
+
+  /* Couldn't find route to destination. Bail out. */
+  if (fei == FIB_NODE_INDEX_INVALID)
+    {
+      clib_warning ("no route to destination");
+      return -1;
+    }
+
+  sw_if_index = rmt->sw_if_index;
+  if (sw_if_index == ENDPOINT_INVALID_INDEX)
+    sw_if_index = fib_entry_get_resolving_interface (fei);
+
+  if (sw_if_index == ENDPOINT_INVALID_INDEX)
+    {
+      clib_warning ("no resolving interface for %U", format_ip46_address,
+                    &rmt->ip, (rmt->is_ip4 == 0) + 1);
+      return -1;
+    }
+
+  memset (lcl_addr, 0, sizeof (*lcl_addr));
+
+  if (rmt->is_ip4)
+    {
+      ip4_address_t *ip4;
+      ip4 = ip_interface_get_first_ip (sw_if_index, 1);
+      /* Same guard as the ip6 branch: the interface may have no address */
+      if (ip4 == 0)
+        {
+          clib_warning ("no routable ip4 addresses on %U",
+                        format_vnet_sw_if_index_name, vnet_get_main (),
+                        sw_if_index);
+          return -1;
+        }
+      lcl_addr->ip4.as_u32 = ip4->as_u32;
+    }
+  else
+    {
+      ip6_address_t *ip6;
+      ip6 = ip_interface_get_first_ip (sw_if_index, 0);
+      if (ip6 == 0)
+        {
+          clib_warning ("no routable ip6 addresses on %U",
+                        format_vnet_sw_if_index_name, vnet_get_main (),
+                        sw_if_index);
+          return -1;
+        }
+      clib_memcpy (&lcl_addr->ip6, ip6, sizeof (*ip6));
+    }
+
+  /* Allocate source port */
+  port = transport_alloc_local_port (proto, lcl_addr);
+  if (port < 1)
+    {
+      clib_warning ("Failed to allocate src port");
+      return -1;
+    }
+  *lcl_port = port;
+  return 0;
+}
+
+/**
+ * Initialize the transport layer's local endpoint state: seed the port
+ * allocator, create the local endpoints table and, when more than one
+ * thread is configured, initialize the local endpoints pool lock.
+ */
+void
+transport_init (void)
+{
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 n_buckets = 250000;
+  u32 memory_size = 512 << 20;
+  u32 num_threads = 1 /* main thread */  + vtm->n_threads;
+
+  /* Initialize [port-allocator] random number seed */
+  port_allocator_seed = (u32) clib_cpu_time_now ();
+
+  clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table",
+                         n_buckets, memory_size);
+
+  /* Lock is left uninitialized when only the main thread runs */
+  if (num_threads > 1)
+    clib_spinlock_init (&local_endpoints_lock);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 8c299c46490..f2cc80bb23a 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -29,7 +29,7 @@ typedef struct _transport_connection
ip46_address_t lcl_ip; /**< Local IP */
u16 lcl_port; /**< Local port */
u16 rmt_port; /**< Remote port */
- u8 transport_proto; /**< Protocol id */
+ u8 proto; /**< Protocol id */
u8 is_ip4; /**< Flag if IP4 connection */
u32 fib_index; /**< Network namespace */
@@ -54,7 +54,7 @@ typedef struct _transport_connection
#define c_rmt_ip6 connection.rmt_ip.ip6
#define c_lcl_port connection.lcl_port
#define c_rmt_port connection.rmt_port
-#define c_transport_proto connection.transport_proto
+#define c_proto connection.proto
#define c_fib_index connection.fib_index
#define c_s_index connection.s_index
#define c_c_index connection.c_index
@@ -69,7 +69,8 @@ typedef struct _transport_connection
typedef enum _transport_proto
{
TRANSPORT_PROTO_TCP,
- TRANSPORT_PROTO_UDP
+ TRANSPORT_PROTO_UDP,
+ TRANSPORT_N_PROTO
} transport_proto_t;
#define foreach_transport_connection_fields \
@@ -86,6 +87,8 @@ typedef struct _transport_endpoint
#undef _
} transport_endpoint_t;
+typedef clib_bihash_24_8_t transport_endpoint_table_t;
+
#define ENDPOINT_INVALID_INDEX ((u32)~0)
always_inline u8
@@ -94,6 +97,31 @@ transport_connection_fib_proto (transport_connection_t * tc)
return tc->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
}
+/**
+ * Map a transport endpoint's IP version to a FIB protocol.
+ *
+ * @param tep - transport endpoint; only its is_ip4 flag is read
+ * @return FIB_PROTOCOL_IP4 if is_ip4 is set, FIB_PROTOCOL_IP6 otherwise
+ */
+always_inline u8
+transport_endpoint_fib_proto (transport_endpoint_t * tep)
+{
+ return tep->is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+}
+
+/**
+ * Test whether a transport protocol is treated as stream oriented.
+ *
+ * @param proto - transport protocol id (transport_proto_t value)
+ * @return 1 if proto is TRANSPORT_PROTO_TCP, 0 for any other value
+ */
+always_inline u8
+transport_is_stream (u8 proto)
+{
+ return (proto == TRANSPORT_PROTO_TCP);
+}
+
+/**
+ * Test whether a transport protocol is treated as datagram oriented.
+ *
+ * @param proto - transport protocol id (transport_proto_t value)
+ * @return 1 if proto is TRANSPORT_PROTO_UDP, 0 for any other value
+ */
+always_inline u8
+transport_is_dgram (u8 proto)
+{
+ return (proto == TRANSPORT_PROTO_UDP);
+}
+
+/* Allocate a local (source) port for the given protocol and local ip.
+ * transport_alloc_local_endpoint treats any return value < 1 as failure. */
+int transport_alloc_local_port (u8 proto, ip46_address_t * ip);
+/* Pick a local address and source port for reaching rmt. Returns 0 with
+ * *lcl_addr and *lcl_port filled in, or -1 on failure (e.g. no route to the
+ * destination, no resolving interface, or no source port available). */
+int transport_alloc_local_endpoint (u8 proto, transport_endpoint_t * rmt,
+ ip46_address_t * lcl_addr,
+ u16 * lcl_port);
+/* NOTE(review): implementation is not visible from this header; presumably
+ * releases the (lcl_ip, port) pair handed out by the allocators - confirm. */
+void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port);
+/* Seed the port allocator and set up the local endpoints table. */
+void transport_init (void);
+
+
#endif /* VNET_VNET_URI_TRANSPORT_H_ */
/*
diff --git a/src/vnet/session/transport_interface.c b/src/vnet/session/transport_interface.c
deleted file mode 100644
index ef8d1e49524..00000000000
--- a/src/vnet/session/transport_interface.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/session/transport_interface.h>
-#include <vnet/session/session.h>
-
-/**
- * Per-type vector of transport protocol virtual function tables
- */
-transport_proto_vft_t *tp_vfts;
-
-u32
-transport_endpoint_lookup (transport_endpoint_table_t * ht,
- ip46_address_t * ip, u16 port)
-{
- clib_bihash_kv_24_8_t kv;
- int rv;
-
- kv.key[0] = ip->as_u64[0];
- kv.key[1] = ip->as_u64[1];
- kv.key[2] = port;
-
- rv = clib_bihash_search_inline_24_8 (ht, &kv);
- if (rv == 0)
- return kv.value;
-
- return TRANSPORT_ENDPOINT_INVALID_INDEX;
-}
-
-void
-transport_endpoint_table_add (transport_endpoint_table_t * ht,
- transport_endpoint_t * te, u32 value)
-{
- clib_bihash_kv_24_8_t kv;
-
- kv.key[0] = te->ip.as_u64[0];
- kv.key[1] = te->ip.as_u64[1];
- kv.key[2] = te->port;
- kv.value = value;
-
- clib_bihash_add_del_24_8 (ht, &kv, 1);
-}
-
-void
-transport_endpoint_table_del (transport_endpoint_table_t * ht,
- transport_endpoint_t * te)
-{
- clib_bihash_kv_24_8_t kv;
-
- kv.key[0] = te->ip.as_u64[0];
- kv.key[1] = te->ip.as_u64[1];
- kv.key[2] = te->port;
-
- clib_bihash_add_del_24_8 (ht, &kv, 0);
-}
-
-/**
- * Register transport virtual function table.
- *
- * @param type - session type (not protocol type)
- * @param vft - virtual function table
- */
-void
-session_register_transport (transport_proto_t transport_proto, u8 is_ip4,
- const transport_proto_vft_t * vft)
-{
- u8 session_type;
- session_type = session_type_from_proto_and_ip (transport_proto, is_ip4);
-
- vec_validate (tp_vfts, session_type);
- tp_vfts[session_type] = *vft;
-
- /* If an offset function is provided, then peek instead of dequeue */
- session_manager_set_transport_rx_fn (session_type,
- vft->tx_fifo_offset != 0);
-}
-
-/**
- * Get transport virtual function table
- *
- * @param type - session type (not protocol type)
- */
-transport_proto_vft_t *
-session_get_transport_vft (u8 session_type)
-{
- if (session_type >= vec_len (tp_vfts))
- return 0;
- return &tp_vfts[session_type];
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/session/transport_interface.h b/src/vnet/session/transport_interface.h
index 661221c484a..079e6464268 100644
--- a/src/vnet/session/transport_interface.h
+++ b/src/vnet/session/transport_interface.h
@@ -56,20 +56,10 @@ typedef struct _transport_proto_vft
u8 *(*format_half_open) (u8 * s, va_list * args);
} transport_proto_vft_t;
-typedef clib_bihash_24_8_t transport_endpoint_table_t;
-
-#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0)
-
-u32 transport_endpoint_lookup (transport_endpoint_table_t * ht,
- ip46_address_t * ip, u16 port);
-void transport_endpoint_table_add (transport_endpoint_table_t * ht,
- transport_endpoint_t * te, u32 value);
-void transport_endpoint_table_del (transport_endpoint_table_t * ht,
- transport_endpoint_t * te);
-
-void session_register_transport (transport_proto_t transport_proto, u8 is_ip4,
- const transport_proto_vft_t * vft);
-transport_proto_vft_t *session_get_transport_vft (u8 session_type);
+/* Register the virtual function table for a transport protocol / IP version
+ * pair. Same signature as the session_register_transport declaration this
+ * replaces.
+ * NOTE(review): the new implementation is not visible here; the removed one
+ * copied *vft into a vector indexed by session type - confirm. */
+void transport_register_protocol (transport_proto_t transport_proto,
+ u8 is_ip4,
+ const transport_proto_vft_t * vft);
+/* Look up a registered virtual function table. Same signature as the
+ * session_get_transport_vft declaration this replaces.
+ * NOTE(review): the argument is still named session_type; the removed
+ * implementation indexed by session type (not protocol id) and returned 0
+ * for an out-of-range index - confirm the new one keeps that contract. */
+transport_proto_vft_t *transport_protocol_get_vft (u8 session_type);
#endif /* SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_ */