Diffstat (limited to 'src/vnet/session')
-rw-r--r--   src/vnet/session/application.c             306
-rw-r--r--   src/vnet/session/application.h              87
-rw-r--r--   src/vnet/session/application_interface.c    26
-rw-r--r--   src/vnet/session/application_interface.h   135
-rw-r--r--   src/vnet/session/application_local.c       578
-rw-r--r--   src/vnet/session/application_local.h         3
-rw-r--r--   src/vnet/session/application_namespace.c   203
-rw-r--r--   src/vnet/session/application_namespace.h    19
-rw-r--r--   src/vnet/session/application_worker.c      434
-rw-r--r--   src/vnet/session/mma_template.c              5
-rw-r--r--   src/vnet/session/mma_template.h              2
-rw-r--r--   src/vnet/session/segment_manager.c         399
-rw-r--r--   src/vnet/session/segment_manager.h          21
-rw-r--r--   src/vnet/session/session.api               122
-rw-r--r--   src/vnet/session/session.c                1036
-rw-r--r--   src/vnet/session/session.h                 368
-rw-r--r--   src/vnet/session/session_api.c             822
-rw-r--r--   src/vnet/session/session_cli.c             151
-rw-r--r--   src/vnet/session/session_debug.c           125
-rw-r--r--   src/vnet/session/session_debug.h           203
-rw-r--r--   src/vnet/session/session_input.c           343
-rw-r--r--   src/vnet/session/session_lookup.c          183
-rw-r--r--   src/vnet/session/session_lookup.h            7
-rw-r--r--   src/vnet/session/session_node.c            706
-rw-r--r--   src/vnet/session/session_rules_table.c      23
-rw-r--r--   src/vnet/session/session_rules_table.h       9
-rw-r--r--   src/vnet/session/session_table.c            86
-rw-r--r--   src/vnet/session/session_table.h             5
-rw-r--r--   src/vnet/session/session_test.c            363
-rw-r--r--   src/vnet/session/session_types.h           152
-rw-r--r--   src/vnet/session/transport.c               325
-rw-r--r--   src/vnet/session/transport.h                44
-rw-r--r--   src/vnet/session/transport_types.h          54
33 files changed, 5125 insertions, 2220 deletions
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 7fe81885725..c66548507e5 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -31,10 +31,12 @@ static app_main_t app_main;
static app_listener_t *
app_listener_alloc (application_t * app)
{
+ app_main_t *am = &app_main;
app_listener_t *app_listener;
- pool_get (app->listeners, app_listener);
+
+ pool_get (am->listeners, app_listener);
clib_memset (app_listener, 0, sizeof (*app_listener));
- app_listener->al_index = app_listener - app->listeners;
+ app_listener->al_index = app_listener - am->listeners;
app_listener->app_index = app->app_index;
app_listener->session_index = SESSION_INVALID_INDEX;
app_listener->local_index = SESSION_INVALID_INDEX;
@@ -43,18 +45,23 @@ app_listener_alloc (application_t * app)
}
app_listener_t *
-app_listener_get (application_t * app, u32 app_listener_index)
+app_listener_get (u32 app_listener_index)
{
- return pool_elt_at_index (app->listeners, app_listener_index);
+ app_main_t *am = &app_main;
+
+ return pool_elt_at_index (am->listeners, app_listener_index);
}
static void
app_listener_free (application_t * app, app_listener_t * app_listener)
{
+ app_main_t *am = &app_main;
+
clib_bitmap_free (app_listener->workers);
+ vec_free (app_listener->cl_listeners);
if (CLIB_DEBUG)
clib_memset (app_listener, 0xfa, sizeof (*app_listener));
- pool_put (app->listeners, app_listener);
+ pool_put (am->listeners, app_listener);
}
session_handle_t
@@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al)
return al->ls_handle;
}
-app_listener_t *
-app_listener_get_w_session (session_t * ls)
-{
- application_t *app;
-
- app = application_get_if_valid (ls->app_index);
- if (!app)
- return 0;
- return app_listener_get (app, ls->al_index);
-}
-
session_handle_t
app_listen_session_handle (session_t * ls)
{
app_listener_t *al;
- al = app_listener_get_w_session (ls);
- if (!al)
+ /* TODO(fcoras): quic session handles */
+ if (ls->al_index == SESSION_INVALID_INDEX)
return listen_session_get_handle (ls);
+ al = app_listener_get (ls->al_index);
return al->ls_handle;
}
@@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle)
ls = session_get_from_handle_if_valid (handle);
if (!ls)
return 0;
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
app_listener_t *
@@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
}
@@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
/*
@@ -144,7 +141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
}
}
@@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app,
local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE,
sep->is_ip4);
ls = listen_session_alloc (0, local_st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
lh = session_handle (ls);
@@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app,
{
ls = session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->local_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app,
 * build its own specific listening connection.
*/
ls = listen_session_alloc (0, st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
/* Listen pool can be reallocated if the transport is
@@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app,
{
ls = listen_session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = listen_session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->session_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al)
}
static app_worker_t *
-app_listener_select_worker (application_t * app, app_listener_t * al)
+app_listener_select_worker (app_listener_t *al)
{
+ application_t *app;
u32 wrk_index;
app = application_get (al->app_index);
@@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al)
return listen_session_get (al->local_index);
}
+session_t *
+app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index)
+{
+ u32 si = vec_elt (al->cl_listeners, wrk_map_index);
+ return session_get (si, 0 /* listener thread */);
+}
+
static app_worker_map_t *
app_worker_map_alloc (application_t * app)
{
@@ -642,7 +647,7 @@ app_rx_mqs_alloc (application_t *app)
cfg->ring_cfgs = rc;
eqs->ssvm.ssvm_size = svm_msg_q_size_to_alloc (cfg) * n_mqs + (1 << 20);
- eqs->ssvm.name = format (0, "%s-rx-mqs-seg%c", app->name, 0);
+ eqs->ssvm.name = format (0, "%v-rx-mqs-seg%c", app->name, 0);
if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD))
{
@@ -684,7 +689,7 @@ application_get_rx_mqs_segment (application_t *app)
{
if (application_use_private_rx_mqs ())
return &app->rx_mqs_segment;
- return session_main_get_evt_q_segment ();
+ return session_main_get_wrk_mqs_segment ();
}
void
@@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index)
return pool_elt_at_index (app_main.app_pool, app_index);
}
+static int
+_null_app_tx_callback (session_t *s)
+{
+ return 0;
+}
+
static void
application_verify_cb_fns (session_cb_vft_t * cb_fns)
{
@@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns)
clib_warning ("No session disconnect callback function provided");
if (cb_fns->session_reset_callback == 0)
clib_warning ("No session reset callback function provided");
+ if (!cb_fns->builtin_app_tx_callback)
+ cb_fns->builtin_app_tx_callback = _null_app_tx_callback;
}
/**
@@ -747,14 +760,14 @@ application_verify_cfg (ssvm_segment_type_t st)
u8 is_valid;
if (st == SSVM_SEGMENT_MEMFD)
{
- is_valid = (session_main_get_evt_q_segment () != 0);
+ is_valid = (session_main_get_wrk_mqs_segment () != 0);
if (!is_valid)
clib_warning ("memfd seg: vpp's event qs IN binary api svm region");
return is_valid;
}
else if (st == SSVM_SEGMENT_SHM)
{
- is_valid = (session_main_get_evt_q_segment () == 0);
+ is_valid = (session_main_get_wrk_mqs_segment () == 0);
if (!is_valid)
clib_warning ("shm seg: vpp's event qs NOT IN binary api svm region");
return is_valid;
@@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st)
return 1;
}
-static int
-application_alloc_and_init (app_init_args_t * a)
+static session_error_t
+application_alloc_and_init (app_init_args_t *a)
{
ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD;
segment_manager_props_t *props;
@@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a)
{
clib_warning ("mq eventfds can only be used if socket transport is "
"used for binary api");
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
}
if (!application_verify_cfg (seg_type))
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] &&
opts[APP_OPTIONS_PREALLOC_FIFO_HDRS])
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
/* Check that the obvious things are properly set up */
application_verify_cb_fns (a->session_cb_vft);
@@ -819,6 +832,8 @@ application_alloc_and_init (app_init_args_t * a)
props->add_segment_size = opts[APP_OPTIONS_ADD_SEGMENT_SIZE];
props->add_segment = 1;
}
+ if (opts[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_USE_HUGE_PAGE)
+ props->huge_page = 1;
if (opts[APP_OPTIONS_RX_FIFO_SIZE])
props->rx_fifo_size = opts[APP_OPTIONS_RX_FIFO_SIZE];
if (opts[APP_OPTIONS_TX_FIFO_SIZE])
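
The new USE_HUGE_PAGE option above only flips props->huge_page, so an application opts in at attach time. A minimal sketch, assuming the usual attach flow for a builtin app (surrounding setup elided; option names from this patch and the existing attach API):

u64 options[APP_OPTIONS_N_OPTIONS] = {};
vnet_app_attach_args_t a = {};

options[APP_OPTIONS_FLAGS] =
  APP_OPTIONS_FLAGS_IS_BUILTIN | APP_OPTIONS_FLAGS_USE_HUGE_PAGE;
options[APP_OPTIONS_SEGMENT_SIZE] = 1ULL << 30; /* large segments benefit most */
a.options = options;
/* a.session_cb_vft, a.name, etc. set as usual, then: */
/* rv = vnet_application_attach (&a); */
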
@@ -872,12 +887,10 @@ application_free (application_t * app)
* Free workers
*/
- /* *INDENT-OFF* */
pool_flush (wrk_map, app->worker_maps, ({
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_free (app_wrk);
}));
- /* *INDENT-ON* */
pool_free (app->worker_maps);
/*
@@ -920,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index)
APP_DBG ("Detaching for app %v index %u api client index %u", app->name,
app->app_index, api_client_index);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (app_wrk->api_client_index == api_client_index)
vec_add1 (wrks, app_wrk->wrk_index);
}
- /* *INDENT-ON* */
if (!vec_len (wrks))
{
@@ -947,6 +958,31 @@ application_detach_process (application_t * app, u32 api_client_index)
vec_free (wrks);
}
+void
+application_namespace_cleanup (app_namespace_t *app_ns)
+{
+ u32 *app_indices = 0, *app_index;
+ application_t *app;
+ u32 ns_index;
+
+ ns_index = app_namespace_index (app_ns);
+ pool_foreach (app, app_main.app_pool)
+ if (app->ns_index == ns_index)
+ vec_add1 (app_indices, app->ns_index);
+
+ vec_foreach (app_index, app_indices)
+ {
+ app = application_get (*app_index);
+
+ if (application_is_proxy (app))
+ application_remove_proxy (app);
+ app->flags &= ~APP_OPTIONS_FLAGS_IS_PROXY;
+
+ application_free (app);
+ }
+ vec_free (app_indices);
+}
+
app_worker_t *
application_get_worker (application_t * app, u32 wrk_map_index)
{
@@ -972,12 +1008,55 @@ application_n_workers (application_t * app)
app_worker_t *
application_listener_select_worker (session_t * ls)
{
- application_t *app;
app_listener_t *al;
- app = application_get (ls->app_index);
- al = app_listener_get (app, ls->al_index);
- return app_listener_select_worker (app, al);
+ al = app_listener_get (ls->al_index);
+ return app_listener_select_worker (al);
+}
+
+always_inline u32
+app_listener_cl_flow_hash (session_dgram_hdr_t *hdr)
+{
+ u32 hash = 0;
+
+ if (hdr->is_ip4)
+ {
+ hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32);
+ hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+ else
+ {
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+
+ return hash;
+}
+
+session_t *
+app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr)
+{
+ u32 wrk_map_index = 0;
+ app_listener_t *al;
+
+ al = app_listener_get (ls->al_index);
+ /* Crude test to check if only worker 0 is set */
+ if (al->workers[0] != 1)
+ {
+ u32 hash = app_listener_cl_flow_hash (hdr);
+ hash %= vec_len (al->workers) * sizeof (uword);
+ wrk_map_index = clib_bitmap_next_set (al->workers, hash);
+ if (wrk_map_index == ~0)
+ wrk_map_index = clib_bitmap_first_set (al->workers);
+ }
+
+ return app_listener_get_wrk_cl_session (al, wrk_map_index);
}
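
To make the worker selection above concrete: the crc32c flow hash is reduced to a bit position and the workers bitmap is scanned forward from it, wrapping around to the first set bit. A self-contained model with a plain 64-bit mask standing in for the clib bitmap (illustrative only, not the VPP API):

#include <stdint.h>
#include <stdio.h>

/* Model of the pick: 'workers' is a mask of registered app workers,
 * 'hash' is the flow hash computed from the dgram 4-tuple. */
static int
pick_worker (uint64_t workers, uint32_t hash)
{
  int start = hash % 64, i;

  for (i = start; i < 64; i++)	/* next set bit at or after start */
    if (workers & (1ULL << i))
      return i;
  for (i = 0; i < start; i++)	/* wrap to first set bit */
    if (workers & (1ULL << i))
      return i;
  return -1;			/* no workers registered */
}

int
main (void)
{
  uint64_t workers = (1ULL << 0) | (1ULL << 2) | (1ULL << 5);
  printf ("hash 3 -> worker %d\n", pick_worker (workers, 3)); /* 5 */
  printf ("hash 9 -> worker %d\n", pick_worker (workers, 9)); /* 0 (wrap) */
  return 0;
}
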
int
@@ -1019,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk)
return 0;
}
-int
-vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
+session_error_t
+vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a)
{
fifo_segment_t *fs;
app_worker_map_t *wrk_map;
@@ -1031,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (a->is_add)
{
@@ -1054,13 +1133,15 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
{
wrk_map = app_worker_map_get (app, a->wrk_map_index);
if (!wrk_map)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_wrk = app_worker_get (wrk_map->wrk_index);
if (!app_wrk)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_api_table_del (app_wrk->api_client_index);
+ if (appns_sapi_enabled ())
+ sapi_socket_close_w_handle (app_wrk->api_client_index);
app_worker_free (app_wrk);
app_worker_map_free (app, wrk_map);
if (application_n_workers (app) == 0)
@@ -1069,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
return 0;
}
-static int
-app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
+static session_error_t
+app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index)
{
app_namespace_t *app_ns;
if (vec_len (namespace_id) == 0)
@@ -1082,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
*app_ns_index = app_namespace_index_from_id (namespace_id);
if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
app_ns = app_namespace_get (*app_ns_index);
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (app_ns->ns_secret != secret)
- return VNET_API_ERROR_APP_WRONG_NS_SECRET;
+ return SESSION_E_WRONG_NS_SECRET;
return 0;
}
@@ -1111,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index)
* to external app and a segment manager for shared memory fifo based
* communication with the external app.
*/
-int
-vnet_application_attach (vnet_app_attach_args_t * a)
+session_error_t
+vnet_application_attach (vnet_app_attach_args_t *a)
{
fifo_segment_t *fs;
application_t *app = 0;
@@ -1121,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a)
u32 app_ns_index = 0;
u8 *app_name = 0;
u64 secret;
- int rv;
+ session_error_t rv;
if (a->api_client_index != APP_INVALID_INDEX)
app = application_lookup (a->api_client_index);
else if (a->name)
app = application_lookup_name (a->name);
else
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (app)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+ return SESSION_E_APP_ATTACHED;
/* Socket api sets the name and validates namespace prior to attach */
if (!a->use_sock_api)
@@ -1185,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a)
/**
* Detach application from vpp
*/
-int
-vnet_application_detach (vnet_app_detach_args_t * a)
+session_error_t
+vnet_application_detach (vnet_app_detach_args_t *a)
{
application_t *app;
@@ -1194,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a)
if (!app)
{
clib_warning ("app not attached");
- return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ return SESSION_E_NOAPP;
}
app_interface_check_thread_and_barrier (vnet_application_detach, a);
@@ -1202,11 +1283,15 @@ vnet_application_detach (vnet_app_detach_args_t * a)
return 0;
}
-
static u8
-session_endpoint_in_ns (session_endpoint_t * sep)
+session_endpoint_in_ns (session_endpoint_cfg_t *sep)
{
- u8 is_lep = session_endpoint_is_local (sep);
+ u8 is_lep;
+
+ if (sep->flags & SESSION_ENDPT_CFG_F_PROXY_LISTEN)
+ return 1;
+
+ is_lep = session_endpoint_is_local ((session_endpoint_t *) sep);
if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX
&& !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4))
{
@@ -1215,6 +1300,7 @@ session_endpoint_in_ns (session_endpoint_t * sep)
sep->is_ip4);
return 0;
}
+
return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4));
}
@@ -1263,8 +1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep,
}
}
-int
-vnet_listen (vnet_listen_args_t * a)
+session_error_t
+vnet_listen (vnet_listen_args_t *a)
{
app_listener_t *app_listener;
app_worker_t *app_wrk;
@@ -1284,7 +1370,7 @@ vnet_listen (vnet_listen_args_t * a)
a->sep_ext.app_wrk_index = app_wrk->wrk_index;
session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ );
- if (!session_endpoint_in_ns (&a->sep))
+ if (!session_endpoint_in_ns (&a->sep_ext))
return SESSION_E_INVALID_NS;
/*
@@ -1317,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a)
return 0;
}
-int
-vnet_connect (vnet_connect_args_t * a)
+session_error_t
+vnet_connect (vnet_connect_args_t *a)
{
app_worker_t *client_wrk;
application_t *client;
- ASSERT (vlib_thread_is_main_w_barrier ());
+ ASSERT (session_vlib_thread_is_cl_thread ());
if (session_endpoint_is_zero (&a->sep))
return SESSION_E_INVALID_RMT_IP;
@@ -1341,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a)
*/
if (application_has_local_scope (client))
{
- int rv;
+ session_error_t rv;
a->sep_ext.original_tp = a->sep_ext.transport_proto;
a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE;
@@ -1356,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a)
return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh);
}
-int
-vnet_unlisten (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unlisten (vnet_unlisten_args_t *a)
{
app_worker_t *app_wrk;
app_listener_t *al;
@@ -1387,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a)
return app_worker_stop_listen (app_wrk, al);
}
-int
+session_error_t
vnet_shutdown_session (vnet_shutdown_args_t *a)
{
app_worker_t *app_wrk;
@@ -1408,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a)
return 0;
}
-int
-vnet_disconnect_session (vnet_disconnect_args_t * a)
+session_error_t
+vnet_disconnect_session (vnet_disconnect_args_t *a)
{
app_worker_t *app_wrk;
session_t *s;
@@ -1449,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk)
if (!app)
return SESSION_E_NOAPP;
- app_listener = app_listener_get (app, s->al_index);
+ app_listener = app_listener_get (s->al_index);
/* Only remove from lb for now */
app_listener->workers = clib_bitmap_set (app_listener->workers,
@@ -1493,6 +1579,12 @@ application_has_global_scope (application_t * app)
return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
}
+int
+application_original_dst_is_enabled (application_t *app)
+{
+ return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
static clib_error_t *
application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
u8 transport_proto, u8 is_start)
@@ -1609,12 +1701,8 @@ application_setup_proxy (application_t * app)
ASSERT (application_is_proxy (app));
- /* *INDENT-OFF* */
- transport_proto_foreach (tp, ({
- if (transports & (1 << tp))
- application_start_stop_proxy (app, tp, 1);
- }));
- /* *INDENT-ON* */
+ transport_proto_foreach (tp, transports)
+ application_start_stop_proxy (app, tp, 1);
}
void
@@ -1625,12 +1713,8 @@ application_remove_proxy (application_t * app)
ASSERT (application_is_proxy (app));
- /* *INDENT-OFF* */
- transport_proto_foreach (tp, ({
- if (transports & (1 << tp))
- application_start_stop_proxy (app, tp, 0);
- }));
- /* *INDENT-ON* */
+ transport_proto_foreach (tp, transports)
+ application_start_stop_proxy (app, tp, 0);
}
segment_manager_props_t *
@@ -1657,12 +1741,11 @@ application_format_listeners (application_t * app, int verbose)
if (!app)
{
- vlib_cli_output (vm, "%U", format_app_worker_listener, 0 /* header */ ,
+ vlib_cli_output (vm, "%U", format_app_worker_listener, NULL /* header */,
0, 0, verbose);
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (hash_elts (app_wrk->listeners_table) == 0)
@@ -1672,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose)
handle, sm_index, verbose);
}));
}
- /* *INDENT-ON* */
}
static void
@@ -1687,12 +1769,10 @@ application_format_connects (application_t * app, int verbose)
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_format_connects (app_wrk, verbose);
}
- /* *INDENT-ON* */
}
u8 *
@@ -1793,12 +1873,10 @@ format_application (u8 * s, va_list * args)
format_memory_size, props->rx_fifo_size,
format_memory_size, props->tx_fifo_size);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
s = format (s, "%U", format_app_worker, app_wrk);
}
- /* *INDENT-ON* */
return s;
}
@@ -1816,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose)
application_format_listeners (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_listeners (app, verbose);
}
- /* *INDENT-ON* */
}
void
@@ -1836,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose)
application_format_connects (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_connects (app, verbose);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -1850,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input,
app_cert_key_pair_t *ckpair;
session_cli_return_if_not_enabled ();
- /* *INDENT-OFF* */
pool_foreach (ckpair, app_main.cert_key_pair_store) {
vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1865,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app)
app_worker_t *wrk;
int i;
- /* *INDENT-OFF* */
pool_foreach (map, app->worker_maps) {
wrk = app_worker_get (map->wrk_index);
vlib_cli_output (vm, "[A%d][%d]%U", app->app_index,
map->wrk_index, format_svm_msg_q,
wrk->event_queue);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (app->rx_mqs); i++)
vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, format_svm_msg_q,
@@ -1893,11 +1963,9 @@ appliction_format_all_app_mq (vlib_main_t * vm)
session_main_get_vpp_event_queue (i));
}
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
appliction_format_app_mq (vm, app);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1905,10 +1973,11 @@ static clib_error_t *
show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- int do_server = 0, do_client = 0, do_mq = 0;
+ int do_server = 0, do_client = 0, do_mq = 0, do_transports = 0;
application_t *app;
u32 app_index = ~0;
int verbose = 0;
+ u8 is_ta;
session_cli_return_if_not_enabled ();
@@ -1918,6 +1987,8 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
do_server = 1;
else if (unformat (input, "client"))
do_client = 1;
+ else if (unformat (input, "transports"))
+ do_transports = 1;
else if (unformat (input, "mq"))
do_mq = 1;
else if (unformat (input, "%u", &app_index))
@@ -1971,11 +2042,11 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!do_server && !do_client)
{
vlib_cli_output (vm, "%U", format_application, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
- vlib_cli_output (vm, "%U", format_application, app, 0);
+ is_ta = app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;
+ if ((!do_transports && !is_ta) || (do_transports && is_ta))
+ vlib_cli_output (vm, "%U", format_application, app, 0);
}
- /* *INDENT-ON* */
}
return 0;
@@ -2045,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index)
u32 *app_index;
if (!(ckpair = app_cert_key_pair_get_if_valid (index)))
- return (VNET_API_ERROR_INVALID_VALUE);
+ return SESSION_E_INVALID;
vec_foreach (app_index, ckpair->app_interests)
{
@@ -2078,23 +2149,20 @@ application_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (application_init);
-VLIB_CLI_COMMAND (show_app_command, static) =
-{
+VLIB_CLI_COMMAND (show_app_command, static) = {
.path = "show app",
- .short_help = "show app [app_id] [server|client] [mq] [verbose]",
+ .short_help = "show app [index] [server|client] [mq] [verbose] "
+ "[transports]",
.function = show_app_command_fn,
};
-VLIB_CLI_COMMAND (show_certificate_command, static) =
-{
+VLIB_CLI_COMMAND (show_certificate_command, static) = {
.path = "show app certificate",
.short_help = "list app certs and keys present in store",
.function = show_certificate_command_fn,
};
-/* *INDENT-ON* */
crypto_engine_type_t
app_crypto_engine_type_add (void)
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 5ddf1d21fe3..c68a911230f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -29,6 +29,16 @@
#define APP_DBG(_fmt, _args...)
#endif
+typedef struct app_wrk_postponed_msg_
+{
+ u32 len;
+ u8 event_type;
+ u8 ring;
+ u8 is_sapi;
+ int fd;
+ u8 data[SESSION_CTRL_MSG_TX_MAX_SIZE];
+} app_wrk_postponed_msg_t;
+
typedef struct app_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -59,11 +69,20 @@ typedef struct app_worker_
/** API index for the worker. Needed for multi-process apps */
u32 api_client_index;
+ /** Set if mq is congested */
+ u8 mq_congested;
+
u8 app_is_builtin;
/** Pool of half-open session handles. Tracked in case worker detaches */
session_handle_t *half_open_table;
+ /* Per vpp worker fifos of events for app worker */
+ session_event_t **wrk_evts;
+
+ /* Vector of vpp workers mq congestion flags */
+ u8 *wrk_mq_congested;
+
/** Protects detached seg managers */
clib_spinlock_t detached_seg_managers_lock;
@@ -87,6 +106,8 @@ typedef struct app_listener_
session_handle_t ls_handle; /**< session handle of the local or global
listening session that also identifies
the app listener */
+ u32 *cl_listeners; /**< vector that maps app workers to their
+ cl sessions with fifos */
} app_listener_t;
typedef enum app_rx_mq_flags_
@@ -130,9 +151,6 @@ typedef struct application_
u16 proxied_transports;
- /** Pool of listeners for the app */
- app_listener_t *listeners;
-
/** Preferred tls engine */
u8 tls_engine;
@@ -179,6 +197,9 @@ typedef struct app_main_
*/
application_t *app_pool;
+ /** Pool of app listeners */
+ app_listener_t *listeners;
+
/**
* Hash table of apps by api client index
*/
@@ -227,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args
#define APP_NS_INVALID_INDEX ((u32)~0)
#define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0)
-app_listener_t *app_listener_get (application_t * app, u32 al_index);
+app_listener_t *app_listener_get (u32 al_index);
int app_listener_alloc_and_init (application_t * app,
session_endpoint_cfg_t * sep,
app_listener_t ** listener);
@@ -235,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener);
session_handle_t app_listener_handle (app_listener_t * app_listener);
app_listener_t *app_listener_lookup (application_t * app,
session_endpoint_cfg_t * sep);
+session_t *app_listener_select_wrk_cl_session (session_t *ls,
+ session_dgram_hdr_t *hdr);
/**
* Get app listener handle for listening session
@@ -258,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls);
* @return pointer to app listener or 0
*/
app_listener_t *app_listener_get_w_handle (session_handle_t handle);
-app_listener_t *app_listener_get_w_session (session_t * ls);
session_t *app_listener_get_session (app_listener_t * al);
session_t *app_listener_get_local_session (app_listener_t * al);
+session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index);
application_t *application_get (u32 index);
application_t *application_get_if_valid (u32 index);
@@ -280,6 +303,8 @@ u8 application_has_local_scope (application_t * app);
u8 application_has_global_scope (application_t * app);
void application_setup_proxy (application_t * app);
void application_remove_proxy (application_t * app);
+void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
segment_manager_props_t *application_get_segment_manager_properties (u32
app_index);
@@ -296,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en);
* App worker
*/
+always_inline u8
+app_worker_mq_is_congested (app_worker_t *app_wrk)
+{
+ return app_wrk->mq_congested > 0;
+}
+
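
For orientation, these per-vpp-worker congestion flags are meant to gate event delivery toward the app; a hedged sketch of the check-before-notify pattern, using only helpers declared in this header (actual call sites live elsewhere in the session layer):

/* sketch: postpone notification if the app wrk mq is congested */
if (app_worker_mq_wrk_is_congested (app_wrk, thread_index))
  return -1; /* event remains queued in wrk_evts, flushed later */
app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX);
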
app_worker_t *app_worker_alloc (application_t * app);
int application_alloc_worker_and_init (application_t * app,
app_worker_t ** wrk);
@@ -306,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s);
void app_worker_free (app_worker_t * app_wrk);
int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep,
session_handle_t *rsh);
-int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr);
+session_error_t app_worker_start_listen (app_worker_t *app_wrk,
+ app_listener_t *lstnr);
int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al);
int app_worker_init_accepted (session_t * s);
+int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err);
+int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err);
int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_init_connected (app_worker_t * app_wrk, session_t * s);
int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
@@ -321,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk,
int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf);
+int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s));
int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh);
-int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s);
-int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s);
+int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s);
int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s,
svm_fifo_t * f,
session_ft_action_t act, u32 len);
+void app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type);
+void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt);
+int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_del_all_events (app_worker_t *app_wrk);
segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *,
session_t *);
segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *);
@@ -338,9 +382,14 @@ int app_worker_del_segment_notify (app_worker_t * app_wrk,
u32 app_worker_n_listeners (app_worker_t * app);
session_t *app_worker_first_listener (app_worker_t * app,
u8 fib_proto, u8 transport_proto);
-int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt);
-int app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type);
+void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd);
+void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len);
+u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
+ u32 thread_index);
session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto,
u8 transport_proto);
void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index);
@@ -349,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args);
u8 *format_crypto_engine (u8 * s, va_list * args);
u8 *format_crypto_context (u8 * s, va_list * args);
void app_worker_format_connects (app_worker_t * app_wrk, int verbose);
-int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a);
+session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a);
uword unformat_application_proto (unformat_input_t * input, va_list * args);
@@ -357,17 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_default ();
-/* Needed while we support both bapi and mq ctrl messages */
-int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
- session_handle_t handle, int rv);
-int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
- session_t * s, session_error_t err);
-void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv);
+void sapi_socket_close_w_handle (u32 api_handle);
crypto_engine_type_t app_crypto_engine_type_add (void);
u8 app_crypto_engine_n_types (void);
+static inline u8
+app_worker_application_is_builtin (app_worker_t *app_wrk)
+{
+ return app_wrk->app_is_builtin;
+}
+
#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
/*
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 74f456a1eab..a62f914d43a 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -73,8 +73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
static u8 *cache_uri;
static session_endpoint_cfg_t *cache_sep;
-int
-parse_uri (char *uri, session_endpoint_cfg_t * sep)
+session_error_t
+parse_uri (char *uri, session_endpoint_cfg_t *sep)
{
unformat_input_t _input, *input = &_input;
@@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
if (!unformat (input, "%U", unformat_vnet_uri, sep))
{
unformat_free (input);
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
}
unformat_free (input);
@@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
return 0;
}
-int
-vnet_bind_uri (vnet_listen_args_t * a)
+session_error_t
+vnet_bind_uri (vnet_listen_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
int rv;
@@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a)
return vnet_listen (a);
}
-int
-vnet_unbind_uri (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unbind_uri (vnet_unlisten_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
application_t *app;
session_t *listener;
u32 table_index;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4));
listener = session_lookup_listener (table_index,
(session_endpoint_t *) & sep);
if (!listener)
- return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+ return SESSION_E_ADDR_NOT_IN_USE;
a->handle = listen_session_get_handle (listener);
return vnet_unlisten (a);
}
-int
-vnet_connect_uri (vnet_connect_args_t * a)
+session_error_t
+vnet_connect_uri (vnet_connect_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index b10dd6c150d..f175e4a58c6 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -62,6 +62,13 @@ typedef struct session_cb_vft_
/** Notify app that session pool migration happened */
void (*session_migrate_callback) (session_t * s, session_handle_t new_sh);
+ /** Notify app (external only) that listen was processed */
+ int (*session_listened_callback) (u32 app_wrk_index, u32 api_context,
+ session_handle_t handle, int rv);
+ /** Notify app (external only) that unlisten was processed */
+ void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh,
+ u32 context, int rv);
+
/** Direct RX callback for built-in application */
int (*builtin_app_rx_callback) (session_t * session);
@@ -74,6 +81,8 @@ typedef struct session_cb_vft_
/** Delegate fifo-tuning-logic to application */
int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 bytes);
+ /** Custom fifo allocation for proxy */
+ int (*proxy_alloc_session_fifos) (session_t *s);
} session_cb_vft_t;
@@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t
/*
* Results
*/
- u64 handle;
+ session_handle_t handle;
} vnet_listen_args_t;
typedef struct _vnet_unlisten_args_t
@@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t
union
{
char *uri;
- u64 handle; /**< Session handle */
+ session_handle_t handle; /**< Session handle */
};
u32 app_index; /**< Owning application index */
u32 wrk_map_index; /**< App's local pool worker index */
@@ -232,7 +241,9 @@ typedef enum
_ (USE_GLOBAL_SCOPE, "App can use global session scope") \
_ (USE_LOCAL_SCOPE, "App can use local session scope") \
_ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \
- _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs")
+ _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \
+ _ (USE_HUGE_PAGE, "Use huge page for FIFO") \
+ _ (GET_ORIGINAL_DST, "Get original dst enabled")
typedef enum _app_options
{
@@ -269,24 +280,26 @@ typedef enum session_fd_flag_
#undef _
} session_fd_flag_t;
-int parse_uri (char *uri, session_endpoint_cfg_t * sep);
-int vnet_bind_uri (vnet_listen_args_t *);
-int vnet_unbind_uri (vnet_unlisten_args_t * a);
-int vnet_connect_uri (vnet_connect_args_t * a);
+session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t vnet_bind_uri (vnet_listen_args_t *);
+session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
+session_error_t vnet_connect_uri (vnet_connect_args_t *a);
-int vnet_application_attach (vnet_app_attach_args_t * a);
-int vnet_application_detach (vnet_app_detach_args_t * a);
-int vnet_listen (vnet_listen_args_t * a);
-int vnet_connect (vnet_connect_args_t * a);
-int vnet_unlisten (vnet_unlisten_args_t * a);
-int vnet_shutdown_session (vnet_shutdown_args_t *a);
-int vnet_disconnect_session (vnet_disconnect_args_t * a);
+session_error_t vnet_application_attach (vnet_app_attach_args_t *a);
+session_error_t vnet_application_detach (vnet_app_detach_args_t *a);
+session_error_t vnet_listen (vnet_listen_args_t *a);
+session_error_t vnet_connect (vnet_connect_args_t *a);
+session_error_t vnet_unlisten (vnet_unlisten_args_t *a);
+session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a);
+session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a);
int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a);
int vnet_app_del_cert_key_pair (u32 index);
/** Ask for app cb on pair deletion */
int vnet_app_add_cert_key_interest (u32 index, u32 app_index);
+uword unformat_vnet_uri (unformat_input_t *input, va_list *args);
+
typedef struct app_session_transport_
{
ip46_address_t rmt_ip; /**< remote ip */
@@ -296,15 +309,15 @@ typedef struct app_session_transport_
u8 is_ip4; /**< set if uses ip4 networking */
} app_session_transport_t;
-#define foreach_app_session_field \
- _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \
- _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \
- _(session_type_t, session_type) /**< session type */ \
- _(volatile u8, session_state) /**< session state */ \
- _(u32, session_index) /**< index in owning pool */ \
- _(app_session_transport_t, transport) /**< transport info */ \
- _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
- _(u8, is_dgram) /**< flag for dgram mode */ \
+#define foreach_app_session_field \
+ _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \
+ _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \
+ _ (session_type_t, session_type) /**< session type */ \
+ _ (volatile u8, session_state) /**< session state */ \
+ _ (u32, session_index) /**< index in owning pool */ \
+ _ (app_session_transport_t, transport) /**< transport info */ \
+ _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
+ _ (u8, is_dgram) /**< flag for dgram mode */
typedef struct
{
@@ -343,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE,
typedef struct session_bound_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
u8 lcl_is_ip4;
u8 lcl_ip[16];
@@ -366,15 +379,15 @@ typedef struct session_unlisten_msg_
typedef struct session_unlisten_reply_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
} __clib_packed session_unlisten_reply_msg_t;
typedef struct session_accepted_msg_
{
u32 context;
- u64 listener_handle;
- u64 handle;
+ session_handle_t listener_handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -383,13 +396,15 @@ typedef struct session_accepted_msg_
transport_endpoint_t lcl;
transport_endpoint_t rmt;
u8 flags;
+ u32 original_dst_ip4;
+ u16 original_dst_port;
} __clib_packed session_accepted_msg_t;
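
The two new fields pair with the GET_ORIGINAL_DST option added by this patch; presumably they let a transparent proxy recover the pre-redirect destination straight from the accept notification. A hedged consumer-side sketch (network byte order is an assumption here, not verified):

#include <arpa/inet.h>
#include <stdio.h>

static void
handle_accepted (const session_accepted_msg_t *mp)
{
  if (mp->original_dst_ip4)
    {
      struct in_addr a = { .s_addr = mp->original_dst_ip4 };
      printf ("pre-redirect dst %s:%u\n", inet_ntoa (a),
	      ntohs (mp->original_dst_port));
    }
}
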
typedef struct session_accepted_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_accepted_reply_msg_t;
typedef struct session_connect_msg_
@@ -408,6 +423,7 @@ typedef struct session_connect_msg_
u32 ckpair_index;
u8 crypto_engine;
u8 flags;
+ u8 dscp;
uword ext_config;
} __clib_packed session_connect_msg_t;
@@ -428,7 +444,7 @@ typedef struct session_connected_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -458,33 +474,33 @@ typedef struct session_disconnected_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_msg_t;
typedef struct session_disconnected_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_reply_msg_t;
typedef struct session_reset_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_msg_t;
typedef struct session_reset_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_reply_msg_t;
typedef struct session_req_worker_update_msg_
{
- u64 session_handle;
+ session_handle_t session_handle;
} __clib_packed session_req_worker_update_msg_t;
/* NOTE: using u16 for wrk indices because message needs to fit in 18B */
@@ -493,12 +509,12 @@ typedef struct session_worker_update_msg_
u32 client_index;
u16 wrk_index;
u16 req_wrk_index;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_worker_update_msg_t;
typedef struct session_worker_update_reply_msg_
{
- u64 handle;
+ session_handle_t handle;
uword rx_fifo;
uword tx_fifo;
u64 segment_handle;
@@ -612,8 +628,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
{
if (svm_msg_q_try_lock (mq))
return -1;
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq)))
+ if (PREDICT_FALSE (
+ svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -628,9 +644,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
else
{
svm_msg_q_lock (mq);
- while (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq))
- svm_msg_q_wait_prod (mq);
+ while (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))
+ svm_msg_q_or_ring_wait_prod (mq, SESSION_MQ_IO_EVT_RING);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = session_index;
@@ -640,14 +655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
}
}
+#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \
+ noblock) \
+ app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \
+ noblock)
+
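
The wrapper keeps existing call sites source-compatible: a call such as

  app_send_dgram_raw (f, at, vpp_evt_q, data, len, evt_type, do_evt, noblock);

now expands to the GSO variant with gso_size 0, i.e. no segmentation offload:

  app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt,
			  noblock);
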
always_inline int
-app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
- svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type,
- u8 do_evt, u8 noblock)
+app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at,
+ svm_msg_q_t *vpp_evt_q, u8 *data, u32 len,
+ u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock)
{
session_dgram_hdr_t hdr;
int rv;
-
if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len))
return 0;
@@ -658,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
hdr.rmt_port = at->rmt_port;
clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = at->lcl_port;
-
- /* *INDENT-OFF* */
+ hdr.gso_size = gso_size;
svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ );
if (PREDICT_FALSE (rv < 0))
@@ -786,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len)
return app_recv_stream (s, data, len);
}
-/* *INDENT-OFF* */
static char *session_error_str[] = {
#define _(sym, str) str,
foreach_session_error
#undef _
};
-/* *INDENT-ON* */
static inline u8 *
format_session_error (u8 * s, va_list * args)
@@ -817,6 +832,8 @@ typedef enum app_sapi_msg_type
APP_SAPI_MSG_TYPE_ADD_DEL_WORKER,
APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY,
APP_SAPI_MSG_TYPE_SEND_FDS,
+ APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY,
+ APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY,
} __clib_packed app_sapi_msg_type_e;
typedef struct app_sapi_attach_msg_
@@ -861,6 +878,22 @@ typedef struct app_sapi_worker_add_del_reply_msg_
u8 is_add;
} __clib_packed app_sapi_worker_add_del_reply_msg_t;
+typedef struct app_sapi_cert_key_add_del_msg_
+{
+ u32 context;
+ u32 index;
+ u16 cert_len;
+ u16 certkey_len;
+ u8 is_add;
+} __clib_packed app_sapi_cert_key_add_del_msg_t;
+
+typedef struct app_sapi_cert_key_add_del_reply_msg_
+{
+ u32 context;
+ i32 retval;
+ u32 index;
+} __clib_packed app_sapi_cert_key_add_del_reply_msg_t;
+
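
For the new message pair, the fixed header presumably travels first on the app socket with the certificate and key bytes trailing it (cert_len bytes of certificate within a certkey_len-byte blob). A client-side composition sketch under that assumption (hypothetical helper, not part of the patch):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static uint8_t *
compose_cert_key_add (const uint8_t *cert, uint16_t cert_len,
		      const uint8_t *key, uint16_t key_len, size_t *n)
{
  app_sapi_cert_key_add_del_msg_t hdr = {
    .context = 1,
    .cert_len = cert_len,
    .certkey_len = (uint16_t) (cert_len + key_len), /* assumed: total len */
    .is_add = 1,
  };
  uint8_t *m = malloc (sizeof (hdr) + cert_len + key_len);
  memcpy (m, &hdr, sizeof (hdr));
  memcpy (m + sizeof (hdr), cert, cert_len);
  memcpy (m + sizeof (hdr) + cert_len, key, key_len);
  *n = sizeof (hdr) + cert_len + key_len;
  return m;
}
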
typedef struct app_sapi_msg_
{
app_sapi_msg_type_e type;
@@ -870,6 +903,8 @@ typedef struct app_sapi_msg_
app_sapi_attach_reply_msg_t attach_reply;
app_sapi_worker_add_del_msg_t worker_add_del;
app_sapi_worker_add_del_reply_msg_t worker_add_del_reply;
+ app_sapi_cert_key_add_del_msg_t cert_key_add_del;
+ app_sapi_cert_key_add_del_reply_msg_t cert_key_add_del_reply;
};
} __clib_packed app_sapi_msg_t;
diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c
index 3c62dade0f5..3cb743d10e0 100644
--- a/src/vnet/session/application_local.c
+++ b/src/vnet/session/application_local.c
@@ -41,9 +41,25 @@ typedef struct ct_segments_
ct_segment_t *segments;
} ct_segments_ctx_t;
+typedef struct ct_cleanup_req_
+{
+ u32 ct_index;
+} ct_cleanup_req_t;
+
+typedef struct ct_worker_
+{
+ ct_connection_t *connections; /**< Per-worker connection pools */
+ u32 *pending_connects; /**< Fifo of pending ho indices */
+ ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */
+ u8 have_connects; /**< Set if connect rpc pending */
+ u8 have_cleanups; /**< Set if cleanup rpc pending */
+ clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */
+ u32 *new_connects; /**< Burst of connects to be done */
+} ct_worker_t;
+
typedef struct ct_main_
{
- ct_connection_t **connections; /**< Per-worker connection pools */
+ ct_worker_t *wrk; /**< Per-worker state */
u32 n_workers; /**< Number of vpp workers */
u32 n_sessions; /**< Cumulative sessions counter */
u32 *ho_reusable; /**< Vector of reusable ho indices */
@@ -51,17 +67,28 @@ typedef struct ct_main_
clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */
uword *app_segs_ctxs_table; /**< App handle to segment pool map */
ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */
+ u32 **fwrk_pending_connects; /**< First wrk pending half-opens */
+ u32 fwrk_thread; /**< First worker thread */
+ u8 fwrk_have_flush; /**< Flag for connect flush rpc */
} ct_main_t;
static ct_main_t ct_main;
+static inline ct_worker_t *
+ct_worker_get (u32 thread_index)
+{
+ return &ct_main.wrk[thread_index];
+}
+
static ct_connection_t *
ct_connection_alloc (u32 thread_index)
{
+ ct_worker_t *wrk = ct_worker_get (thread_index);
ct_connection_t *ct;
- pool_get_zero (ct_main.connections[thread_index], ct);
- ct->c_c_index = ct - ct_main.connections[thread_index];
+ pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES);
+ clib_memset (ct, 0, sizeof (*ct));
+ ct->c_c_index = ct - wrk->connections;
ct->c_thread_index = thread_index;
ct->client_wrk = ~0;
ct->server_wrk = ~0;
@@ -73,22 +100,25 @@ ct_connection_alloc (u32 thread_index)
static ct_connection_t *
ct_connection_get (u32 ct_index, u32 thread_index)
{
- if (pool_is_free_index (ct_main.connections[thread_index], ct_index))
+ ct_worker_t *wrk = ct_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, ct_index))
return 0;
- return pool_elt_at_index (ct_main.connections[thread_index], ct_index);
+ return pool_elt_at_index (wrk->connections, ct_index);
}
static void
ct_connection_free (ct_connection_t * ct)
{
+ ct_worker_t *wrk = ct_worker_get (ct->c_thread_index);
+
if (CLIB_DEBUG)
{
- u32 thread_index = ct->c_thread_index;
- memset (ct, 0xfc, sizeof (*ct));
- pool_put (ct_main.connections[thread_index], ct);
+ clib_memset (ct, 0xfc, sizeof (*ct));
+ pool_put (wrk->connections, ct);
return;
}
- pool_put (ct_main.connections[ct->c_thread_index], ct);
+ pool_put (wrk->connections, ct);
}
static ct_connection_t *
@@ -99,11 +129,18 @@ ct_half_open_alloc (void)
clib_spinlock_lock (&cm->ho_reuseable_lock);
vec_foreach (hip, cm->ho_reusable)
- pool_put_index (cm->connections[0], *hip);
+ pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip);
vec_reset_length (cm->ho_reusable);
clib_spinlock_unlock (&cm->ho_reuseable_lock);
- return ct_connection_alloc (0);
+ return ct_connection_alloc (cm->fwrk_thread);
+}
+
+static ct_connection_t *
+ct_half_open_get (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ return ct_connection_get (ho_index, cm->fwrk_thread);
}
void
@@ -137,6 +174,33 @@ ct_session_endpoint (session_t * ll, session_endpoint_t * sep)
}
static void
+ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client)
+{
+ ct_connection_t *peer_ct;
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
+
+ if (is_client)
+ {
+ ct->client_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ ct->client_wrk = APP_INVALID_INDEX;
+ }
+ else
+ {
+ ct->server_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ ct->server_wrk = APP_INVALID_INDEX;
+ }
+}
+
+static inline u64
+ct_client_seg_handle (u64 server_sh, u32 client_wrk_index)
+{
+ return (((u64) client_wrk_index << 56) | server_sh);
+}
+
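
A quick check of the packing above: the client worker index lands in the top byte, so one server segment handle yields distinct per-client-worker handles while the low 56 bits still recover the original (this assumes server handles fit in 56 bits):

#include <stdint.h>
#include <assert.h>

static uint64_t
client_seg_handle (uint64_t server_sh, uint32_t client_wrk_index)
{
  return ((uint64_t) client_wrk_index << 56) | server_sh;
}

int
main (void)
{
  uint64_t sh = 0x00ABCDEF12345678ULL;
  assert (client_seg_handle (sh, 1) != client_seg_handle (sh, 2));
  assert ((client_seg_handle (sh, 2) & 0x00FFFFFFFFFFFFFFULL) == sh);
  return 0;
}
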
+static void
ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
svm_fifo_t *tx_fifo)
{
@@ -146,8 +210,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
app_worker_t *app_wrk;
ct_segment_t *ct_seg;
fifo_segment_t *fs;
- u8 del_segment = 0;
u32 seg_index;
+ session_t *s;
int cnt;
/*
@@ -202,77 +266,82 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
if (ct->flags & CT_CONN_F_CLIENT)
{
cnt = ct_seg->client_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 1 /* is_client */);
}
else
{
cnt = ct_seg->server_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 0 /* is_client */);
}
+ if (!(ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) ||
+ !(ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
+ goto done;
+
/*
* Remove segment context because both client and server detached
*/
- if (!cnt && (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) &&
- (ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
- {
- pool_put_index (seg_ctx->segments, ct->ct_seg_index);
+ pool_put_index (seg_ctx->segments, ct->ct_seg_index);
- /*
- * No more segment indices left, remove the segments context
- */
- if (!pool_elts (seg_ctx->segments))
- {
- u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
- table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
- hash_unset (cm->app_segs_ctxs_table, table_handle);
- pool_free (seg_ctx->segments);
- pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
- }
- del_segment = 1;
+ /*
+ * No more segment indices left, remove the segments context
+ */
+ if (!pool_elts (seg_ctx->segments))
+ {
+ u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
+ table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
+ hash_unset (cm->app_segs_ctxs_table, table_handle);
+ pool_free (seg_ctx->segments);
+ pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
}
- clib_rwlock_writer_unlock (&cm->app_segs_lock);
-
/*
- * Session counter went to zero, notify the app that detached
+ * Segment to be removed so notify both apps
*/
- if (cnt)
- return;
- if (ct->flags & CT_CONN_F_CLIENT)
- {
- app_wrk = app_worker_get_if_valid (ct->client_wrk);
- /* Determine if client app still needs notification, i.e., if it is
- * still attached. If client detached and this is the last ct session
- * on this segment, then its connects segment manager should also be
- * detached, so do not send notification */
- if (app_wrk)
- {
- segment_manager_t *csm;
- csm = app_worker_get_connect_segment_manager (app_wrk);
- if (!segment_manager_app_detached (csm))
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
- }
- }
- else if (!segment_manager_app_detached (sm))
+ app_wrk = app_worker_get_if_valid (ct->client_wrk);
+ /* Determine if client app still needs notification, i.e., if it is
+ * still attached. If client detached and this is the last ct session
+ * on this segment, then its connects segment manager should also be
+ * detached, so do not send notification */
+ if (app_wrk)
{
- app_wrk = app_worker_get (ct->server_wrk);
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
+ segment_manager_t *csm;
+ csm = app_worker_get_connect_segment_manager (app_wrk);
+ if (!segment_manager_app_detached (csm))
+ app_worker_del_segment_notify (
+ app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk));
}
- if (!del_segment)
- return;
-
+ /* Notify server app and free segment */
segment_manager_lock_and_del_segment (sm, seg_index);
/* Cleanup segment manager if needed. If server detaches there's a chance
* the client's sessions will hold up segment removal */
if (segment_manager_app_detached (sm) && !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
+
+done:
+
+ clib_rwlock_writer_unlock (&cm->app_segs_lock);
+}
+
+static void
+ct_session_force_disconnect_server (ct_connection_t *sct)
+{
+ sct->peer_index = ~0;
+ session_transport_closing_notify (&sct->connection);
}
int
@@ -294,9 +363,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
/* Client closed while waiting for reply from server */
if (PREDICT_FALSE (!cct))
{
- session_transport_closing_notify (&sct->connection);
- session_transport_delete_notify (&sct->connection);
- ct_connection_free (sct);
+ ct_session_force_disconnect_server (sct);
return 0;
}
@@ -307,16 +374,19 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
goto connect_error;
/*
- * Alloc client session
+ * Alloc client session, server session assumed to be established
*/
+ ASSERT (ss->session_state >= SESSION_STATE_READY);
+
cs = session_alloc (thread_index);
ss = session_get (ss_index, thread_index);
cs->session_type = ss->session_type;
cs->listener_handle = SESSION_INVALID_HANDLE;
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
cs->app_wrk_index = client_wrk->wrk_index;
cs->connection_index = cct->c_c_index;
+ cs->opaque = opaque;
cct->c_s_index = cs->session_index;
/* This will allocate fifos for the session. They won't be used for
@@ -325,23 +395,23 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
if ((err = app_worker_init_connected (client_wrk, cs)))
{
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
err = SESSION_E_ALLOC;
goto connect_error;
}
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
if (app_worker_connect_notify (client_wrk, cs, 0, opaque))
{
segment_manager_dealloc_fifos (cs->rx_fifo, cs->tx_fifo);
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
goto cleanup_client;
}
cs = session_get (cct->c_s_index, cct->c_thread_index);
- cs->session_state = SESSION_STATE_READY;
+ session_set_state (cs, SESSION_STATE_READY);
return 0;
@@ -373,9 +443,6 @@ ct_lookup_free_segment (ct_main_t *cm, segment_manager_t *sm,
pool_foreach (ct_seg, seg_ctx->segments)
{
/* Client or server has detached so segment cannot be used */
- if ((ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED) ||
- (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED))
- continue;
fs = segment_manager_get_segment (sm, ct_seg->segment_index);
free_bytes = fifo_segment_available_bytes (fs);
max_fifos = fifo_segment_size (fs) / seg_ctx->fifo_pair_bytes;
@@ -395,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
segment_manager_t *sm, u32 client_wrk_index)
{
u32 seg_ctx_index = ~0, sm_index, pair_bytes;
+ u64 seg_size, seg_handle, client_seg_handle;
segment_manager_props_t *props;
const u32 margin = 16 << 10;
ct_segments_ctx_t *seg_ctx;
app_worker_t *client_wrk;
- u64 seg_size, seg_handle;
application_t *server;
ct_segment_t *ct_seg;
uword *spp;
@@ -461,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
goto error;
client_wrk = app_worker_get (client_wrk_index);
- if (app_worker_add_segment_notify (client_wrk, seg_handle))
+ /* Make sure client workers do not have overlapping segment handles.
+ * Ideally, we should attach fs to client worker segment manager and
+ * create a new handle but that's not currently possible. */
+ client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index);
+ if (app_worker_add_segment_notify (client_wrk, client_seg_handle))
{
app_worker_del_segment_notify (server_wrk, seg_handle);
goto error;
@@ -515,6 +586,8 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ct->seg_ctx_index = ct_seg->seg_ctx_index;
ct->ct_seg_index = ct_seg->ct_seg_index;
fs_index = ct_seg->segment_index;
+ ct_seg->flags &=
+ ~(CT_SEGMENT_F_SERVER_DETACHED | CT_SEGMENT_F_CLIENT_DETACHED);
__atomic_add_fetch (&ct_seg->server_n_sessions, 1, __ATOMIC_RELAXED);
__atomic_add_fetch (&ct_seg->client_n_sessions, 1, __ATOMIC_RELAXED);
}
@@ -573,10 +646,6 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ls->tx_fifo->shr->master_session_index = ls->session_index;
ls->rx_fifo->master_thread_index = ls->thread_index;
ls->tx_fifo->master_thread_index = ls->thread_index;
- ls->rx_fifo->segment_manager = sm_index;
- ls->tx_fifo->segment_manager = sm_index;
- ls->rx_fifo->segment_index = fs_index;
- ls->tx_fifo->segment_index = fs_index;
seg_handle = segment_manager_segment_handle (sm, fs);
segment_manager_segment_reader_unlock (sm);
@@ -587,23 +656,21 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
}
static void
-ct_accept_rpc_wrk_handler (void *accept_args)
+ct_accept_one (u32 thread_index, u32 ho_index)
{
- u32 cct_index, ho_index, thread_index, ll_index;
ct_connection_t *sct, *cct, *ho;
transport_connection_t *ll_ct;
app_worker_t *server_wrk;
+ u32 cct_index, ll_index;
session_t *ss, *ll;
/*
* Alloc client ct and initialize from ho
*/
- thread_index = vlib_get_thread_index ();
cct = ct_connection_alloc (thread_index);
cct_index = cct->c_c_index;
- ho_index = pointer_to_uword (accept_args);
- ho = ct_connection_get (ho_index, 0);
+ ho = ct_half_open_get (ho_index);
/* Unlikely but half-open session and transport could have been freed */
if (PREDICT_FALSE (!ho))
@@ -640,7 +707,7 @@ ct_accept_rpc_wrk_handler (void *accept_args)
sct->c_rmt_port = 0;
sct->c_lcl_port = ll_ct->lcl_port;
sct->c_is_ip4 = cct->c_is_ip4;
- clib_memcpy (&sct->c_lcl_ip, &ll_ct->lcl_ip, sizeof (ll_ct->lcl_ip));
+ clib_memcpy (&sct->c_lcl_ip, &cct->c_rmt_ip, sizeof (cct->c_rmt_ip));
sct->client_wrk = cct->client_wrk;
sct->c_proto = TRANSPORT_PROTO_NONE;
sct->client_opaque = cct->client_opaque;
@@ -659,7 +726,7 @@ ct_accept_rpc_wrk_handler (void *accept_args)
sct->c_is_ip4);
ss->connection_index = sct->c_c_index;
ss->listener_handle = listen_session_get_handle (ll);
- ss->session_state = SESSION_STATE_CREATED;
+ session_set_state (ss, SESSION_STATE_CREATED);
server_wrk = application_listener_select_worker (ll);
ss->app_wrk_index = server_wrk->wrk_index;
@@ -675,15 +742,17 @@ ct_accept_rpc_wrk_handler (void *accept_args)
return;
}
+ cct->server_wrk = sct->server_wrk;
cct->seg_ctx_index = sct->seg_ctx_index;
cct->ct_seg_index = sct->ct_seg_index;
cct->client_rx_fifo = ss->tx_fifo;
cct->client_tx_fifo = ss->rx_fifo;
cct->client_rx_fifo->refcnt++;
cct->client_tx_fifo->refcnt++;
- cct->segment_handle = sct->segment_handle;
+ cct->segment_handle =
+ ct_client_seg_handle (sct->segment_handle, cct->client_wrk);
- ss->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (ss, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (server_wrk, ss))
{
ct_session_connect_notify (ss, SESSION_E_REFUSED);
@@ -693,13 +762,93 @@ ct_accept_rpc_wrk_handler (void *accept_args)
}
}
-static int
-ct_connect (app_worker_t * client_wrk, session_t * ll,
- session_endpoint_cfg_t * sep)
+static void
+ct_accept_rpc_wrk_handler (void *rpc_args)
{
- u32 thread_index, ho_index;
+ u32 thread_index, n_connects, i, n_pending;
+ const u32 max_connects = 32;
+ ct_worker_t *wrk;
+ u8 need_rpc = 0;
+
+ thread_index = pointer_to_uword (rpc_args);
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects could be handled without the worker barrier, so grab the lock */
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ n_pending = clib_fifo_elts (wrk->pending_connects);
+ n_connects = clib_min (n_pending, max_connects);
+ vec_validate (wrk->new_connects, n_connects);
+
+ for (i = 0; i < n_connects; i++)
+ clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]);
+
+ if (n_pending == n_connects)
+ wrk->have_connects = 0;
+ else
+ need_rpc = 1;
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ for (i = 0; i < n_connects; i++)
+ ct_accept_one (thread_index, wrk->new_connects[i]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+}
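
The handler above drains at most 32 pending connects per dispatch and re-arms itself with another RPC if producers queued more in the meantime, so a single dispatch never monopolizes the worker. A minimal standalone sketch of that pattern in plain C11 with pthreads; worker_queue_t, schedule_rpc and drain_batch are illustrative stand-ins, not VPP APIs:

#include <pthread.h>
#include <stdio.h>

#define MAX_BATCH 32
#define QUEUE_CAP 1024

typedef struct
{
  pthread_mutex_t lock;
  unsigned items[QUEUE_CAP];
  unsigned head, tail;		/* tail - head == queued item count */
  int have_work;		/* models wrk->have_connects */
} worker_queue_t;

/* models session_send_rpc_evt_to_thread_force (); here it just reports */
static void
schedule_rpc (worker_queue_t *wq)
{
  (void) wq;
  printf ("re-armed dispatch, work left behind\n");
}

static void
drain_batch (worker_queue_t *wq)
{
  unsigned batch[MAX_BATCH], n, i;
  int need_rpc = 0;

  pthread_mutex_lock (&wq->lock);
  n = wq->tail - wq->head;
  if (n > MAX_BATCH)
    {
      n = MAX_BATCH;
      need_rpc = 1;		/* leftovers, dispatch again later */
    }
  else
    wq->have_work = 0;		/* fully drained */
  for (i = 0; i < n; i++)
    batch[i] = wq->items[wq->head++ % QUEUE_CAP];
  pthread_mutex_unlock (&wq->lock);

  /* handle items outside the lock, as ct_accept_one () is called above */
  for (i = 0; i < n; i++)
    printf ("accept half-open %u\n", batch[i]);

  if (need_rpc)
    schedule_rpc (wq);
}

int
main (void)
{
  worker_queue_t wq = { .lock = PTHREAD_MUTEX_INITIALIZER };
  for (unsigned i = 0; i < 40; i++)
    wq.items[wq.tail++ % QUEUE_CAP] = i;
  wq.have_work = 1;
  drain_batch (&wq);		/* handles 32, re-arms */
  drain_batch (&wq);		/* handles the remaining 8 */
  return 0;
}
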
+
+static void
+ct_fwrk_flush_connects (void *rpc_args)
+{
+ u32 thread_index, fwrk_index, n_workers;
ct_main_t *cm = &ct_main;
- ct_connection_t *ho;
+ ct_worker_t *wrk;
+ u8 need_rpc;
+
+ fwrk_index = cm->fwrk_thread;
+ n_workers = vec_len (cm->fwrk_pending_connects);
+
+ for (thread_index = fwrk_index; thread_index < n_workers; thread_index++)
+ {
+ if (!vec_len (cm->fwrk_pending_connects[thread_index]))
+ continue;
+
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects can be done without worker barrier, grab dst worker lock */
+ if (thread_index != fwrk_index)
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ clib_fifo_add (wrk->pending_connects,
+ cm->fwrk_pending_connects[thread_index],
+ vec_len (cm->fwrk_pending_connects[thread_index]));
+ /* Reset per destination worker so a stale value from a previous
+ * iteration (or an uninitialized read) cannot trigger a bogus RPC */
+ need_rpc = 0;
+ if (!wrk->have_connects)
+ {
+ wrk->have_connects = 1;
+ need_rpc = 1;
+ }
+
+ if (thread_index != fwrk_index)
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ vec_reset_length (cm->fwrk_pending_connects[thread_index]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+ }
+
+ cm->fwrk_have_flush = 0;
+}
+
+static void
+ct_program_connect_to_wrk (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ u32 thread_index;
/* Simple round-robin policy for spreading sessions over workers. We skip
* thread index 0, i.e., offset the index by 1, when we have workers as it
@@ -708,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
cm->n_sessions += 1;
thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0;
+ /* Postpone flushing of the connect request to the dst worker until after
+ * the session layer fully initializes the half-open session. */
+ vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index);
+ if (!cm->fwrk_have_flush)
+ {
+ session_send_rpc_evt_to_thread_force (
+ cm->fwrk_thread, ct_fwrk_flush_connects,
+ uword_to_pointer (thread_index, void *));
+ cm->fwrk_have_flush = 1;
+ }
+}
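
The placement policy above is a plain round-robin over the workers that skips the main thread (index 0) whenever workers exist. A toy model with a self-check; ct_pick_thread is hypothetical, not a VPP function:

#include <assert.h>

static unsigned
ct_pick_thread (unsigned *n_sessions, unsigned n_workers)
{
  *n_sessions += 1;
  return n_workers ? (*n_sessions % n_workers) + 1 : 0;
}

int
main (void)
{
  unsigned n_sessions = 0;
  /* with 2 workers, successive connects land on threads 2, 1, 2, ... */
  assert (ct_pick_thread (&n_sessions, 2) == 2);
  assert (ct_pick_thread (&n_sessions, 2) == 1);
  assert (ct_pick_thread (&n_sessions, 2) == 2);
  /* without workers, everything is placed on thread 0 */
  n_sessions = 0;
  assert (ct_pick_thread (&n_sessions, 0) == 0);
  return 0;
}
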
+
+static int
+ct_connect (app_worker_t *client_wrk, session_t *ll,
+ session_endpoint_cfg_t *sep)
+{
+ ct_connection_t *ho;
+ u32 ho_index;
+
/*
* Alloc and init client half-open transport
*/
@@ -725,22 +893,19 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip));
ho->flags |= CT_CONN_F_CLIENT;
ho->c_s_index = ~0;
- ho->actual_tp = sep->transport_proto;
+ ho->actual_tp = sep->original_tp;
/*
- * Accept connection on thread selected above. Connected reply comes
+ * Program connect on a worker, connected reply comes
* after server accepts the connection.
*/
-
- session_send_rpc_evt_to_thread_force (thread_index,
- ct_accept_rpc_wrk_handler,
- uword_to_pointer (ho_index, void *));
+ ct_program_connect_to_wrk (ho_index);
return ho_index;
}
static u32
-ct_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
+ct_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
session_endpoint_cfg_t *sep;
ct_connection_t *ct;
@@ -772,9 +937,9 @@ ct_listener_get (u32 ct_index)
}
static transport_connection_t *
-ct_half_open_get (u32 ct_index)
+ct_session_half_open_get (u32 ct_index)
{
- return (transport_connection_t *) ct_connection_get (ct_index, 0);
+ return (transport_connection_t *) ct_half_open_get (ct_index);
}
static void
@@ -796,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index)
static void
ct_cleanup_ho (u32 ho_index)
{
- ct_connection_free (ct_connection_get (ho_index, 0));
+ ct_connection_t *ho;
+
+ ho = ct_half_open_get (ho_index);
+ ct_connection_free (ho);
}
static int
@@ -827,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep)
goto global_scope;
ll = listen_session_get_from_handle (lh);
- al = app_listener_get_w_session (ll);
+ al = app_listener_get (ll->al_index);
/*
* Break loop if rule in local table points to connecting app. This
@@ -856,58 +1024,189 @@ global_scope:
ll = session_lookup_listener_wildcard (table_index, sep);
/* Avoid connecting app to own listener */
- if (ll && ll->app_index != app->app_index)
- return ct_connect (app_wrk, ll, sep_ext);
+ if (ll)
+ {
+ al = app_listener_get (ll->al_index);
+ if (al->app_index != app->app_index)
+ return ct_connect (app_wrk, ll, sep_ext);
+ }
/* Failed to connect but no error */
return SESSION_E_LOCAL_CONNECT;
}
+static inline int
+ct_close_is_reset (ct_connection_t *ct, session_t *s)
+{
+ if (ct->flags & CT_CONN_F_RESET)
+ return 1;
+ if (ct->flags & CT_CONN_F_CLIENT)
+ return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0);
+ else
+ return (svm_fifo_max_dequeue (s->rx_fifo) > 0);
+}
+
static void
-ct_session_close (u32 ct_index, u32 thread_index)
+ct_session_cleanup_server_session (session_t *s)
{
- ct_connection_t *ct, *peer_ct;
+ ct_connection_t *ct;
+
+ ct = (ct_connection_t *) session_get_transport (s);
+ ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
+ session_free (s);
+ ct_connection_free (ct);
+}
+
+static void
+ct_session_postponed_cleanup (ct_connection_t *ct)
+{
+ ct_connection_t *peer_ct;
app_worker_t *app_wrk;
session_t *s;
- ct = ct_connection_get (ct_index, thread_index);
s = session_get (ct->c_s_index, ct->c_thread_index);
- peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (peer_ct)
{
- peer_ct->peer_index = ~0;
- /* Make sure session was allocated */
- if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
- {
- ct_session_connect_notify (s, SESSION_E_REFUSED);
- }
- else if (peer_ct->c_s_index != ~0)
- session_transport_closing_notify (&peer_ct->connection);
+ if (ct_close_is_reset (ct, s))
+ session_transport_reset_notify (&peer_ct->connection);
else
- ct_connection_free (peer_ct);
+ session_transport_closing_notify (&peer_ct->connection);
}
+ session_transport_closed_notify (&ct->connection);
+
+ /* It would be cleaner to call session_transport_delete_notify
+ * but then we can't control session cleanup lower in the stack */
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ if (app_wrk)
+ app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
if (ct->flags & CT_CONN_F_CLIENT)
{
/* Normal free for client session as the fifos are allocated through
* the connects segment manager in a segment that's not shared with
* the server */
- session_free_w_fifos (s);
ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo);
+ session_program_cleanup (s);
+ ct_connection_free (ct);
}
else
{
/* Manual session and fifo segment cleanup to avoid implicit
* segment manager cleanups and notifications */
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION);
+ {
+ /* Remove custom cleanup notify infra when/if switching to normal
+ * session cleanup. Note that ct is freed in the cb function */
+ app_worker_cleanup_notify_custom (app_wrk, s,
+ SESSION_CLEANUP_SESSION,
+ ct_session_cleanup_server_session);
+ }
+ else
+ {
+ ct_connection_free (ct);
+ }
+ }
+}
+
+static void
+ct_handle_cleanups (void *args)
+{
+ uword thread_index = pointer_to_uword (args);
+ const u32 max_cleanups = 100;
+ ct_cleanup_req_t *req;
+ ct_connection_t *ct;
+ u32 n_to_handle = 0;
+ ct_worker_t *wrk;
+ session_t *s;
+
+ wrk = ct_worker_get (thread_index);
+ wrk->have_cleanups = 0;
+ n_to_handle = clib_fifo_elts (wrk->pending_cleanups);
+ n_to_handle = clib_min (n_to_handle, max_cleanups);
+
+ while (n_to_handle)
+ {
+ clib_fifo_sub2 (wrk->pending_cleanups, req);
+ ct = ct_connection_get (req->ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT))
+ clib_fifo_add1 (wrk->pending_cleanups, *req);
+ else
+ ct_session_postponed_cleanup (ct);
+ n_to_handle -= 1;
+ }
- ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
- session_free (s);
+ if (clib_fifo_elts (wrk->pending_cleanups))
+ {
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
}
+}
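
ct_handle_cleanups only frees a connection once its fifos are quiet; otherwise the request is pushed back on the fifo for a later dispatch. A standalone sketch of that retry discipline under invented assumptions; session_is_quiet, do_cleanup and the ring layout are stand-ins for the svm_fifo_has_event () / SESSION_F_RX_EVT checks above:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { unsigned ct_index; } cleanup_req_t;

/* hypothetical: even-indexed connections pretend to still have a pending
 * event on the first attempt and quiesce afterwards */
static bool
session_is_quiet (unsigned ct_index)
{
  static unsigned attempts[4];
  return (ct_index & 1) || attempts[ct_index]++ > 0;
}

static void
do_cleanup (unsigned ct_index)
{
  printf ("cleaned up ct %u\n", ct_index);
}

/* drain up to max requests; busy sessions are requeued for a later pass */
static size_t
handle_cleanups (cleanup_req_t *ring, size_t cap, size_t *head, size_t *tail,
		 size_t max_per_dispatch)
{
  size_t n = *tail - *head, done = 0;
  if (n > max_per_dispatch)
    n = max_per_dispatch;
  while (n--)
    {
      cleanup_req_t req = ring[(*head)++ % cap];
      if (session_is_quiet (req.ct_index))
	{
	  do_cleanup (req.ct_index);
	  done++;
	}
      else
	ring[(*tail)++ % cap] = req;	/* requeue, like clib_fifo_add1 */
    }
  return done;	/* caller re-arms an RPC while *tail != *head */
}

int
main (void)
{
  cleanup_req_t ring[8] = { { 0 }, { 1 }, { 2 }, { 3 } };
  size_t head = 0, tail = 4;
  while (tail != head)
    handle_cleanups (ring, 8, &head, &tail, 100);
  return 0;
}
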
- ct_connection_free (ct);
+static void
+ct_program_cleanup (ct_connection_t *ct)
+{
+ ct_cleanup_req_t *req;
+ uword thread_index;
+ ct_worker_t *wrk;
+
+ thread_index = ct->c_thread_index;
+ wrk = ct_worker_get (ct->c_thread_index);
+
+ clib_fifo_add2 (wrk->pending_cleanups, req);
+ req->ct_index = ct->c_c_index;
+
+ if (wrk->have_cleanups)
+ return;
+
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups, uword_to_pointer (thread_index, void *));
+}
+
+static void
+ct_session_close (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct, *peer_ct;
+ session_t *s;
+
+ ct = ct_connection_get (ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ if (peer_ct)
+ {
+ peer_ct->peer_index = ~0;
+ /* Make sure session was allocated */
+ if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
+ {
+ ct_session_connect_notify (s, SESSION_E_REFUSED);
+ ct->peer_index = ~0;
+ }
+ else if (peer_ct->c_s_index == ~0)
+ {
+ /* should not happen */
+ clib_warning ("ct peer without session");
+ ct_connection_free (peer_ct);
+ }
+ }
+
+ /* Do not send closed notify, to make sure pending tx events are
+ * still delivered, and program cleanup instead */
+ ct_program_cleanup (ct);
+}
+
+static void
+ct_session_reset (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct;
+ ct = ct_connection_get (ct_index, thread_index);
+ ct->flags |= CT_CONN_F_RESET;
+ ct_session_close (ct_index, thread_index);
}
static transport_connection_t *
@@ -966,12 +1265,17 @@ static int
ct_app_rx_evt (transport_connection_t * tc)
{
ct_connection_t *ct = (ct_connection_t *) tc, *peer_ct;
- session_t *ps;
+ session_t *ps, *s;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (session_has_transport (s) || s->session_state < SESSION_STATE_READY)
+ return -1;
peer_ct = ct_connection_get (ct->peer_index, tc->thread_index);
- if (!peer_ct)
+ if (!peer_ct || (peer_ct->flags & CT_CONN_F_HALF_OPEN))
return -1;
ps = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
+ if (ps->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ return -1;
return session_dequeue_notify (ps);
}
@@ -993,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args)
{
u32 ho_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
- ct_connection_t *ct = ct_connection_get (ho_index, 0);
+ ct_connection_t *ct = ct_half_open_get (ho_index);
s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct);
if (verbose)
s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN");
@@ -1042,27 +1346,33 @@ format_ct_session (u8 * s, va_list * args)
clib_error_t *
ct_enable_disable (vlib_main_t * vm, u8 is_en)
{
+ vlib_thread_main_t *vtm = &vlib_thread_main;
ct_main_t *cm = &ct_main;
+ ct_worker_t *wrk;
cm->n_workers = vlib_num_workers ();
- vec_validate (cm->connections, cm->n_workers);
+ cm->fwrk_thread = transport_cl_thread ();
+ vec_validate (cm->wrk, vtm->n_vlib_mains);
+ vec_foreach (wrk, cm->wrk)
+ clib_spinlock_init (&wrk->pending_connects_lock);
clib_spinlock_init (&cm->ho_reuseable_lock);
clib_rwlock_init (&cm->app_segs_lock);
+ vec_validate (cm->fwrk_pending_connects, cm->n_workers);
return 0;
}
-/* *INDENT-OFF* */
static const transport_proto_vft_t cut_thru_proto = {
.enable = ct_enable_disable,
.start_listen = ct_start_listen,
.stop_listen = ct_stop_listen,
.get_connection = ct_session_get,
.get_listener = ct_listener_get,
- .get_half_open = ct_half_open_get,
+ .get_half_open = ct_session_half_open_get,
.cleanup = ct_session_cleanup,
.cleanup_ho = ct_cleanup_ho,
.connect = ct_session_connect,
.close = ct_session_close,
+ .reset = ct_session_reset,
.custom_tx = ct_custom_tx,
.app_rx_evt = ct_app_rx_evt,
.format_listener = format_ct_listener,
@@ -1075,7 +1385,14 @@ static const transport_proto_vft_t cut_thru_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
+
+static inline int
+ct_session_can_tx (session_t *s)
+{
+ return (s->session_state == SESSION_STATE_READY ||
+ s->session_state == SESSION_STATE_CLOSING ||
+ s->session_state == SESSION_STATE_APP_CLOSED);
+}
int
ct_session_tx (session_t * s)
@@ -1083,6 +1400,8 @@ ct_session_tx (session_t * s)
ct_connection_t *ct, *peer_ct;
session_t *peer_s;
+ if (!ct_session_can_tx (s))
+ return 0;
ct = (ct_connection_t *) session_get_transport (s);
peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (!peer_ct)
@@ -1090,6 +1409,7 @@ ct_session_tx (session_t * s)
peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return 0;
+ peer_s->flags |= SESSION_F_RX_EVT;
return session_enqueue_notify (peer_s);
}
diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h
index 86edf243b22..fd2804c7baf 100644
--- a/src/vnet/session/application_local.h
+++ b/src/vnet/session/application_local.h
@@ -22,7 +22,8 @@
#define foreach_ct_flags \
_ (CLIENT, "client") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (RESET, "reset")
enum
{
diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c
index 6d91fc362b2..f547dcfc031 100644
--- a/src/vnet/session/application_namespace.c
+++ b/src/vnet/session/application_namespace.c
@@ -19,6 +19,7 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib_table.h>
#include <vppinfra/file.h>
+#include <vppinfra/format_table.h>
#include <vlib/unix/unix.h>
/**
@@ -40,7 +41,7 @@ app_namespace_get (u32 index)
}
app_namespace_t *
-app_namespace_get_from_id (const u8 * ns_id)
+app_namespace_get_from_id (const u8 *ns_id)
{
u32 index = app_namespace_index_from_id (ns_id);
if (index == APP_NAMESPACE_INVALID_INDEX)
@@ -54,31 +55,46 @@ app_namespace_index (app_namespace_t * app_ns)
return (app_ns - app_namespace_pool);
}
+void
+app_namespace_free (app_namespace_t *app_ns)
+{
+ hash_unset_mem (app_namespace_lookup_table, app_ns->ns_id);
+ vec_free (app_ns->ns_id);
+
+ pool_put (app_namespace_pool, app_ns);
+}
+
app_namespace_t *
-app_namespace_alloc (u8 * ns_id)
+app_namespace_alloc (const u8 *ns_id)
{
app_namespace_t *app_ns;
+
pool_get (app_namespace_pool, app_ns);
clib_memset (app_ns, 0, sizeof (*app_ns));
- app_ns->ns_id = vec_dup (ns_id);
+
+ app_ns->ns_id = vec_dup ((u8 *) ns_id);
+ vec_terminate_c_string (app_ns->ns_id);
+
hash_set_mem (app_namespace_lookup_table, app_ns->ns_id,
app_ns - app_namespace_pool);
+
return app_ns;
}
-int
-vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a)
{
app_namespace_t *app_ns;
session_table_t *st;
+ u32 ns_index;
+ session_error_t rv;
if (a->is_add)
{
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX
&& !vnet_get_sw_interface_or_null (vnet_get_main (),
a->sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
+ return SESSION_E_INVALID;
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX)
{
@@ -91,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
}
if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX
&& a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get_from_id (a->ns_id);
if (!app_ns)
@@ -102,9 +118,23 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st->is_local = 1;
st->appns_index = app_namespace_index (app_ns);
app_ns->local_table_index = session_table_index (st);
+ if (a->sock_name)
+ {
+ app_ns->sock_name = vec_dup (a->sock_name);
+ vec_terminate_c_string (app_ns->sock_name);
+ }
+
+ /* Add socket for namespace, only at creation time */
+ if (app_sapi_enabled)
+ {
+ rv = appns_sapi_add_ns_socket (app_ns);
+ if (rv)
+ return rv;
+ }
}
+
app_ns->ns_secret = a->secret;
- app_ns->netns = a->netns ? vec_dup (a->netns) : 0;
app_ns->sw_if_index = a->sw_if_index;
app_ns->ip4_fib_index =
fib_table_find (FIB_PROTOCOL_IP4, a->ip4_fib_id);
@@ -112,14 +142,31 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
fib_table_find (FIB_PROTOCOL_IP6, a->ip6_fib_id);
session_lookup_set_tables_appns (app_ns);
- /* Add socket for namespace */
- if (app_sapi_enabled)
- appns_sapi_add_ns_socket (app_ns);
}
else
{
- return VNET_API_ERROR_UNIMPLEMENTED;
+ ns_index = app_namespace_index_from_id (a->ns_id);
+ if (ns_index == APP_NAMESPACE_INVALID_INDEX)
+ return SESSION_E_INVALID;
+
+ app_ns = app_namespace_get (ns_index);
+ if (!app_ns)
+ return SESSION_E_INVALID;
+
+ application_namespace_cleanup (app_ns);
+
+ if (app_sapi_enabled)
+ appns_sapi_del_ns_socket (app_ns);
+
+ st = session_table_get (app_ns->local_table_index);
+
+ session_table_free (st, FIB_PROTOCOL_MAX);
+ if (app_ns->sock_name)
+ vec_free (app_ns->sock_name);
+
+ app_namespace_free (app_ns);
}
+
return 0;
}
@@ -133,7 +180,13 @@ u32
app_namespace_index_from_id (const u8 * ns_id)
{
uword *indexp;
- indexp = hash_get_mem (app_namespace_lookup_table, ns_id);
+ u8 *key;
+
+ key = vec_dup ((u8 *) ns_id);
+ vec_terminate_c_string (key);
+
+ indexp = hash_get_mem (app_namespace_lookup_table, key);
+ vec_free (key);
if (!indexp)
return APP_NAMESPACE_INVALID_INDEX;
return *indexp;
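
Both app_namespace_alloc and app_namespace_index_from_id now NUL-terminate the id vector before touching the hash table, so insertions and lookups agree on the key bytes even when the API hands over a length-delimited vector. A plain-C illustration of the same convention; make_key is a hypothetical helper standing in for vec_dup () + vec_terminate_c_string ():

#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* NUL-terminated copy of a length-delimited byte string */
static char *
make_key (const char *bytes, size_t len)
{
  char *key = malloc (len + 1);
  if (!key)
    return NULL;
  memcpy (key, bytes, len);
  key[len] = '\0';
  return key;
}

int
main (void)
{
  /* an API-supplied id arrives as bytes + length, not as a C string */
  const char raw[] = { 'r', 'e', 'd' };
  char *key = make_key (raw, sizeof (raw));
  assert (key && strcmp (key, "red") == 0);	/* safe to hash/compare */
  free (key);
  return 0;
}
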
@@ -161,10 +214,15 @@ app_namespace_get_local_table (app_namespace_t * app_ns)
return session_table_get (app_ns->local_table_index);
}
-void
-appns_sapi_enable (void)
+int
+appns_sapi_enable_disable (int is_enable)
{
- app_sapi_enabled = 1;
+ /* Cannot be toggled once app namespaces (and their sockets) exist */
+ if (pool_elts (app_namespace_pool))
+ return -1;
+
+ app_sapi_enabled = is_enable;
+ return 0;
}
u8
@@ -189,7 +247,7 @@ app_namespaces_init (void)
/* clang-format off */
vnet_app_namespace_add_del_args_t a = {
.ns_id = ns_id,
- .netns = 0,
+ .sock_name = 0,
.secret = 0,
.sw_if_index = APP_NAMESPACE_INVALID_INDEX,
.is_add = 1
@@ -204,9 +262,11 @@ static clib_error_t *
app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0, *netns = 0;
+ u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
+ u8 *sock_name = 0;
unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX;
+ vnet_main_t *vnm = vnet_get_main ();
u64 secret;
clib_error_t *error = 0;
int rv;
@@ -220,15 +280,20 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (unformat (line_input, "add"))
is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
else if (unformat (line_input, "id %_%v%_", &ns_id))
;
else if (unformat (line_input, "secret %lu", &secret))
secret_set = 1;
else if (unformat (line_input, "sw_if_index %u", &sw_if_index))
sw_if_index_set = 1;
+ else if (unformat (line_input, "if %U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ sw_if_index_set = 1;
else if (unformat (line_input, "fib_id %u", &fib_id))
;
- else if (unformat (line_input, "netns %_%v%_", &netns))
+ else if (unformat (line_input, "sock-name %_%v%_", &sock_name))
;
else
{
@@ -238,57 +303,62 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
}
}
- if (!ns_id || !secret_set || !sw_if_index_set)
+ if (!ns_id)
{
- vlib_cli_output (vm, "namespace-id, secret and sw_if_index must be "
- "provided");
+ vlib_cli_output (vm, "namespace-id must be provided");
goto done;
}
- if (is_add)
+ if (is_add && (!secret_set || !sw_if_index_set))
{
- /* clang-format off */
- vnet_app_namespace_add_del_args_t args = {
- .ns_id = ns_id,
- .netns = netns,
- .secret = secret,
- .sw_if_index = sw_if_index,
- .ip4_fib_id = fib_id,
- .is_add = 1
- };
- /* clang-format on */
-
- if ((rv = vnet_app_namespace_add_del (&args)))
- error = clib_error_return (0, "app namespace add del returned %d", rv);
+ vlib_cli_output (vm, "secret and interface must be provided");
+ goto done;
}
+ /* clang-format off */
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .secret = secret,
+ .sw_if_index = sw_if_index,
+ .sock_name = sock_name,
+ .ip4_fib_id = fib_id,
+ .is_add = is_add,
+ };
+ /* clang-format on */
+
+ if ((rv = vnet_app_namespace_add_del (&args)))
+ error = clib_error_return (0, "app namespace add del returned %d", rv);
+
done:
vec_free (ns_id);
- vec_free (netns);
+ vec_free (sock_name);
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (app_ns_command, static) = {
.path = "app ns",
- .short_help = "app ns [add] id <namespace-id> secret <secret> "
- "sw_if_index <sw_if_index> [netns <ns>]",
+ .short_help = "app ns [add|del] id <namespace-id> secret <secret> "
+ "sw_if_index <sw_if_index> if <interface>",
.function = app_ns_fn,
};
-/* *INDENT-ON* */
u8 *
format_app_namespace (u8 * s, va_list * args)
{
app_namespace_t *app_ns = va_arg (*args, app_namespace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ s = format (s, "Application namespace [%u]\nid: %s\nsecret: %lu",
+ app_namespace_index (app_ns), app_ns->ns_id, app_ns->ns_secret);
+ if (app_ns->sw_if_index != (u32) ~0)
+ s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm,
+ app_ns->sw_if_index);
+ if (app_ns->sock_name)
+ s = format (s, "\nSocket: %s", app_ns->sock_name);
- s =
- format (s, "%-10u%-10lu%-15d%-15v%-15v%-40v", app_namespace_index (app_ns),
- app_ns->ns_secret, app_ns->sw_if_index, app_ns->ns_id,
- app_ns->netns, app_ns->sock_name);
return s;
}
@@ -314,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd");
- /* *INDENT-OFF* */
pool_foreach (cs, app_ns->app_sockets) {
handle = (app_ns_api_handle_t *) &cs->private_data;
cf = clib_file_get (&file_main, handle->aah_file_index);
@@ -327,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index,
app_wrk->wrk_map_index, cf->file_descriptor);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -335,9 +403,11 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- u8 *ns_id, do_table = 0, had_input = 1, do_api = 0;
+ u8 *ns_id = 0, do_table = 0, had_input = 1, do_api = 0;
app_namespace_t *app_ns;
+ vnet_main_t *vnm = vnet_get_main ();
session_table_t *st;
+ table_t table = {}, *t = &table;
session_cli_return_if_not_enabled ();
@@ -349,7 +419,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "table %_%v%_", &ns_id))
+ if (unformat (line_input, "id %_%v%_", &ns_id))
do_table = 1;
else if (unformat (line_input, "api-clients"))
do_api = 1;
@@ -386,20 +456,32 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
vlib_cli_output (vm, "table for ns %v could not be found", ns_id);
goto done;
}
+ vlib_cli_output (vm, "%U", format_app_namespace, app_ns);
session_lookup_show_table_entries (vm, st, 0, 1);
vec_free (ns_id);
goto done;
}
do_ns_list:
- vlib_cli_output (vm, "%-10s%-10s%-15s%-15s%-15s%-40s", "Index", "Secret",
- "sw_if_index", "Id", "netns", "Socket");
+ table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket");
+ int i = 0;
+ pool_foreach (app_ns, app_namespace_pool)
+ {
+ int j = 0;
+ table_format_cell (t, i, j++, "%u", app_namespace_index (app_ns));
+ table_format_cell (t, i, j++, "%lu", app_ns->ns_secret);
+ table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm,
+ app_ns->sw_if_index);
+ table_format_cell (t, i, j++, "%s", app_ns->ns_id);
+ table_format_cell (t, i++, j++, "%s", app_ns->sock_name);
+ }
- /* *INDENT-OFF* */
- pool_foreach (app_ns, app_namespace_pool) {
- vlib_cli_output (vm, "%U", format_app_namespace, app_ns);
- }
- /* *INDENT-ON* */
+ t->default_body.align = TTAA_LEFT;
+ t->default_header_col.align = TTAA_LEFT;
+ t->default_header_col.fg_color = TTAC_YELLOW;
+ t->default_header_col.flags = TTAF_FG_COLOR_SET;
+ vlib_cli_output (vm, "%U", format_table, t);
+ table_free (t);
done:
if (had_input)
@@ -407,14 +489,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_app_ns_command, static) =
-{
+VLIB_CLI_COMMAND (show_app_ns_command, static) = {
.path = "show app ns",
- .short_help = "show app ns [table <id> [api-clients]]",
+ .short_help = "show app ns [id <id> [api-clients]]",
.function = show_app_ns_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h
index 313b2d0e63d..261325cbe0e 100644
--- a/src/vnet/session/application_namespace.h
+++ b/src/vnet/session/application_namespace.h
@@ -51,11 +51,6 @@ typedef struct _app_namespace
u8 *ns_id;
/**
- * Linux netns if one was provided
- */
- u8 *netns;
-
- /**
* Name of socket applications can use to attach to session layer
*/
u8 *sock_name;
@@ -69,7 +64,7 @@ typedef struct _app_namespace
typedef struct _vnet_app_namespace_add_del_args
{
u8 *ns_id;
- u8 *netns;
+ u8 *sock_name;
u64 secret;
u32 sw_if_index;
u32 ip4_fib_id;
@@ -79,15 +74,16 @@ typedef struct _vnet_app_namespace_add_del_args
#define APP_NAMESPACE_INVALID_INDEX ((u32)~0)
-app_namespace_t *app_namespace_alloc (u8 * ns_id);
+app_namespace_t *app_namespace_alloc (const u8 *ns_id);
app_namespace_t *app_namespace_get (u32 index);
-app_namespace_t *app_namespace_get_from_id (const u8 * ns_id);
+app_namespace_t *app_namespace_get_from_id (const u8 *ns_id);
u32 app_namespace_index (app_namespace_t * app_ns);
const u8 *app_namespace_id (app_namespace_t * app_ns);
const u8 *app_namespace_id_from_index (u32 index);
-u32 app_namespace_index_from_id (const u8 * ns_id);
+u32 app_namespace_index_from_id (const u8 *ns_id);
void app_namespaces_init (void);
-int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a);
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a);
u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto);
session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns);
@@ -159,8 +155,9 @@ appns_sapi_handle_sock_index (u32 sapi_sock_handle)
}
int appns_sapi_add_ns_socket (app_namespace_t * app_ns);
+void appns_sapi_del_ns_socket (app_namespace_t *app_ns);
u8 appns_sapi_enabled (void);
-void appns_sapi_enable (void);
+int appns_sapi_enable_disable (int is_enable);
#endif /* SRC_VNET_SESSION_APPLICATION_NAMESPACE_H_ */
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index be8a9e86bd5..befdb7c7002 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -26,6 +26,7 @@ app_worker_t *
app_worker_alloc (application_t * app)
{
app_worker_t *app_wrk;
+
pool_get (app_workers, app_wrk);
clib_memset (app_wrk, 0, sizeof (*app_wrk));
app_wrk->wrk_index = app_wrk - app_workers;
@@ -33,6 +34,8 @@ app_worker_alloc (application_t * app)
app_wrk->wrk_map_index = ~0;
app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
clib_spinlock_init (&app_wrk->detached_seg_managers_lock);
+ vec_validate (app_wrk->wrk_evts, vlib_num_workers ());
+ vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ());
APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index);
return app_wrk;
}
@@ -55,26 +58,34 @@ void
app_worker_free (app_worker_t * app_wrk)
{
application_t *app = application_get (app_wrk->app_index);
+ session_handle_t handle, *handles = 0, *sh;
vnet_unlisten_args_t _a, *a = &_a;
- u64 handle, *handles = 0, *sm_indices = 0;
segment_manager_t *sm;
- session_handle_t *sh;
+ u64 *sm_indices = 0;
session_t *ls;
u32 sm_index;
int i;
/*
+ * Cleanup vpp wrk events
+ */
+ app_worker_del_all_events (app_wrk);
+ for (i = 0; i < vec_len (app_wrk->wrk_evts); i++)
+ clib_fifo_free (app_wrk->wrk_evts[i]);
+
+ vec_free (app_wrk->wrk_evts);
+ vec_free (app_wrk->wrk_mq_congested);
+
+ /*
* Listener cleanup
*/
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
ls = listen_session_get_from_handle (handle);
vec_add1 (handles, app_listen_session_handle (ls));
vec_add1 (sm_indices, sm_index);
sm = segment_manager_get (sm_index);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (handles); i++)
{
@@ -91,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk)
segment_manager_init_free (sm);
}
}
- vec_reset_length (handles);
+ vec_free (handles);
vec_free (sm_indices);
hash_free (app_wrk->listeners_table);
@@ -175,31 +186,85 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s)
}
int
+app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ svm_fifo_t *rx_fifo = 0, *tx_fifo = 0;
+ segment_manager_t *sm;
+ session_handle_t lsh;
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+ sm = app_worker_get_listen_segment_manager (app_wrk, ls);
+ lsh = session_handle (ls);
+
+ s = session_alloc (0 /* listener on main worker */);
+ session_set_state (s, SESSION_STATE_LISTENING);
+ s->flags |= SESSION_F_IS_CLESS;
+ s->app_wrk_index = app_wrk->wrk_index;
+ ls = session_get_from_handle (lsh);
+ s->session_type = ls->session_type;
+ s->connection_index = ls->connection_index;
+
+ segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo,
+ &tx_fifo);
+
+ rx_fifo->shr->master_session_index = s->session_index;
+ rx_fifo->master_thread_index = s->thread_index;
+
+ tx_fifo->shr->master_session_index = s->session_index;
+ tx_fifo->master_thread_index = s->thread_index;
+
+ s->rx_fifo = rx_fifo;
+ s->tx_fifo = tx_fifo;
+
+ vec_validate (al->cl_listeners, app_wrk->wrk_map_index);
+ al->cl_listeners[app_wrk->wrk_map_index] = s->session_index;
+
+ return 0;
+}
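
app_worker_alloc_wrk_cl_session records each worker's private connection-less listener session in the listener's cl_listeners vector, indexed by the worker's map index. A rough model of that bookkeeping; cl_listener_map_t and friends are invented, and the real code grows the vector with vec_validate:

#include <stdlib.h>

#define INVALID_INDEX (~0u)

typedef struct
{
  unsigned *cl_listeners;	/* wrk_map_index -> session index */
  size_t n;
} cl_listener_map_t;

static int
cl_listener_set (cl_listener_map_t *al, size_t wrk_map_index,
		 unsigned session_index)
{
  if (wrk_map_index >= al->n)
    {
      size_t n = wrk_map_index + 1;
      unsigned *p = realloc (al->cl_listeners, n * sizeof (*p));
      if (!p)
	return -1;
      for (size_t i = al->n; i < n; i++)
	p[i] = INVALID_INDEX;	/* mark holes unused */
      al->cl_listeners = p;
      al->n = n;
    }
  al->cl_listeners[wrk_map_index] = session_index;
  return 0;
}

static unsigned
cl_listener_get (cl_listener_map_t *al, size_t wrk_map_index)
{
  return wrk_map_index < al->n ? al->cl_listeners[wrk_map_index] :
				 INVALID_INDEX;
}

int
main (void)
{
  cl_listener_map_t al = { 0 };
  cl_listener_set (&al, 2, 7);
  return cl_listener_get (&al, 2) == 7 ? 0 : 1;
}
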
+
+void
+app_worker_free_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+
+ s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+
+ al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX;
+}
+
+int
app_worker_init_listener (app_worker_t * app_wrk, session_t * ls)
{
segment_manager_t *sm;
/* Allocate segment manager. All sessions derived out of a listen session
- * have fifos allocated by the same segment manager. */
+ * have fifos allocated by the same segment manager.
+ * TODO(fcoras): limit memory consumption by cless listeners */
if (!(sm = app_worker_alloc_segment_manager (app_wrk)))
return SESSION_E_ALLOC;
+ /* Once the first segment is mapped, don't remove it until unlisten */
+ sm->first_is_protected = 1;
+
/* Keep track of the segment manager for the listener or this worker */
hash_set (app_wrk->listeners_table, listen_session_get_handle (ls),
segment_manager_index (sm));
- if (transport_connection_is_cless (session_get_transport (ls)))
- {
- if (ls->rx_fifo)
- return SESSION_E_NOSUPPORT;
- return app_worker_alloc_session_fifos (sm, ls);
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ return app_worker_alloc_wrk_cl_session (app_wrk, ls);
+
return 0;
}
-int
-app_worker_start_listen (app_worker_t * app_wrk,
- app_listener_t * app_listener)
+session_error_t
+app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener)
{
session_t *ls;
int rv;
@@ -263,17 +328,14 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls)
if (PREDICT_FALSE (!sm_indexp))
return;
- /* Dealloc fifos, if any (dgram listeners) */
- if (ls->rx_fifo)
- {
- segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo);
- ls->tx_fifo = ls->rx_fifo = 0;
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ app_worker_free_wrk_cl_session (app_wrk, ls);
/* Try to cleanup segment manager */
sm = segment_manager_get (*sm_indexp);
if (sm)
{
+ sm->first_is_protected = 0;
segment_manager_app_detach (sm);
if (!segment_manager_has_fifos (sm))
{
@@ -334,8 +396,10 @@ app_worker_init_accepted (session_t * s)
listener = listen_session_get_from_handle (s->listener_handle);
app_wrk = application_listener_select_worker (listener);
- s->app_wrk_index = app_wrk->wrk_index;
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return -1;
+ s->app_wrk_index = app_wrk->wrk_index;
app = application_get (app_wrk->app_index);
if (app->cb_fns.fifo_tuning_callback)
s->flags |= SESSION_F_CUSTOM_FIFO_TUNING;
@@ -348,10 +412,35 @@ app_worker_init_accepted (session_t * s)
}
int
+app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND,
+ .as_u64[0] = alsh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+
+ return 0;
+}
+
+int
+app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY,
+ .as_u64[0] = sh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+ return 0;
+}
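
Both notifications above pack the session/listener handle into as_u64[0] and the (opaque, error) pair into as_u64[1]. A sketch of the packing with a round-trip check; pack_opaque_err and unpack_opaque_err are illustrative helpers, not VPP functions:

#include <assert.h>
#include <stdint.h>

static inline uint64_t
pack_opaque_err (uint32_t opaque, int32_t err)
{
  return ((uint64_t) opaque << 32) | (uint32_t) err;
}

static inline void
unpack_opaque_err (uint64_t v, uint32_t *opaque, int32_t *err)
{
  *opaque = (uint32_t) (v >> 32);
  *err = (int32_t) (uint32_t) v;
}

int
main (void)
{
  uint32_t opaque;
  int32_t err;

  /* SESSION_E_* codes are negative, so check the sign survives */
  unpack_opaque_err (pack_opaque_err (7, -30), &opaque, &err);
  assert (opaque == 7 && err == -30);
  return 0;
}
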
+
+int
app_worker_accept_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_accept_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED);
+ return 0;
}
int
@@ -365,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s)
/* Allocate fifos for session, unless the app is a builtin proxy */
if (application_is_builtin_proxy (app))
- return 0;
+ return app->cb_fns.proxy_alloc_session_fifos (s);
sm = app_worker_get_connect_segment_manager (app_wrk);
return app_worker_alloc_session_fifos (sm, s);
@@ -375,9 +464,13 @@ int
app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
session_error_t err, u32 opaque)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque,
- s, err);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED,
+ .as_u64[0] = s ? s->session_index : ~0,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+ u32 thread_index = s ? s->thread_index : vlib_get_thread_index ();
+
+ app_worker_add_event_custom (app_wrk, thread_index, &evt);
+ return 0;
}
int
@@ -385,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
{
session_handle_t *shp;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
pool_get (app_wrk->half_open_table, shp);
*shp = sh;
@@ -395,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
int
app_worker_del_half_open (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
- ASSERT (vlib_get_thread_index () <= 1);
- pool_put_index (app_wrk->half_open_table, s->ho_index);
- if (app->cb_fns.half_open_cleanup_callback)
- app->cb_fns.half_open_cleanup_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP);
return 0;
}
int
app_worker_close_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_disconnect_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED);
return 0;
}
int
app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_transport_closed_callback)
- app->cb_fns.session_transport_closed_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED);
return 0;
}
int
app_worker_reset_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_reset_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET);
return 0;
}
@@ -432,29 +517,33 @@ int
app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_cleanup_callback)
- app->cb_fns.session_cleanup_callback (s, ntf);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (session_cleanup) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s)
+app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s))
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.builtin_app_rx_callback (s);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (cleanup_cb) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s)
+app_worker_rx_notify (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
-
- if (!app->cb_fns.builtin_app_tx_callback)
- return 0;
-
- app->cb_fns.builtin_app_tx_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX);
return 0;
}
@@ -462,8 +551,11 @@ int
app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_migrate_callback (s, new_sh);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED,
+ .as_u64[0] = s->session_index,
+ .as_u64[1] = new_sh };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
return 0;
}
@@ -472,6 +564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
{
segment_manager_t *sm;
svm_fifo_t *rxf, *txf;
+ int rv;
if (s->session_state == SESSION_STATE_LISTENING)
return application_change_listener_owner (s, app_wrk);
@@ -488,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
s->tx_fifo = 0;
sm = app_worker_get_connect_segment_manager (app_wrk);
- if (app_worker_alloc_session_fifos (sm, s))
- return -1;
+ if ((rv = app_worker_alloc_session_fifos (sm, s)))
+ return rv;
if (!svm_fifo_is_empty_cons (rxf))
svm_fifo_clone (s->rx_fifo, rxf);
@@ -506,6 +599,9 @@ int
app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep,
session_handle_t *rsh)
{
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return SESSION_E_REFUSED;
+
sep->app_wrk_index = app_wrk->wrk_index;
return session_open (sep, rsh);
@@ -549,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst
&& !(listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -573,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -590,130 +682,178 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
int
app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT,
+ .as_u64[1] = segment_handle };
- return app->cb_fns.add_segment_callback (app_wrk->wrk_index,
- segment_handle);
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
int
app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.del_segment_callback (app_wrk->wrk_index,
- segment_handle);
-}
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT,
+ .as_u64[1] = segment_handle };
-static inline u8
-app_worker_application_is_builtin (app_worker_t * app_wrk)
-{
- return app_wrk->app_is_builtin;
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
-static inline int
-app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s)
+static int
+app_wrk_send_fd (app_worker_t *app_wrk, int fd)
{
- session_event_t *evt;
- svm_msg_q_msg_t msg;
- svm_msg_q_t *mq;
+ if (!appns_sapi_enabled ())
+ {
+ vl_api_registration_t *reg;
+ clib_error_t *error;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_rx (app_wrk, s);
+ reg =
+ vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
+ if (!reg)
+ {
+ clib_warning ("no api registration for client: %u",
+ app_wrk->api_client_index);
+ return -1;
+ }
- if (svm_fifo_has_event (s->rx_fifo))
- return 0;
+ if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI)
+ return -1;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
+ error = vl_api_send_fd_msg (reg, &fd, 1);
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
- {
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ return 0;
}
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ app_sapi_msg_t smsg = { 0 };
+ app_namespace_t *app_ns;
+ clib_error_t *error;
+ application_t *app;
+ clib_socket_t *cs;
+ u32 cs_index;
+
+ app = application_get (app_wrk->app_index);
+ app_ns = app_namespace_get (app->ns_index);
+ cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
+ cs = appns_sapi_get_socket (app_ns, cs_index);
+ if (PREDICT_FALSE (!cs))
+ return -1;
+
+ /* There's no payload for the message, only the type */
+ smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
+ error = clib_socket_sendmsg (cs, &smsg, sizeof (smsg), &fd, 1);
+ if (error)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
+ clib_error_report (error);
return -1;
}
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->session_index = s->rx_fifo->shr->client_session_index;
- evt->event_type = SESSION_IO_EVT_RX;
-
- (void) svm_fifo_set_event (s->rx_fifo);
- svm_msg_q_add_and_unlock (mq, &msg);
-
return 0;
}
-static inline int
-app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s)
+void
+app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type)
{
- svm_msg_q_t *mq;
session_event_t *evt;
- svm_msg_q_msg_t msg;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_tx (app_wrk, s);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt);
+ evt->session_index = s->session_index;
+ evt->event_type = evt_type;
+ evt->postponed = 0;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
-
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1)
{
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
+
+void
+app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt)
+{
+ clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt);
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
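
app_worker_add_event and app_worker_add_event_custom only program the session worker when the per-thread fifo transitions from empty to non-empty, so a burst of enqueues costs a single scheduling request. A compressed model of that edge trigger; all types and names here are invented:

#include <stdio.h>

#define FIFO_CAP 64

typedef struct
{
  int evts[FIFO_CAP];
  unsigned head, tail;
} evt_fifo_t;

/* models session_wrk_program_app_wrk_evts (): ask the session worker to
 * visit this app worker's fifo on its next dispatch */
static void
program_app_wrk_evts (void)
{
  printf ("app wrk scheduled\n");
}

static void
add_event (evt_fifo_t *f, int evt)
{
  f->evts[f->tail++ % FIFO_CAP] = evt;
  /* edge trigger: only the empty -> non-empty transition programs the
   * worker; later enqueues in the same burst ride along for free */
  if (f->tail - f->head == 1)
    program_app_wrk_evts ();
}

int
main (void)
{
  evt_fifo_t f = { 0 };
  add_event (&f, 1);	/* schedules */
  add_event (&f, 2);	/* does not */
  add_event (&f, 3);	/* does not */
  return 0;
}
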
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->event_type = SESSION_IO_EVT_TX;
- evt->session_index = s->tx_fifo->shr->client_session_index;
+always_inline void
+app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *evt;
- svm_msg_q_add_and_unlock (mq, &msg);
- return 0;
+ ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING));
+ *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING);
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ clib_memset (evt, 0, sizeof (*evt));
+ evt->event_type = evt_type;
+ clib_memcpy_fast (evt->data, msg, msg_len);
+
+ if (fd != -1)
+ app_wrk_send_fd (app_wrk, fd);
+
+ svm_msg_q_add_raw (mq, mq_msg);
+}
+
+void
+app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, fd);
}
-/* *INDENT-OFF* */
-typedef int (app_send_evt_handler_fn) (app_worker_t *app,
- session_t *s);
-static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = {
- app_send_io_evt_rx,
- app_send_io_evt_tx,
-};
-/* *INDENT-ON* */
+void
+app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1);
+}
-/**
- * Send event to application
- *
- * Logic from queue perspective is blocking. However, if queue is full,
- * we return.
- */
-int
-app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type)
+u8
+app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ return app_wrk->wrk_mq_congested[thread_index] > 0;
+}
+
+void
+app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 1;
+}
+
+void
+app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index)
{
- return app_send_evt_handler_fns[evt_type] (app, s);
+ clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 0;
}
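
The congestion accounting above pairs a per-thread flag, written only by its owning thread, with a relaxed atomic aggregate that connect/accept paths on any thread can poll cheaply. A standalone C11 model under those assumptions; wrk_cong_t and the helpers are illustrative, not VPP APIs:

#include <stdatomic.h>
#include <stdbool.h>

#define MAX_THREADS 8

typedef struct
{
  atomic_uint mq_congested;	/* > 0 if any thread is congested */
  unsigned char wrk_mq_congested[MAX_THREADS]; /* owner-thread writes only */
} wrk_cong_t;

static void
set_congested (wrk_cong_t *w, unsigned thread_index)
{
  atomic_fetch_add_explicit (&w->mq_congested, 1, memory_order_relaxed);
  w->wrk_mq_congested[thread_index] = 1;
}

static void
unset_congested (wrk_cong_t *w, unsigned thread_index)
{
  atomic_fetch_sub_explicit (&w->mq_congested, 1, memory_order_relaxed);
  w->wrk_mq_congested[thread_index] = 0;
}

/* cheap check used to refuse new connects/accepts early */
static bool
mq_is_congested (wrk_cong_t *w)
{
  return atomic_load_explicit (&w->mq_congested, memory_order_relaxed) > 0;
}

int
main (void)
{
  wrk_cong_t w = { 0 };
  set_congested (&w, 1);
  bool refused = mq_is_congested (&w);	/* true: refuse new work */
  unset_congested (&w, 1);
  return refused && !mq_is_congested (&w) ? 0 : 1;
}
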
u8 *
format_app_worker_listener (u8 * s, va_list * args)
{
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
- u64 handle = va_arg (*args, u64);
+ session_handle_t handle = va_arg (*args, u64);
u32 sm_index = va_arg (*args, u32);
int verbose = va_arg (*args, int);
session_t *listener;
@@ -760,10 +900,12 @@ format_app_worker (u8 * s, va_list * args)
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
u32 indent = 1;
- s = format (s, "%U wrk-index %u app-index %u map-index %u "
- "api-client-index %d\n", format_white_space, indent,
- app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index,
- app_wrk->api_client_index);
+ s = format (s,
+ "%U wrk-index %u app-index %u map-index %u "
+ "api-client-index %d mq-cong %u\n",
+ format_white_space, indent, app_wrk->wrk_index,
+ app_wrk->app_index, app_wrk->wrk_map_index,
+ app_wrk->api_client_index, app_wrk->mq_congested);
return s;
}
diff --git a/src/vnet/session/mma_template.c b/src/vnet/session/mma_template.c
index ae730e5dbea..4b2770bb756 100644
--- a/src/vnet/session/mma_template.c
+++ b/src/vnet/session/mma_template.c
@@ -65,6 +65,11 @@ RT (mma_rule_free) (RTT (mma_rules_table) * srt, RTT (mma_rule) * rule)
return rule;
}
+void RT (mma_rules_table_free) (RTT (mma_rules_table) * srt)
+{
+ pool_free (srt->rules);
+}
+
RTT (mma_rule) *
RT (mma_rules_table_get_rule) (RTT (mma_rules_table) * srt, u32 srt_index)
{
diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h
index dc3545a4ffe..2c0230c2869 100644
--- a/src/vnet/session/mma_template.h
+++ b/src/vnet/session/mma_template.h
@@ -41,11 +41,9 @@ typedef struct
{
u32 action_index;
u32 *next_indices;
- /* *INDENT-OFF* */
RTT (mma_mask_or_match) mask;
RTT (mma_mask_or_match) match;
RTT (mma_mask_or_match) max_match;
- /* *INDENT-ON* */
} RTT (mma_rule);
typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1,
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index c7a06d8b636..80bebdca9b5 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -89,28 +89,30 @@ segment_manager_segment_index (segment_manager_t * sm, fifo_segment_t * seg)
*/
static inline int
segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
- u8 notify_app, u8 flags)
+ u8 notify_app, u8 flags, u8 need_lock)
{
segment_manager_main_t *smm = &sm_main;
segment_manager_props_t *props;
+ app_worker_t *app_wrk;
fifo_segment_t *fs;
u32 fs_index = ~0;
u8 *seg_name;
int rv;
props = segment_manager_properties_get (sm);
+ app_wrk = app_worker_get (sm->app_wrk_index);
/* Not configured for addition of new segments and not first */
if (!props->add_segment && !segment_size)
{
- clib_warning ("cannot allocate new segment");
- return VNET_API_ERROR_INVALID_VALUE;
+ SESSION_DBG ("cannot allocate new segment");
+ return SESSION_E_INVALID;
}
/*
* Allocate fifo segment and grab lock if needed
*/
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_lock (&sm->segments_rwlock);
pool_get_zero (sm->segments, fs);
@@ -119,18 +121,24 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Allocate ssvm segment
*/
segment_size = segment_size ? segment_size : props->add_segment_size;
- segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
-
- if (props->segment_type != SSVM_SEGMENT_PRIVATE)
+  /* add overhead to ensure the resulting segment size is at least
+   * as large as requested */
+ segment_size +=
+ sizeof (fifo_segment_header_t) +
+ vlib_thread_main.n_vlib_mains * sizeof (fifo_segment_slice_t) +
+ FIFO_SEGMENT_ALLOC_OVERHEAD;
+
+ if (props->huge_page)
{
- seg_name = format (0, "%d-%d%c", getpid (), smm->seg_name_counter++, 0);
+ uword hugepage_size = clib_mem_get_default_hugepage_size ();
+ segment_size = round_pow2 (segment_size, hugepage_size);
+ fs->ssvm.huge_page = 1;
}
else
- {
- app_worker_t *app_wrk = app_worker_get (sm->app_wrk_index);
- application_t *app = application_get (app_wrk->app_index);
- seg_name = format (0, "%v segment%c", app->name, 0);
- }
+ segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
+
+ seg_name = format (0, "seg-%u-%u-%u%c", app_wrk->app_index,
+ app_wrk->wrk_index, smm->seg_name_counter++, 0);
fs->ssvm.ssvm_size = segment_size;
fs->ssvm.name = seg_name;
@@ -154,15 +162,17 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Save segment index before dropping lock, if any held
*/
fs_index = fs - sm->segments;
+ fs->fs_index = fs_index;
+ fs->sm_index = segment_manager_index (sm);
/*
* Set watermarks in segment
*/
- fs->h->high_watermark = sm->high_watermark;
- fs->h->low_watermark = sm->low_watermark;
+ fs->high_watermark = sm->high_watermark;
+ fs->low_watermark = sm->low_watermark;
+ fs->flags = flags;
+ fs->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
fs->h->pct_first_alloc = props->pct_first_alloc;
- fs->h->flags = flags;
- fs->h->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
if (notify_app)
{
@@ -172,11 +182,14 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
app_wrk = app_worker_get (sm->app_wrk_index);
rv = app_worker_add_segment_notify (app_wrk, fs_handle);
if (rv)
- return rv;
+ {
+ fs_index = rv;
+ goto done;
+ }
}
done:
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_unlock (&sm->segments_rwlock);
return fs_index;
@@ -186,14 +199,16 @@ int
segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app)
{
- return segment_manager_add_segment_inline (sm, segment_size, notify_app, 0);
+ return segment_manager_add_segment_inline (sm, segment_size, notify_app,
+ 0 /* flags */, 0 /* need_lock */);
}
int
segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags)
{
- return segment_manager_add_segment_inline (sm, segment_size, 0, flags);
+ return segment_manager_add_segment_inline (sm, segment_size, 0, flags,
+ vlib_num_workers ());
}
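
Note the division of labor made explicit by the new need_lock argument: segment_manager_add_segment performs no locking and is meant for callers that already hold the segments rwlock or run before workers exist, while segment_manager_add_segment2 takes the writer lock itself whenever workers are present. A minimal caller-side sketch, assuming a valid sm; sizes and error handling are illustrative only:

/* Sketch only: locking contract of the two wrappers above */
static int
add_segment_sketch (segment_manager_t *sm)
{
  int fs_index;

  /* Caller already serializes access (e.g. holds the writer lock or
   * runs on main before workers start): lock-free variant */
  fs_index = segment_manager_add_segment (sm, 1 << 20, 0 /* notify_app */);
  if (fs_index < 0)
    return fs_index;

  /* Unlocked context: variant that locks when workers are present */
  return segment_manager_add_segment2 (sm, 1 << 20, 0 /* flags */);
}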
/**
@@ -235,7 +250,8 @@ segment_manager_get_segment_if_valid (segment_manager_t * sm,
* Removes segment after acquiring writer lock
*/
static inline void
-sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
+sm_lock_and_del_segment_inline (segment_manager_t *sm, u32 fs_index,
+ u8 check_if_empty)
{
fifo_segment_t *fs;
u8 is_prealloc;
@@ -246,6 +262,9 @@ sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
if (!fs)
goto done;
+ if (check_if_empty && fifo_segment_has_fifos (fs))
+ goto done;
+
is_prealloc = fifo_segment_flags (fs) & FIFO_SEGMENT_F_IS_PREALLOCATED;
if (is_prealloc && !segment_manager_app_detached (sm))
goto done;
@@ -259,7 +278,7 @@ done:
void
segment_manager_lock_and_del_segment (segment_manager_t * sm, u32 fs_index)
{
- sm_lock_and_del_segment_inline (sm, fs_index);
+ sm_lock_and_del_segment_inline (sm, fs_index, 0 /* check_if_empty */);
}
/**
@@ -326,12 +345,6 @@ segment_manager_segment_reader_unlock (segment_manager_t * sm)
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
-void
-segment_manager_segment_writer_unlock (segment_manager_t * sm)
-{
- clib_rwlock_writer_unlock (&sm->segments_rwlock);
-}
-
segment_manager_t *
segment_manager_alloc (void)
{
@@ -405,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, max_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to preallocate segment %d", i);
+ SESSION_DBG ("Failed to preallocate segment %d", i);
return fs_index;
}
@@ -427,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, first_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to allocate segment");
+ SESSION_DBG ("Failed to allocate segment");
return fs_index;
}
@@ -445,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm)
for (; i < fs->n_slices; i++)
{
if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice))
- return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
+ return SESSION_E_SEG_CREATE;
}
}
@@ -486,11 +499,9 @@ segment_manager_free (segment_manager_t * sm)
* the manager is explicitly deleted/detached by the app. */
clib_rwlock_writer_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fifo_segment, sm->segments) {
segment_manager_del_segment (sm, fifo_segment);
}
- /* *INDENT-ON* */
pool_free (sm->segments);
clib_rwlock_writer_unlock (&sm->segments_rwlock);
@@ -569,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
fifo_segment_t *seg;
u8 first = 1;
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (seg, sm, ({
if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg)
&& !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED))
@@ -584,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
return 1;
}
}));
- /* *INDENT-ON* */
return 0;
}
@@ -604,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
ASSERT (pool_elts (sm->segments) != 0);
/* Across all fifo segments used by the server */
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (fs, sm, ({
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -629,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
* sessions if the segment can be removed.
*/
}));
- /* *INDENT-ON* */
vec_foreach (handle, handles)
{
@@ -695,19 +702,16 @@ segment_manager_del_sessions_filter (segment_manager_t *sm,
}
int
-segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
- u32 thread_index,
+segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index,
u32 rx_fifo_size, u32 tx_fifo_size,
- svm_fifo_t ** rx_fifo, svm_fifo_t ** tx_fifo)
+ svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo)
{
rx_fifo_size = clib_max (rx_fifo_size, sm_main.default_fifo_size);
- *rx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- rx_fifo_size,
+ *rx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, rx_fifo_size,
FIFO_SEGMENT_RX_FIFO);
tx_fifo_size = clib_max (tx_fifo_size, sm_main.default_fifo_size);
- *tx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- tx_fifo_size,
+ *tx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, tx_fifo_size,
FIFO_SEGMENT_TX_FIFO);
if (*rx_fifo == 0)
@@ -715,45 +719,37 @@ segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
/* This would be very odd, but handle it... */
if (*tx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *tx_fifo);
+ fifo_segment_free_fifo (fs, *tx_fifo);
*tx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
if (*tx_fifo == 0)
{
if (*rx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *rx_fifo);
+ fifo_segment_free_fifo (fs, *rx_fifo);
*rx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
return 0;
}
-int
-segment_manager_alloc_session_fifos (segment_manager_t * sm,
- u32 thread_index,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo)
+static inline int
+sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
{
- int alloc_fail = 1, rv = 0, new_fs_index;
- uword free_bytes, max_free_bytes = 0;
- segment_manager_props_t *props;
- fifo_segment_t *fs = 0, *cur;
- u32 sm_index, fs_index;
-
- props = segment_manager_properties_get (sm);
-
- /*
- * Find the first free segment to allocate the fifos in
- */
+ uword free_bytes, max_free_bytes;
+ fifo_segment_t *cur, *fs = 0;
- segment_manager_segment_reader_lock (sm);
+ max_free_bytes = props->rx_fifo_size + props->tx_fifo_size - 1;
- pool_foreach (cur, sm->segments) {
+ pool_foreach (cur, sm->segments)
+ {
if (fifo_segment_flags (cur) & FIFO_SEGMENT_F_CUSTOM_USE)
continue;
free_bytes = fifo_segment_available_bytes (cur);
@@ -762,63 +758,93 @@ segment_manager_alloc_session_fifos (segment_manager_t * sm,
max_free_bytes = free_bytes;
fs = cur;
}
- }
-
- if (fs)
- {
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- /* On success, keep lock until fifos are initialized */
- if (!alloc_fail)
- goto alloc_success;
}
- segment_manager_segment_reader_unlock (sm);
+ if (PREDICT_FALSE (!fs))
+ return SESSION_E_SEG_NO_SPACE;
- /*
- * Allocation failed, see if we can add a new segment
- */
- if (props->add_segment)
+ return segment_manager_try_alloc_fifos (
+ fs, thread_index, props->rx_fifo_size, props->tx_fifo_size, rx_fifo,
+ tx_fifo);
+}
+
+static int
+sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
+{
+ int new_fs_index, rv;
+ fifo_segment_t *fs;
+
+ if (!props->add_segment)
+ return SESSION_E_SEG_NO_SPACE;
+
+ clib_rwlock_writer_lock (&sm->segments_rwlock);
+
+ /* Make sure there really is no free space. Another worker might've freed
+ * some fifos or allocated a segment */
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
+ if (!rv)
+ goto done;
+
+ new_fs_index =
+ segment_manager_add_segment (sm, 0 /* segment_size*/, 1 /* notify_app */);
+ if (new_fs_index < 0)
{
- if ((new_fs_index = segment_manager_add_segment (sm, 0, 1)) < 0)
- {
- clib_warning ("Failed to add new segment");
- return SESSION_E_SEG_CREATE;
- }
- fs = segment_manager_get_segment_w_lock (sm, new_fs_index);
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- if (alloc_fail)
- {
- clib_warning ("Added a segment, still can't allocate a fifo");
- segment_manager_segment_reader_unlock (sm);
- return SESSION_E_SEG_NO_SPACE2;
- }
+ rv = SESSION_E_SEG_CREATE;
+ goto done;
}
- else
+ fs = segment_manager_get_segment (sm, new_fs_index);
+ rv = segment_manager_try_alloc_fifos (fs, thread_index, props->rx_fifo_size,
+ props->tx_fifo_size, rx_fifo, tx_fifo);
+ if (rv)
{
- SESSION_DBG ("Can't add new seg and no space to allocate fifos!");
- return SESSION_E_SEG_NO_SPACE;
+ SESSION_DBG ("Added a segment, still can't allocate a fifo");
+ rv = SESSION_E_SEG_NO_SPACE2;
+ goto done;
}
-alloc_success:
- ASSERT (rx_fifo && tx_fifo);
+done:
+
+ clib_rwlock_writer_unlock (&sm->segments_rwlock);
+
+ return rv;
+}
+
+int
+segment_manager_alloc_session_fifos (segment_manager_t * sm,
+ u32 thread_index,
+ svm_fifo_t ** rx_fifo,
+ svm_fifo_t ** tx_fifo)
+{
+ segment_manager_props_t *props;
+ int rv;
+
+ props = segment_manager_properties_get (sm);
- sm_index = segment_manager_index (sm);
- fs_index = segment_manager_segment_index (sm, fs);
- (*tx_fifo)->segment_manager = sm_index;
- (*rx_fifo)->segment_manager = sm_index;
- (*tx_fifo)->segment_index = fs_index;
- (*rx_fifo)->segment_index = fs_index;
+ /*
+ * Fast path: find the first segment with enough free space and
+ * try to allocate the fifos. Done with reader lock
+ */
+
+ segment_manager_segment_reader_lock (sm);
+
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
- /* Drop the lock after app is notified */
segment_manager_segment_reader_unlock (sm);
- return rv;
+ /*
+ * Slow path: if no fifo segment or alloc fail grab writer lock and try
+ * to allocate new segment
+ */
+ if (PREDICT_FALSE (rv < 0))
+ return sm_lock_and_alloc_segment_and_fifos (sm, props, thread_index,
+ rx_fifo, tx_fifo);
+
+ return 0;
}
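
The refactor above splits allocation into an optimistic fast path under the shared reader lock and a double-checked slow path under the writer lock, so the common case never serializes workers. A condensed caller-side sketch, loosely modeled on the app worker code that consumes this API; names other than segment_manager_alloc_session_fifos and the session error codes are illustrative:

/* Sketch: allocate per-session fifos and attach them to the session */
static int
session_fifos_sketch (segment_manager_t *sm, session_t *s)
{
  svm_fifo_t *rx_fifo = 0, *tx_fifo = 0;
  int rv;

  rv = segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo,
					    &tx_fifo);
  if (rv)
    return rv; /* SESSION_E_SEG_NO_SPACE / SESSION_E_SEG_CREATE / ... */

  s->rx_fifo = rx_fifo;
  s->tx_fifo = tx_fifo;
  return 0;
}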
void
@@ -827,10 +853,15 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
segment_manager_t *sm;
fifo_segment_t *fs;
u32 segment_index;
+ u8 try_delete = 0;
if (!rx_fifo || !tx_fifo)
return;
+ /* Thread that allocated the fifos must be the one to clean them up */
+ ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () ||
+ rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ());
+
/* It's possible to have no segment manager if the session was removed
* as result of a detach. */
if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager)))
@@ -842,26 +873,30 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
fifo_segment_free_fifo (fs, tx_fifo);
/*
- * Try to remove svm segment if it has no fifos. This can be done only if
+ * Try to remove fifo segment if it has no fifos. This can be done only if
* the segment is not the first in the segment manager or if it is first
* and it is not protected. Moreover, if the segment is first and the app
* has detached from the segment manager, remove the segment manager.
*/
if (!fifo_segment_has_fifos (fs))
{
- segment_manager_segment_reader_unlock (sm);
+ /* If first, remove only if not protected */
+ try_delete = segment_index != 0 || !sm->first_is_protected;
+ }
+
+ segment_manager_segment_reader_unlock (sm);
- /* Remove segment if it holds no fifos or first but not protected */
- if (segment_index != 0 || !sm->first_is_protected)
- sm_lock_and_del_segment_inline (sm, segment_index);
+ if (PREDICT_FALSE (try_delete))
+ {
+ /* Only remove if empty after writer lock acquired */
+ sm_lock_and_del_segment_inline (sm, segment_index,
+ 1 /* check_if_empty */);
/* Remove segment manager if no sessions and detached from app */
if (segment_manager_app_detached (sm)
&& !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
}
- else
- segment_manager_segment_reader_unlock (sm);
}
void
@@ -920,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment,
fifo_evt_size = sizeof (session_event_t);
notif_q_size = clib_max (16, props->evt_q_size >> 4);
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
{props->evt_q_size, fifo_evt_size, 0},
{notif_q_size, session_evt_size, 0}
};
- /* *INDENT-ON* */
cfg->consumer_pid = 0;
cfg->n_rings = 2;
cfg->q_nitems = props->evt_q_size;
@@ -984,79 +1017,111 @@ segment_manager_main_init (void)
sm->default_low_watermark = 50;
}
+static u8 *
+format_segment_manager (u8 *s, va_list *args)
+{
+ segment_manager_t *sm = va_arg (*args, segment_manager_t *);
+ int verbose = va_arg (*args, int);
+ app_worker_t *app_wrk;
+ uword max_fifo_size;
+ fifo_segment_t *seg;
+ application_t *app;
+ u8 custom_logic;
+
+ app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
+ app = app_wrk ? application_get (app_wrk->app_index) : 0;
+ custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
+ max_fifo_size = sm->max_fifo_size;
+
+ s = format (s,
+ "[%u] %v app-wrk: %u segs: %u max-fifo-sz: %U "
+ "wmarks: %u %u %s flags: 0x%x",
+ segment_manager_index (sm), app ? app->name : 0,
+ sm->app_wrk_index, pool_elts (sm->segments), format_memory_size,
+ max_fifo_size, sm->high_watermark, sm->low_watermark,
+ custom_logic ? "custom-tuning" : "no-tuning", sm->flags);
+
+ if (!verbose || !pool_elts (sm->segments))
+ return s;
+
+ s = format (s, "\n\n");
+
+ segment_manager_foreach_segment_w_lock (
+ seg, sm, ({ s = format (s, " *%U", format_fifo_segment, seg, verbose); }));
+
+ return s;
+}
+
static clib_error_t *
segment_manager_show_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
+ unformat_input_t _line_input, *line_input = &_line_input;
segment_manager_main_t *smm = &sm_main;
u8 show_segments = 0, verbose = 0;
- uword max_fifo_size;
segment_manager_t *sm;
- fifo_segment_t *seg;
- app_worker_t *app_wrk;
- application_t *app;
- u8 custom_logic;
+ u32 sm_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ vlib_cli_output (vm, "%d segment managers allocated",
+ pool_elts (smm->segment_managers));
+ return 0;
+ }
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "segments"))
+ if (unformat (line_input, "segments"))
show_segments = 1;
- else if (unformat (input, "verbose"))
+ else if (unformat (line_input, "verbose"))
verbose = 1;
+ else if (unformat (line_input, "index %u", &sm_index))
+ ;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ vlib_cli_output (vm, "unknown input [%U]", format_unformat_error,
+ line_input);
+ goto done;
+ }
}
- vlib_cli_output (vm, "%d segment managers allocated",
- pool_elts (smm->segment_managers));
- if (verbose && pool_elts (smm->segment_managers))
+
+ if (!pool_elts (smm->segment_managers))
+ goto done;
+
+ if (sm_index != ~0)
{
- vlib_cli_output (vm, "%-6s%=10s%=10s%=13s%=11s%=11s%=12s",
- "Index", "AppIndex", "Segments", "MaxFifoSize",
- "HighWater", "LowWater", "FifoTuning");
+ sm = segment_manager_get_if_valid (sm_index);
+ if (!sm)
+ {
+ vlib_cli_output (vm, "segment manager %u not allocated", sm_index);
+ goto done;
+ }
+ vlib_cli_output (vm, "%U", format_segment_manager, sm, 1 /* verbose */);
+ goto done;
+ }
- /* *INDENT-OFF* */
+ if (verbose || show_segments)
+ {
pool_foreach (sm, smm->segment_managers) {
- app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
- app = app_wrk ? application_get (app_wrk->app_index) : 0;
- custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
- max_fifo_size = sm->max_fifo_size;
-
- vlib_cli_output (vm, "%-6d%=10d%=10d%=13U%=11d%=11d%=12s",
- segment_manager_index (sm),
- sm->app_wrk_index, pool_elts (sm->segments),
- format_memory_size, max_fifo_size,
- sm->high_watermark, sm->low_watermark,
- custom_logic ? "custom" : "none");
+ vlib_cli_output (vm, "%U", format_segment_manager, sm,
+ show_segments);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\n");
}
- if (show_segments)
- {
- vlib_cli_output (vm, "%U", format_fifo_segment, 0, verbose);
- /* *INDENT-OFF* */
- pool_foreach (sm, smm->segment_managers) {
- segment_manager_foreach_segment_w_lock (seg, sm, ({
- vlib_cli_output (vm, "%U", format_fifo_segment, seg, verbose);
- }));
- }
- /* *INDENT-ON* */
+done:
+
+ unformat_free (line_input);
- }
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (segment_manager_show_command, static) =
-{
+VLIB_CLI_COMMAND (segment_manager_show_command, static) = {
.path = "show segment-manager",
- .short_help = "show segment-manager [segments][verbose]",
+ .short_help = "show segment-manager [segments][verbose][index <nn>]",
.function = segment_manager_show_fn,
};
-/* *INDENT-ON* */
void
segment_manager_format_sessions (segment_manager_t * sm, int verbose)
@@ -1085,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
clib_rwlock_reader_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fs, sm->segments) {
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -1117,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
vec_free (s);
}
}
- /* *INDENT-ON* */
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index 5a3d772ff02..1e99c4605a6 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -40,6 +40,7 @@ typedef struct _segment_manager_props
u8 high_watermark; /**< memory usage high watermark % */
u8 low_watermark; /**< memory usage low watermark % */
u8 pct_first_alloc; /**< pct of fifo size to alloc */
+ u8 huge_page; /**< use hugepage */
} segment_manager_props_t;
typedef enum seg_manager_flag_
@@ -102,8 +103,23 @@ segment_manager_t *segment_manager_get (u32 index);
segment_manager_t *segment_manager_get_if_valid (u32 index);
u32 segment_manager_index (segment_manager_t * sm);
+/**
+ * Add segment without lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param notify_app Flag set if app notification requested
+ */
int segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app);
+
+/**
+ * Add segment with lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param flags Flags to be set on segment
+ */
int segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags);
void segment_manager_del_segment (segment_manager_t * sm,
@@ -122,7 +138,6 @@ u64 segment_manager_make_segment_handle (u32 segment_manager_index,
u64 segment_manager_segment_handle (segment_manager_t * sm,
fifo_segment_t * segment);
void segment_manager_segment_reader_unlock (segment_manager_t * sm);
-void segment_manager_segment_writer_unlock (segment_manager_t * sm);
int segment_manager_alloc_session_fifos (segment_manager_t * sm,
u32 thread_index,
@@ -175,7 +190,9 @@ static inline void
segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index,
u32 * segment_index)
{
- *sm_index = segment_handle >> 32;
+ /* Upper 8 bits zeroed out as they may be used for cut-through segments.
+ * See @ref ct_alloc_segment */
+ *sm_index = (segment_handle >> 32) & 0xFFFFFF;
*segment_index = segment_handle & 0xFFFFFFFF;
}
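
For context, a segment handle packs the segment manager index into the upper 32 bits and the segment index into the lower 32; the new mask drops the top byte, which cut-through segments overload. A hedged sketch of the make side (the authoritative version is segment_manager_make_segment_handle, declared above in this header):

/* Sketch: handle packing that mirrors the parse above */
static inline u64
sm_make_segment_handle_sketch (u32 sm_index, u32 segment_index)
{
  /* sm_index must fit in 24 bits; the top byte is reserved for
   * cut-through segments (see ct_alloc_segment) */
  ASSERT (sm_index <= 0xFFFFFF);
  return ((u64) sm_index << 32) | (u64) segment_index;
}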
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index 43bde1afbbd..6affae4112d 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair {
u32 index;
};
-/** \brief Application add TLS certificate
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param cert_len - certificate length
- @param cert - certificate as a string
-*/
-autoreply define application_tls_cert_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 cert_len;
- u8 cert[cert_len];
-};
-
-/** \brief Application add TLS key
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param key_len - certificate length
- @param key - PEM encoded key as a string
-*/
-autoreply define application_tls_key_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 key_len;
- u8 key[key_len];
-};
-
/** \brief add/del application worker
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -203,6 +171,18 @@ autoreply define session_enable_disable {
bool is_enable [default=true];
};
+/** \brief enable/disable session layer socket api
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+    @param is_enable - disable socket api if 0, enable otherwise
+*/
+autoreply define session_sapi_enable_disable {
+ u32 client_index;
+ u32 context;
+ bool is_enable [default=true];
+};
+
/** \brief add/del application namespace
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -239,17 +219,86 @@ define app_namespace_add_del {
@param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
if sw_if_index set.
@param namespace_id - namespace id
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v4 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ bool is_add [default=true];
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string sock_name[];
+};
+
+/** \brief Reply for app namespace add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param appns_index - app namespace index
+*/
+define app_namespace_add_del_v4_reply
+{
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
@param netns - linux net namespace
*/
define app_namespace_add_del_v2 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string netns[64];
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
+ @param netns - linux net namespace
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v3 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
+ bool is_add [default=true];
vl_api_interface_index_t sw_if_index [default=0xffffffff];
u32 ip4_fib_id;
u32 ip6_fib_id;
string namespace_id[64];
string netns[64];
+ string sock_name[];
};
/** \brief Reply for app namespace add/del
@@ -272,6 +321,15 @@ define app_namespace_add_del_reply
*/
define app_namespace_add_del_v2_reply
{
+ option deprecated;
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+define app_namespace_add_del_v3_reply
+{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 05712181ab0..67e7ee39001 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -17,10 +17,13 @@
* @brief Session and session manager
*/
+#include <vnet/plugin/plugin.h>
#include <vnet/session/session.h>
#include <vnet/session/application.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/fib/ip4_fib.h>
+#include <vlib/stats/stats.h>
+#include <vlib/dma/dma.h>
session_main_t session_main;
@@ -36,8 +39,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
mq = wrk->vpp_event_queue;
if (PREDICT_FALSE (svm_msg_q_lock (mq)))
return -1;
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)
- || svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -58,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_CTRL_EVT_CLOSE:
case SESSION_CTRL_EVT_RESET:
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
@@ -95,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
}
int
+session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
+{
+ return session_send_evt_to_thread ((void *) &sh.session_index, 0,
+ (u32) sh.thread_index, evt_type);
+}
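
session_handle_tu_t lets the caller pass a plain handle while the callee reads the two halves without any shifting. Roughly, and hedged since the authoritative definition lives in session_types.h, it is a transparent union of the following shape:

/* Approximate shape of session_handle_tu_t; see session_types.h for
 * the authoritative definition */
typedef union
{
  session_handle_t handle;
  struct
  {
    u32 session_index;
    u32 thread_index;
  };
} session_handle_tu_t;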
+
+int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
@@ -202,39 +211,25 @@ session_alloc (u32 thread_index)
{
session_worker_t *wrk = &session_main.wrk[thread_index];
session_t *s;
- u8 will_expand = 0;
- pool_get_aligned_will_expand (wrk->sessions, will_expand,
- CLIB_CACHE_LINE_BYTES);
- /* If we have peekers, let them finish */
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&wrk->peekers_rw_locks);
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- clib_rwlock_writer_unlock (&wrk->peekers_rw_locks);
- }
- else
- {
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- }
+
+ pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = thread_index;
- s->app_index = APP_INVALID_INDEX;
+ s->al_index = APP_INVALID_INDEX;
+
return s;
}
void
session_free (session_t * s)
{
- if (CLIB_DEBUG)
- {
- u8 thread_index = s->thread_index;
- clib_memset (s, 0xFA, sizeof (*s));
- pool_put (session_main.wrk[thread_index].sessions, s);
- return;
- }
+ session_worker_t *wrk = &session_main.wrk[s->thread_index];
+
SESSION_EVT (SESSION_EVT_FREE, s);
- pool_put (session_main.wrk[s->thread_index].sessions, s);
+ if (CLIB_DEBUG)
+ clib_memset (s, 0xFA, sizeof (*s));
+ pool_put (wrk->sessions, s);
}
u8
@@ -252,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index)
|| s->session_state <= SESSION_STATE_LISTENING)
return 1;
- if (s->session_state == SESSION_STATE_CONNECTING &&
+ if ((s->session_state == SESSION_STATE_CONNECTING ||
+ s->session_state == SESSION_STATE_TRANSPORT_CLOSED) &&
(s->flags & SESSION_F_HALF_OPEN))
return 1;
tc = session_get_transport (s);
- if (s->connection_index != tc->c_index
- || s->thread_index != tc->thread_index || tc->s_index != si)
+ if (s->connection_index != tc->c_index ||
+ s->thread_index != tc->thread_index || tc->s_index != si)
return 0;
return 1;
}
+void
+session_cleanup (session_t *s)
+{
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+}
+
static void
session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf)
{
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (!app_wrk)
- return;
+ if (PREDICT_FALSE (!app_wrk))
+ {
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ session_cleanup (s);
+ return;
+ }
app_worker_cleanup_notify (app_wrk, s, ntf);
}
void
-session_free_w_fifos (session_t * s)
+session_program_cleanup (session_t *s)
{
+ ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
- segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
- session_free (s);
}
/**
@@ -297,7 +305,7 @@ session_delete (session_t * s)
if ((rv = session_lookup_del_session (s)))
clib_warning ("session %u hash delete rv %d", s->session_index, rv);
- session_free_w_fifos (s);
+ session_program_cleanup (s);
}
void
@@ -312,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle)
* session should be removed. */
if (ho->connection_index == ~0)
{
- ho->session_state = SESSION_STATE_CLOSED;
+ session_set_state (ho, SESSION_STATE_CLOSED);
return;
}
/* Migrated transports are no longer half-opens */
transport_cleanup (session_get_transport_proto (ho),
- ho->connection_index, ho->app_index /* overloaded */);
+ ho->connection_index, ho->al_index /* overloaded */);
+ }
+ else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ /* Cleanup half-open session lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ transport_connection_t *tc;
+ tc = transport_get_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
+ if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ transport_cleanup_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
}
- else
- transport_cleanup_half_open (session_get_transport_proto (ho),
- ho->connection_index);
session_free (ho);
}
@@ -330,10 +349,12 @@ session_half_open_free (session_t *ho)
{
app_worker_t *app_wrk;
- ASSERT (vlib_get_thread_index () <= 1);
- app_wrk = app_worker_get (ho->app_wrk_index);
- app_worker_del_half_open (app_wrk, ho);
- session_free (ho);
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+ app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
+ if (app_wrk)
+ app_worker_del_half_open (app_wrk, ho);
+ else
+ session_free (ho);
}
static void
@@ -346,16 +367,26 @@ session_half_open_free_rpc (void *args)
void
session_half_open_delete_notify (transport_connection_t *tc)
{
+ session_t *ho = ho_session_get (tc->s_index);
+
+ /* Cleanup half-open lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED);
+
/* Notification from ctrl thread accepted without rpc */
- if (!tc->thread_index)
+ if (tc->thread_index == transport_cl_thread ())
{
- session_half_open_free (ho_session_get (tc->s_index));
+ session_half_open_free (ho);
}
else
{
void *args = uword_to_pointer ((uword) tc->s_index, void *);
- session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc,
- args);
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ session_half_open_free_rpc, args);
}
}
@@ -364,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc)
{
session_t *ho;
+ /* Support half-open migrations only for transports with no lookup */
+ ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP);
+
ho = ho_session_get (tc->s_index);
ho->flags |= SESSION_F_IS_MIGRATING;
ho->connection_index = ~0;
@@ -383,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc)
return -1;
}
ho->connection_index = tc->c_index;
- /* Overload app index for half-open with new thread */
- ho->app_index = tc->thread_index;
+ /* Overload al_index for half-open with new thread */
+ ho->al_index = tc->thread_index;
return 0;
}
@@ -399,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc)
s = session_alloc (thread_index);
s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* Attach transport to session and vice versa */
s->connection_index = tc->c_index;
@@ -546,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f,
}
}
+void
+session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index)
+{
+ u8 need_interrupt;
+
+ ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ());
+ need_interrupt = clib_bitmap_is_zero (wrk->app_wrks_pending_ntf);
+ wrk->app_wrks_pending_ntf =
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1);
+
+ if (need_interrupt)
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+}
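
The bitmap above doubles as pending set and edge detector: the node interrupt is armed only on the zero-to-nonzero transition, so any number of sessions programming events for the same app worker in one dispatch costs a single interrupt. A hedged sketch of the consuming side; the real consumer is session_input_node and the flush helper name here is illustrative:

/* Sketch of the drain loop on the consuming side */
uword app_wrk_index;

clib_bitmap_foreach (app_wrk_index, wrk->app_wrks_pending_ntf)
  {
    app_worker_t *app_wrk = app_worker_get_if_valid (app_wrk_index);
    if (app_wrk)
      app_worker_flush_events (app_wrk); /* illustrative helper name */
  }
clib_bitmap_zero (wrk->app_wrks_pending_ntf);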
+
+always_inline void
+session_program_io_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t et, u8 is_cl)
+{
+ if (is_cl)
+ {
+ /* Special events for connectionless sessions */
+ et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX;
+
+ ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN);
+ session_event_t evt = {
+ .event_type = et,
+ .session_handle = session_handle (s),
+ };
+
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+ }
+ else
+ {
+ app_worker_add_event (app_wrk, s, et);
+ }
+}
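
The et += adjustment relies on the io event enum being laid out so that the builtin/main variants sit at a constant offset from the plain RX/TX ones. Hedged, since the actual values live in session_types.h, the assumed invariant can be written as:

/* Assumed enum layout (see session_types.h for the real definition):
 * the same offset must map RX -> BUILTIN_RX and TX -> TX_MAIN */
STATIC_ASSERT (SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX ==
		 SESSION_IO_EVT_TX_MAIN - SESSION_IO_EVT_TX,
	       "io evt offsets must match");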
+
+static inline int
+session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f,
+ session_evt_type_t evt_type)
+{
+ app_worker_t *app_wrk;
+ application_t *app;
+ u8 is_cl;
+ int i;
+
+ app = application_get (app_index);
+ if (!app)
+ return -1;
+
+ is_cl = s->thread_index != vlib_get_thread_index ();
+ for (i = 0; i < f->shr->n_subscribers; i++)
+ {
+ app_wrk = application_get_worker (app, f->shr->subscribers[i]);
+ if (!app_wrk)
+ continue;
+ session_program_io_event (app_wrk, s, evt_type, is_cl ? 1 : 0);
+ }
+
+ return 0;
+}
+
+always_inline int
+session_enqueue_notify_inline (session_t *s, u8 is_cl)
+{
+ app_worker_t *app_wrk;
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo,
+ SESSION_IO_EVT_RX);
+
+ return 0;
+}
+
+int
+session_enqueue_notify (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 0 /* is_cl */);
+}
+
+int
+session_enqueue_notify_cl (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 1 /* is_cl */);
+}
+
+int
+session_dequeue_notify (session_t *s)
+{
+ app_worker_t *app_wrk;
+ u8 is_cl;
+
+ /* Unset as soon as event is requested */
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ is_cl = s->session_state == SESSION_STATE_LISTENING ||
+ s->session_state == SESSION_STATE_OPENED;
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo,
+ SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+/**
+ * Flushes the per-thread queue of sessions that are to be notified of
+ * newly enqueued data.
+ *
+ * @param transport_proto transport protocol for which the queue is to be
+ * flushed
+ * @param thread_index thread index for which the flush is to be performed
+ */
+void
+session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index)
+{
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_handle_t *handles;
+ session_t *s;
+ u32 i, is_cl;
+
+ handles = wrk->session_to_enqueue[transport_proto];
+
+ for (i = 0; i < vec_len (handles); i++)
+ {
+ s = session_get_from_handle (handles[i]);
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
+ 0 /* TODO/not needed */);
+ is_cl =
+ s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS);
+ if (!is_cl)
+ session_enqueue_notify_inline (s, 0);
+ else
+ session_enqueue_notify_inline (s, 1);
+ }
+
+ vec_reset_length (handles);
+ wrk->session_to_enqueue[transport_proto] = handles;
+}
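
Typical usage, hedged: a transport input node enqueues with queue_event set for every packet it processes and flushes once at the end of the dispatch, which is how tcp amortizes notification cost across a frame. A sketch with the per-packet body elided; TRANSPORT_PROTO_TCP is used for illustration:

/* Sketch: flush batched rx notifications once per node dispatch */
static uword
transport_input_sketch (vlib_main_t *vm, vlib_node_runtime_t *node,
			vlib_frame_t *frame)
{
  u32 thread_index = vm->thread_index;

  /* per packet: session_enqueue_stream_connection (..., queue_event = 1)
   * elided */

  session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
  return frame->n_vectors;
}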
+
/*
- * Enqueue data for delivery to session peer. Does not notify peer of enqueue
- * event but on request can queue notification events for later delivery by
- * calling stream_server_flush_enqueue_events().
+ * Enqueue data for delivery to app. If requested, it queues app notification
+ * event for later delivery.
*
* @param tc Transport connection which is to be enqueued data
* @param b Buffer to be enqueued
@@ -598,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc,
if (queue_event)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -615,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc,
return enqueued;
}
-int
-session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto, u8 queue_event)
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
{
int rv;
@@ -627,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s,
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
{
- /* *INDENT-OFF* */
svm_fifo_seg_t segs[2] = {
{ (u8 *) hdr, sizeof (*hdr) },
{ vlib_buffer_get_current (b), b->current_length }
};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
0 /* allow_partial */ );
@@ -664,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s,
if (queue_event && rv > 0)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ u32 thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -681,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s,
}
int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event)
+{
+ session_t *awls;
+
+ awls = app_listener_select_wrk_cl_session (s, hdr);
+ return session_enqueue_dgram_connection_inline (awls, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes)
{
@@ -703,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
return rv;
}
-static inline int
-session_notify_subscribers (u32 app_index, session_t * s,
- svm_fifo_t * f, session_evt_type_t evt_type)
-{
- app_worker_t *app_wrk;
- application_t *app;
- int i;
-
- app = application_get (app_index);
- if (!app)
- return -1;
-
- for (i = 0; i < f->shr->n_subscribers; i++)
- {
- app_wrk = application_get_worker (app, f->shr->subscribers[i]);
- if (!app_wrk)
- continue;
- if (app_worker_lock_and_send_event (app_wrk, s, evt_type))
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Notify session peer that new data has been enqueued.
- *
- * @param s Stream session for which the event is to be generated.
- * @param lock Flag to indicate if call should lock message queue.
- *
- * @return 0 on success or negative number if failed to send notification.
- */
-static inline int
-session_enqueue_notify_inline (session_t * s)
-{
- app_worker_t *app_wrk;
- u32 session_index;
- u8 n_subscribers;
-
- session_index = s->session_index;
- n_subscribers = svm_fifo_n_subscribers (s->rx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- {
- SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index);
- return 0;
- }
-
- SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo));
-
- s->flags &= ~SESSION_F_RX_EVT;
-
- /* Application didn't confirm accept yet */
- if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING))
- return 0;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_RX)))
- return -1;
-
- if (PREDICT_FALSE (n_subscribers))
- {
- s = session_get (session_index, vlib_get_thread_index ());
- return session_notify_subscribers (app_wrk->app_index, s,
- s->rx_fifo, SESSION_IO_EVT_RX);
- }
-
- return 0;
-}
-
-int
-session_enqueue_notify (session_t * s)
-{
- return session_enqueue_notify_inline (s);
-}
-
-static void
-session_enqueue_notify_rpc (void *arg)
-{
- u32 session_index = pointer_to_uword (arg);
- session_t *s;
-
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
- if (!s)
- return;
-
- session_enqueue_notify (s);
-}
-
-/**
- * Like session_enqueue_notify, but can be called from a thread that does not
- * own the session.
- */
-void
-session_enqueue_notify_thread (session_handle_t sh)
-{
- u32 thread_index = session_thread_from_handle (sh);
- u32 session_index = session_index_from_handle (sh);
-
- /*
- * Pass session index (u32) as opposed to handle (u64) in case pointers
- * are not 64-bit.
- */
- session_send_rpc_evt_to_thread (thread_index,
- session_enqueue_notify_rpc,
- uword_to_pointer (session_index, void *));
-}
-
-int
-session_dequeue_notify (session_t * s)
-{
- app_worker_t *app_wrk;
-
- svm_fifo_clear_deq_ntf (s->tx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- return -1;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_TX)))
- return -1;
-
- if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers))
- return session_notify_subscribers (app_wrk->app_index, s,
- s->tx_fifo, SESSION_IO_EVT_TX);
-
- return 0;
-}
-
-/**
- * Flushes queue of sessions that are to be notified of new data
- * enqueued events.
- *
- * @param thread_index Thread index for which the flush is to be performed.
- * @return 0 on success or a positive number indicating the number of
- * failures due to API queue being full.
- */
-int
-session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index)
-{
- session_worker_t *wrk = session_main_get_worker (thread_index);
- session_t *s;
- int i, errors = 0;
- u32 *indices;
-
- indices = wrk->session_to_enqueue[transport_proto];
-
- for (i = 0; i < vec_len (indices); i++)
- {
- s = session_get_if_valid (indices[i], thread_index);
- if (PREDICT_FALSE (!s))
- {
- errors++;
- continue;
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
- 0 /* TODO/not needed */ );
-
- if (PREDICT_FALSE (session_enqueue_notify_inline (s)))
- errors++;
- }
-
- vec_reset_length (indices);
- wrk->session_to_enqueue[transport_proto] = indices;
-
- return errors;
-}
-
-int
-session_main_flush_all_enqueue_events (u8 transport_proto)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- int i, errors = 0;
- for (i = 0; i < 1 + vtm->n_threads; i++)
- errors += session_main_flush_enqueue_events (transport_proto, i);
- return errors;
-}
-
int
session_stream_connect_notify (transport_connection_t * tc,
session_error_t err)
@@ -898,6 +930,7 @@ session_stream_connect_notify (transport_connection_t * tc,
session_lookup_del_half_open (tc);
ho = ho_session_get (tc->s_index);
+ session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED);
opaque = ho->opaque;
app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
if (!app_wrk)
@@ -907,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc,
return app_worker_connect_notify (app_wrk, s, err, opaque);
s = session_alloc_for_connection (tc);
- s->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (s, SESSION_STATE_CONNECTING);
s->app_wrk_index = app_wrk->wrk_index;
+ s->opaque = opaque;
new_si = s->session_index;
new_ti = s->thread_index;
@@ -920,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc,
}
s = session_get (new_si, new_ti);
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
session_lookup_add_connection (tc, session_handle (s));
if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque))
@@ -937,17 +971,19 @@ session_stream_connect_notify (transport_connection_t * tc,
}
static void
-session_switch_pool_reply (void *arg)
+session_switch_pool_closed_rpc (void *arg)
{
- u32 session_index = pointer_to_uword (arg);
+ session_handle_t sh;
session_t *s;
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
+ sh = pointer_to_uword (arg);
+ s = session_get_from_handle_if_valid (sh);
if (!s)
return;
- /* Notify app that it has data on the new session */
- session_enqueue_notify (s);
+ transport_cleanup (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ session_cleanup (s);
}
typedef struct _session_switch_pool_args
@@ -965,39 +1001,40 @@ static void
session_switch_pool (void *cb_args)
{
session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args;
- session_handle_t new_sh;
+ session_handle_t sh, new_sh;
segment_manager_t *sm;
app_worker_t *app_wrk;
session_t *s;
- void *rargs;
ASSERT (args->thread_index == vlib_get_thread_index ());
s = session_get (args->session_index, args->thread_index);
- transport_cleanup (session_get_transport_proto (s), s->connection_index,
- s->thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (!app_wrk)
+ goto app_closed;
- new_sh = session_make_handle (args->new_session_index,
- args->new_thread_index);
+ /* Cleanup fifo segment slice state for fifos */
+ sm = app_worker_get_connect_segment_manager (app_wrk);
+ segment_manager_detach_fifo (sm, &s->rx_fifo);
+ segment_manager_detach_fifo (sm, &s->tx_fifo);
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (app_wrk)
- {
- /* Cleanup fifo segment slice state for fifos */
- sm = app_worker_get_connect_segment_manager (app_wrk);
- segment_manager_detach_fifo (sm, &s->rx_fifo);
- segment_manager_detach_fifo (sm, &s->tx_fifo);
+ /* Check if session closed during migration */
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ goto app_closed;
- /* Notify app, using old session, about the migration event */
- app_worker_migrate_notify (app_wrk, s, new_sh);
- }
+ new_sh =
+ session_make_handle (args->new_session_index, args->new_thread_index);
+ app_worker_migrate_notify (app_wrk, s, new_sh);
- /* Trigger app read and fifo updates on the new thread */
- rargs = uword_to_pointer (args->new_session_index, void *);
- session_send_rpc_evt_to_thread (args->new_thread_index,
- session_switch_pool_reply, rargs);
+ clib_mem_free (cb_args);
+ return;
- session_free (s);
+app_closed:
+ /* Session closed during migration. Clean everything up */
+ sh = session_handle (s);
+ session_send_rpc_evt_to_thread (args->new_thread_index,
+ session_switch_pool_closed_rpc,
+ uword_to_pointer (sh, void *));
clib_mem_free (cb_args);
}
@@ -1018,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc,
*/
new_s = session_clone_safe (tc->s_index, old_thread_index);
new_s->connection_index = tc->c_index;
- new_s->session_state = SESSION_STATE_READY;
+ session_set_state (new_s, SESSION_STATE_READY);
new_s->flags |= SESSION_F_IS_MIGRATING;
if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
@@ -1067,7 +1104,16 @@ session_transport_closing_notify (transport_connection_t * tc)
s = session_get (tc->s_index, tc->thread_index);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+
+ /* Wait for reply from app before sending notification as the
+ * accept might be rejected */
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_close_notify (app_wrk, s);
}
@@ -1108,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* because transport will soon be closed and closed sessions
* are assumed to have been removed from the lookup table */
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
break;
@@ -1119,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* session is just removed because both transport and app have
* confirmed the close*/
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
@@ -1128,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc)
break;
case SESSION_STATE_CLOSED:
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_delete (s);
break;
default:
@@ -1155,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc)
if (!(s = session_get_if_valid (tc->s_index, tc->thread_index)))
return;
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
+ return;
+
/* Transport thinks that app requested close but it actually didn't.
* Can happen for tcp:
* 1)if fin and rst are received in close succession.
@@ -1163,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc)
{
session_transport_closing_notify (tc);
svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
}
/* If app close has not been received or has not yet resulted in
* a transport close, only mark the session transport as closed */
else if (s->session_state <= SESSION_STATE_CLOSING)
- {
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
- }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
/* If app also closed, switch to closed */
else if (s->session_state == SESSION_STATE_APP_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
@@ -1193,7 +1241,12 @@ session_transport_reset_notify (transport_connection_t * tc)
svm_fifo_dequeue_drop_all (s->tx_fifo);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_reset_notify (app_wrk, s);
}
@@ -1210,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc)
return -1;
if (s->session_state != SESSION_STATE_CREATED)
return 0;
- s->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (s, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (app_wrk, s))
{
/* On transport delete, no notifications should be sent. Unless, the
* accept is retried and successful. */
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
return -1;
}
return 0;
@@ -1233,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index,
s = session_alloc_for_connection (tc);
s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index;
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
if ((rv = app_worker_init_accepted (s)))
{
@@ -1277,6 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index,
}
session_lookup_add_connection (tc, session_handle (s));
+ session_set_state (s, SESSION_STATE_ACCEPTING);
app_wrk = app_worker_get (s->app_wrk_index);
if ((rv = app_worker_accept_notify (app_wrk, s)))
@@ -1314,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
app_wrk = app_worker_get (rmt->app_wrk_index);
s = session_alloc_for_connection (tc);
s->app_wrk_index = app_wrk->wrk_index;
- s->session_state = SESSION_STATE_OPENED;
+ s->opaque = rmt->opaque;
+ session_set_state (s, SESSION_STATE_OPENED);
+ if (transport_connection_is_cless (tc))
+ s->flags |= SESSION_F_IS_CLESS;
if (app_worker_init_connected (app_wrk, s))
{
session_free (s);
@@ -1382,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
typedef int (*session_open_service_fn) (session_endpoint_cfg_t *,
session_handle_t *);
-/* *INDENT-OFF* */
static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = {
session_open_vc,
session_open_cl,
session_open_app,
};
-/* *INDENT-ON* */
/**
* Ask transport to open connection to remote transport endpoint.
@@ -1422,12 +1477,12 @@ session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
int
session_listen (session_t * ls, session_endpoint_cfg_t * sep)
{
- transport_endpoint_t *tep;
+ transport_endpoint_cfg_t *tep;
int tc_index;
u32 s_index;
/* Transport bind/listen */
- tep = session_endpoint_to_transport (sep);
+ tep = session_endpoint_to_transport_cfg (sep);
s_index = ls->session_index;
tc_index = transport_start_listen (session_get_transport_proto (ls),
s_index, tep);
@@ -1439,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep)
* worker because local tables (for ct sessions) are not backed by a fib */
ls = listen_session_get (s_index);
ls->connection_index = tc_index;
+ ls->opaque = sep->opaque;
+ if (transport_connection_is_cless (session_get_transport (ls)))
+ ls->flags |= SESSION_F_IS_CLESS;
return 0;
}
@@ -1493,9 +1551,15 @@ session_half_close (session_t *s)
void
session_close (session_t * s)
{
- if (!s)
+ if (!s || (s->flags & SESSION_F_APP_CLOSED))
return;
+ /* Transports can close and delete their state independently of app
+ * closes, and transport-initiated state transitions can hide app closes.
+ * Instead of extending the state machine to track app- and
+ * transport-initiated closes separately, use a flag. */
+ s->flags |= SESSION_F_APP_CLOSED;
+
if (s->session_state >= SESSION_STATE_CLOSING)
{
/* Session will only be removed once both app and transport
@@ -1506,7 +1570,12 @@ session_close (session_t * s)
return;
}
- s->session_state = SESSION_STATE_CLOSING;
+ /* App closed, so stop propagating dequeue notifications.
+ * The app might disconnect the session before it is connected, in which
+ * case the tx_fifo may not be set up yet, so clear it only if it is
+ * initialized. */
+ if (s->tx_fifo)
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
}
@@ -1518,12 +1587,46 @@ session_reset (session_t * s)
{
if (s->session_state >= SESSION_STATE_CLOSING)
return;
- /* Drop all outstanding tx data */
- svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_CLOSING;
+ /* Drop all outstanding tx data.
+ * The app might disconnect the session before it is connected, in which
+ * case the tx_fifo may not be set up yet, so drop it only if it is
+ * initialized. */
+ if (s->tx_fifo)
+ svm_fifo_dequeue_drop_all (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET);
}
+void
+session_detach_app (session_t *s)
+{
+ if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_close (s);
+ }
+ else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ {
+ transport_connection_t *tc;
+
+ /* Transport is closing but not yet deleted. Confirm the close and
+ * subsequently detach the transport from the session and enqueue a
+ * session cleanup notification. Transport closed and cleanup
+ * notifications will be dropped by the session layer APIs */
+ transport_close (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ tc = session_get_transport (s);
+ tc->s_index = SESSION_INVALID_INDEX;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+ else
+ {
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+
+ s->flags |= SESSION_F_APP_CLOSED;
+ s->app_wrk_index = APP_INVALID_INDEX;
+}
+
/**
* Notify transport the session can be half-disconnected.
*
@@ -1555,10 +1658,10 @@ session_transport_close (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* If transport is already deleted, just free the session */
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
@@ -1568,7 +1671,7 @@ session_transport_close (session_t * s)
* delete notify. This will finally lead to the complete cleanup of the
* session.
*/
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_close (session_get_transport_proto (s), s->connection_index,
s->thread_index);
@@ -1583,13 +1686,13 @@ session_transport_reset (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_reset (session_get_transport_proto (s), s->connection_index,
s->thread_index);
}
@@ -1616,64 +1719,63 @@ session_transport_cleanup (session_t * s)
}
/**
- * Allocate event queues in the shared-memory segment
+ * Allocate worker mqs in a shareable segment
*
- * That can only be a newly created memfd segment, that must be
- * mapped by all apps/stack users.
+ * This can only be a newly created memfd segment, which must be mapped
+ * by all apps/stack users unless private rx mqs are enabled.
*/
void
-session_vpp_event_queues_allocate (session_main_t * smm)
+session_vpp_wrk_mqs_alloc (session_main_t *smm)
{
- u32 evt_q_length = 2048, evt_size = sizeof (session_event_t);
- fifo_segment_t *eqs = &smm->evt_qs_segment;
- uword eqs_size = 64 << 20;
- pid_t vpp_pid = getpid ();
+ u32 mq_q_length = 2048, evt_size = sizeof (session_event_t);
+ fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment;
+ svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
+ uword mqs_seg_size;
int i;
- if (smm->configured_event_queue_length)
- evt_q_length = smm->configured_event_queue_length;
+ mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length);
- if (smm->evt_qs_segment_size)
- eqs_size = smm->evt_qs_segment_size;
+ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
+ { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 }
+ };
+ cfg->consumer_pid = 0;
+ cfg->n_rings = 2;
+ cfg->q_nitems = mq_q_length;
+ cfg->ring_cfgs = rc;
+
+ /*
+ * Compute mqs segment size based on rings config and leave space
+ * for passing extended configuration messages, i.e., data allocated
+ * outside of the rings. If provided with a config value, accept it
+ * if larger than minimum size.
+ */
+ mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk);
+ mqs_seg_size = mqs_seg_size + (1 << 20);
+ mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size);
- eqs->ssvm.ssvm_size = eqs_size;
- eqs->ssvm.my_pid = vpp_pid;
- eqs->ssvm.name = format (0, "%s%c", "session: evt-qs-segment", 0);
- /* clib_mem_vm_map_shared consumes first page before requested_va */
- eqs->ssvm.requested_va = smm->session_baseva + clib_mem_get_page_size ();
+ mqs_seg->ssvm.ssvm_size = mqs_seg_size;
+ mqs_seg->ssvm.my_pid = getpid ();
+ mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0);
- if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD))
+ if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD))
{
clib_warning ("failed to initialize queue segment");
return;
}
- fifo_segment_init (eqs);
+ fifo_segment_init (mqs_seg);
/* Special fifo segment that's filled only with mqs */
- eqs->h->n_mqs = vec_len (smm->wrk);
+ mqs_seg->h->n_mqs = vec_len (smm->wrk);
for (i = 0; i < vec_len (smm->wrk); i++)
- {
- svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
- svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
- {evt_q_length, evt_size, 0}
- ,
- {evt_q_length >> 1, 256, 0}
- };
- cfg->consumer_pid = 0;
- cfg->n_rings = 2;
- cfg->q_nitems = evt_q_length;
- cfg->ring_cfgs = rc;
-
- smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (eqs, i, cfg);
- }
+ smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg);
}
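
For reference, a minimal sketch of the sizing logic above, assuming
svm_msg_q_size_to_alloc () accounts for both rings plus queue metadata
(helper name hypothetical):

static uword
wrk_mqs_seg_size_estimate (svm_msg_q_cfg_t *cfg, u32 n_workers,
                           uword configured_size)
{
  /* one mq per worker, plus 1MB headroom for extended config msgs,
   * never smaller than the configured segment size */
  uword sz = svm_msg_q_size_to_alloc (cfg) * n_workers;
  sz += 1 << 20;
  return clib_max (sz, configured_size);
}
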
fifo_segment_t *
-session_main_get_evt_q_segment (void)
+session_main_get_wrk_mqs_segment (void)
{
- return &session_main.evt_qs_segment;
+ return &session_main.wrk_mqs_segment;
}
u64
@@ -1689,14 +1791,28 @@ session_segment_handle (session_t * s)
f->segment_index);
}
-/* *INDENT-OFF* */
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto, u32 *original_dst,
+ u16 *original_dst_port)
+{
+ session_main_t *smm = vnet_get_session_main ();
+ ip_protocol_t proto =
+ (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+ if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+ return;
+ smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+ i2o_dst->port, proto, original_dst,
+ original_dst_port);
+}
+
static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
session_tx_fifo_peek_and_snd,
session_tx_fifo_dequeue_and_snd,
session_tx_fifo_dequeue_internal,
session_tx_fifo_dequeue_and_snd
};
-/* *INDENT-ON* */
void
session_register_transport (transport_proto_t transport_proto,
@@ -1721,6 +1837,39 @@ session_register_transport (transport_proto_t transport_proto,
session_tx_fns[vft->transport_options.tx_type];
}
+void
+session_register_update_time_fn (session_update_time_fn fn, u8 is_add)
+{
+ session_main_t *smm = &session_main;
+ session_update_time_fn *fi;
+ u32 fi_pos = ~0;
+ u8 found = 0;
+
+ vec_foreach (fi, smm->update_time_fns)
+ {
+ if (*fi == fn)
+ {
+ fi_pos = fi - smm->update_time_fns;
+ found = 1;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (found)
+ {
+ clib_warning ("update time fn %p already registered", fn);
+ return;
+ }
+ vec_add1 (smm->update_time_fns, fn);
+ }
+ else
+ {
+ vec_del1 (smm->update_time_fns, fi_pos);
+ }
+}
+
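
A sketch of how a transport could use the new registration hook, assuming
a per-thread timer callback matching the session_update_time_fn typedef
(names hypothetical):

static void
my_proto_update_time (f64 now, u8 thread_index)
{
  /* advance this transport's per-thread timer wheel */
}

static void
my_proto_enable (u8 is_en)
{
  /* is_add mirrors enable: register on enable, unregister on disable */
  session_register_update_time_fn (my_proto_update_time, is_en);
}
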
transport_proto_t
session_add_transport_proto (void)
{
@@ -1788,6 +1937,44 @@ session_queue_run_on_main_thread (vlib_main_t * vm)
vlib_node_set_interrupt_pending (vm, session_queue_node.index);
}
+static void
+session_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ u32 i, n_workers, n_wrk_sessions, n_sessions = 0;
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ counter_t **counters;
+ counter_t *cb;
+
+ n_workers = vec_len (smm->wrk);
+ vlib_stats_validate (d->entry_index, 0, n_workers - 1);
+ counters = d->entry->data;
+ cb = counters[0];
+
+ for (i = 0; i < vec_len (smm->wrk); i++)
+ {
+ wrk = session_main_get_worker (i);
+ n_wrk_sessions = pool_elts (wrk->sessions);
+ cb[i] = n_wrk_sessions;
+ n_sessions += n_wrk_sessions;
+ }
+
+ vlib_stats_set_gauge (d->private_data, n_sessions);
+}
+
+static void
+session_stats_collector_init (void)
+{
+ vlib_stats_collector_reg_t reg = {};
+
+ reg.entry_index =
+ vlib_stats_add_counter_vector ("/sys/session/sessions_per_worker");
+ reg.private_data = vlib_stats_add_gauge ("/sys/session/sessions_total");
+ reg.collect_fn = session_stats_collector_fn;
+ vlib_stats_register_collector_fn (&reg);
+ vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ());
+}
+
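
The collector pattern above generalizes to other per-worker counters with
an aggregate gauge; a hedged sketch (paths and names hypothetical):

static void my_collector_fn (vlib_stats_collector_data_t *d);

static void
my_stats_collector_init (void)
{
  vlib_stats_collector_reg_t reg = {};

  /* per-worker counter vector plus an aggregate gauge, as above */
  reg.entry_index = vlib_stats_add_counter_vector ("/sys/foo/per_worker");
  reg.private_data = vlib_stats_add_gauge ("/sys/foo/total");
  reg.collect_fn = my_collector_fn;
  vlib_stats_register_collector_fn (&reg);
}
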
static clib_error_t *
session_manager_main_enable (vlib_main_t * vm)
{
@@ -1808,6 +1995,7 @@ session_manager_main_enable (vlib_main_t * vm)
/* Allocate cache line aligned worker contexts */
vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES);
+ clib_spinlock_init (&session_main.pool_realloc_lock);
for (i = 0; i < num_threads; i++)
{
@@ -1816,21 +2004,20 @@ session_manager_main_enable (vlib_main_t * vm)
wrk->new_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list);
+ wrk->evts_pending_main =
+ clib_llist_make_head (wrk->event_elts, evt_list);
wrk->vm = vlib_get_main_by_index (i);
wrk->last_vlib_time = vlib_time_now (vm);
wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ;
wrk->timerfd = -1;
vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type);
- if (num_threads > 1)
- clib_rwlock_init (&smm->wrk[i].peekers_rw_locks);
-
if (!smm->no_adaptive && smm->use_private_rx_mqs)
session_wrk_enable_adaptive_mode (wrk);
}
/* Allocate vpp event queues segment and queue */
- session_vpp_event_queues_allocate (smm);
+ session_vpp_wrk_mqs_alloc (smm);
/* Initialize segment manager properties */
segment_manager_main_init ();
@@ -1860,6 +2047,7 @@ session_manager_main_enable (vlib_main_t * vm)
session_lookup_init ();
app_namespaces_init ();
transport_init ();
+ session_stats_collector_init ();
smm->is_initialized = 1;
done:
@@ -1879,6 +2067,87 @@ session_manager_main_disable (vlib_main_t * vm)
transport_enable_disable (vm, 0 /* is_en */ );
}
+/* In this callback, the cookie hints at the transfer index */
+void
+session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vm->thread_index);
+ session_dma_transfer *dma_transfer;
+
+ dma_transfer = &wrk->dma_trans[wrk->trans_head];
+ vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers,
+ vec_len (dma_transfer->pending_tx_buffers));
+ vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts,
+ vec_len (dma_transfer->pending_tx_nexts));
+ vec_reset_length (dma_transfer->pending_tx_buffers);
+ vec_reset_length (dma_transfer->pending_tx_nexts);
+ wrk->trans_head++;
+ if (wrk->trans_head == wrk->trans_size)
+ wrk->trans_head = 0;
+ return;
+}
+
+static void
+session_prepare_dma_args (vlib_dma_config_t *args)
+{
+ args->max_batches = 16;
+ args->max_transfers = DMA_TRANS_SIZE;
+ args->max_transfer_size = 65536;
+ args->features = 0;
+ args->sw_fallback = 1;
+ args->barrier_before_last = 1;
+ args->callback_fn = session_dma_completion_cb;
+}
+
+static void
+session_node_enable_dma (u8 is_en, int n_vlibs)
+{
+ vlib_dma_config_t args;
+ session_prepare_dma_args (&args);
+ session_worker_t *wrk;
+ vlib_main_t *vm;
+
+ int config_index = -1;
+
+ if (is_en)
+ {
+ vm = vlib_get_main_by_index (0);
+ config_index = vlib_dma_config_add (vm, &args);
+ }
+ else
+ {
+ vm = vlib_get_main_by_index (0);
+ wrk = session_main_get_worker (0);
+ if (wrk->config_index >= 0)
+ vlib_dma_config_del (vm, wrk->config_index);
+ }
+ int i;
+ for (i = 0; i < n_vlibs; i++)
+ {
+ vm = vlib_get_main_by_index (i);
+ wrk = session_main_get_worker (vm->thread_index);
+ wrk->config_index = config_index;
+ if (is_en)
+ {
+ if (config_index >= 0)
+ wrk->dma_enabled = true;
+ wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc (
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ bzero (wrk->dma_trans,
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ }
+ else
+ {
+ if (wrk->dma_trans)
+ clib_mem_free (wrk->dma_trans);
+ }
+ wrk->trans_head = 0;
+ wrk->trans_tail = 0;
+ wrk->trans_size = DMA_TRANS_SIZE;
+ }
+}
+
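
For orientation, a producer-side sketch that would pair with
session_dma_completion_cb above, assuming the vlib DMA batch API
(vlib_dma_batch_new/add/submit); dst, src and n_bytes are placeholders:

/* Sketch only: enqueue one copy on this worker's DMA config and submit;
 * on completion the callback above recycles the transfer slot */
vlib_dma_batch_t *b = vlib_dma_batch_new (vm, wrk->config_index);
if (b)
  {
    vlib_dma_batch_add (vm, b, dst, src, n_bytes);
    vlib_dma_batch_submit (vm, b);
  }
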
void
session_node_enable_disable (u8 is_en)
{
@@ -1914,11 +2183,15 @@ session_node_enable_disable (u8 is_en)
if (!sm->poll_main)
continue;
}
+ vlib_node_set_state (vm, session_input_node.index, mstate);
vlib_node_set_state (vm, session_queue_node.index, state);
}
if (sm->use_private_rx_mqs)
application_enable_rx_mqs_nodes (is_en);
+
+ if (sm->dma_enabled)
+ session_node_enable_dma (is_en, n_vlibs);
}
clib_error_t *
@@ -1953,17 +2226,9 @@ session_main_init (vlib_main_t * vm)
smm->poll_main = 0;
smm->use_private_rx_mqs = 0;
smm->no_adaptive = 0;
- smm->session_baseva = HIGH_SEGMENT_BASEVA;
-
-#if (HIGH_SEGMENT_BASEVA > (4ULL << 30))
- smm->session_va_space_size = 128ULL << 30;
- smm->evt_qs_segment_size = 64 << 20;
-#else
- smm->session_va_space_size = 128 << 20;
- smm->evt_qs_segment_size = 1 << 20;
-#endif
-
- smm->last_transport_proto_type = TRANSPORT_PROTO_SRTP;
+ smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP;
+ smm->port_allocator_min_src_port = 1024;
+ smm->port_allocator_max_src_port = 65535;
return 0;
}
@@ -1993,13 +2258,16 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "event-queue-length %d", &nitems))
+ if (unformat (input, "wrk-mq-length %d", &nitems))
{
if (nitems >= 2048)
- smm->configured_event_queue_length = nitems;
+ smm->configured_wrk_mq_length = nitems;
else
clib_warning ("event queue length %d too small, ignored", nitems);
}
+ else if (unformat (input, "wrk-mqs-segment-size %U",
+ unformat_memory_size, &smm->wrk_mqs_segment_size))
+ ;
else if (unformat (input, "preallocated-sessions %d",
&smm->preallocated_sessions))
;
@@ -2058,24 +2326,44 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "local-endpoints-table-buckets %d",
&smm->local_endpoints_table_buckets))
;
- /* Deprecated but maintained for compatibility */
- else if (unformat (input, "evt_qs_memfd_seg"))
- ;
- else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
- &smm->evt_qs_segment_size))
- ;
+ else if (unformat (input, "min-src-port %d", &tmp))
+ smm->port_allocator_min_src_port = tmp;
+ else if (unformat (input, "max-src-port %d", &tmp))
+ smm->port_allocator_max_src_port = tmp;
else if (unformat (input, "enable"))
smm->session_enable_asap = 1;
- else if (unformat (input, "segment-baseva 0x%lx", &smm->session_baseva))
- ;
else if (unformat (input, "use-app-socket-api"))
- appns_sapi_enable ();
+ (void) appns_sapi_enable_disable (1 /* is_enable */);
else if (unformat (input, "poll-main"))
smm->poll_main = 1;
else if (unformat (input, "use-private-rx-mqs"))
smm->use_private_rx_mqs = 1;
else if (unformat (input, "no-adaptive"))
smm->no_adaptive = 1;
+ else if (unformat (input, "use-dma"))
+ smm->dma_enabled = 1;
+ else if (unformat (input, "nat44-original-dst-enable"))
+ {
+ smm->original_dst_lookup = vlib_get_plugin_symbol (
+ "nat_plugin.so", "nat44_original_dst_lookup");
+ }
+ /*
+ * Deprecated but maintained for compatibility
+ */
+ else if (unformat (input, "evt_qs_memfd_seg"))
+ ;
+ else if (unformat (input, "segment-baseva 0x%lx", &tmp))
+ ;
+ else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
+ &smm->wrk_mqs_segment_size))
+ ;
+ else if (unformat (input, "event-queue-length %d", &nitems))
+ {
+ if (nitems >= 2048)
+ smm->configured_wrk_mq_length = nitems;
+ else
+ clib_warning ("event queue length %d too small, ignored", nitems);
+ }
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
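
Taken together, the tokens parsed above map to a startup.conf stanza along
these lines (a sketch; values illustrative, memory sizes use
unformat_memory_size suffixes):

session {
  enable
  use-app-socket-api
  wrk-mq-length 4096
  wrk-mqs-segment-size 64m
  min-src-port 10000
  max-src-port 60000
  use-dma
}
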
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 2d01eb6a67a..a5604bf8725 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -21,23 +21,12 @@
#include <vnet/session/session_debug.h>
#include <svm/message_queue.h>
#include <svm/fifo_segment.h>
+#include <vlib/dma/dma.h>
-#define foreach_session_input_error \
-_(NO_SESSION, "No session drops") \
-_(NO_LISTENER, "No listener for dst port drops") \
-_(ENQUEUED, "Packets pushed into rx fifo") \
-_(NOT_READY, "Session not ready packets") \
-_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \
-_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \
-_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \
-
-typedef enum
+typedef struct session_wrk_stats_
{
-#define _(sym,str) SESSION_ERROR_##sym,
- foreach_session_input_error
-#undef _
- SESSION_N_ERROR,
-} session_input_error_t;
+ u32 errors[SESSION_N_ERRORS];
+} session_wrk_stats_t;
typedef struct session_tx_context_
{
@@ -59,6 +48,7 @@ typedef struct session_tx_context_
/** Vector of tx buffer free lists */
u32 *tx_buffers;
+ vlib_buffer_t **transport_pending_bufs;
} session_tx_context_t;
typedef struct session_evt_elt
@@ -84,6 +74,13 @@ typedef enum session_wrk_flags_
SESSION_WRK_F_ADAPTIVE = 1 << 0,
} __clib_packed session_wrk_flag_t;
+#define DMA_TRANS_SIZE 1024
+typedef struct
+{
+ u32 *pending_tx_buffers;
+ u16 *pending_tx_nexts;
+} session_dma_transfer;
+
typedef struct session_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -103,8 +100,8 @@ typedef struct session_worker_
/** Convenience pointer to this worker's vlib_main */
vlib_main_t *vm;
- /** Per-proto vector of sessions to enqueue */
- u32 **session_to_enqueue;
+ /** Per-proto vector of session handles to enqueue */
+ session_handle_t **session_to_enqueue;
/** Timerfd used to periodically signal wrk session queue node */
int timerfd;
@@ -133,9 +130,6 @@ typedef struct session_worker_
/** Head of list of pending events */
clib_llist_index_t old_head;
- /** Peekers rw lock */
- clib_rwlock_t peekers_rw_locks;
-
/** Vector of buffers to be sent */
u32 *pending_tx_buffers;
@@ -151,8 +145,22 @@ typedef struct session_worker_
/** Flag that is set if main thread signaled to handle connects */
u32 n_pending_connects;
- /** Main thread loops in poll mode without a connect */
- u32 no_connect_loops;
+ /** List head for first worker evts pending handling on main */
+ clib_llist_index_t evts_pending_main;
+
+ /** Per-app-worker bitmap of pending notifications */
+ uword *app_wrks_pending_ntf;
+
+ int config_index;
+ u8 dma_enabled;
+ session_dma_transfer *dma_trans;
+ u16 trans_head;
+ u16 trans_tail;
+ u16 trans_size;
+ u16 batch_num;
+ vlib_dma_batch_t *batch;
+
+ session_wrk_stats_t stats;
#if SESSION_DEBUG
/** last event poll time by thread */
@@ -170,13 +178,22 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;
u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
+typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+ ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+ u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port);
+
typedef struct session_main_
{
/** Worker contexts */
session_worker_t *wrk;
+ /** Vector of transport update time functions */
+ session_update_time_fn *update_time_fns;
+
/** Event queues memfd segment */
- fifo_segment_t evt_qs_segment;
+ fifo_segment_t wrk_mqs_segment;
/** Unique segment name counter */
u32 unique_segment_name_counter;
@@ -189,11 +206,22 @@ typedef struct session_main_
* Trade memory for speed, for now */
u32 *session_type_to_next;
- /** Thread for cl and ho that rely on cl allocs */
+ /** Thread used for allocating active open connections, i.e., half-opens
+ * for transports like tcp, and sessions that will be migrated for cl
+ * transports like udp. If vpp has workers, this will be the first
+ * worker. */
u32 transport_cl_thread;
transport_proto_t last_transport_proto_type;
+ /** Number of workers at pool realloc barrier */
+ volatile u32 pool_realloc_at_barrier;
+
+ /** Number of workers doing reallocs */
+ volatile u32 pool_realloc_doing_work;
+
+ /** Lock to synchronize parallel forced reallocs */
+ clib_spinlock_t pool_realloc_lock;
+
/*
* Config parameters
*/
@@ -217,12 +245,13 @@ typedef struct session_main_
u8 no_adaptive;
/** vpp fifo event queue configured length */
- u32 configured_event_queue_length;
+ u32 configured_wrk_mq_length;
/** Session ssvm segment configs*/
- uword session_baseva;
- uword session_va_space_size;
- uword evt_qs_segment_size;
+ uword wrk_mqs_segment_size;
+
+ /** Session dma enable flag */
+ u8 dma_enabled;
/** Session table size parameters */
u32 configured_v4_session_table_buckets;
@@ -238,14 +267,22 @@ typedef struct session_main_
u32 local_endpoints_table_memory;
u32 local_endpoints_table_buckets;
+ /** Transport source port allocation range */
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+
/** Preallocate session config parameter */
u32 preallocated_sessions;
u16 msg_id_base;
+
+ /** Query nat44-ed session to get original dst ip4 & dst port. */
+ nat44_original_dst_lookup_fn original_dst_lookup;
} session_main_t;
extern session_main_t session_main;
extern vlib_node_registration_t session_queue_node;
+extern vlib_node_registration_t session_input_node;
extern vlib_node_registration_t session_queue_process_node;
extern vlib_node_registration_t session_queue_pre_input_node;
@@ -301,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt)
static inline void
session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
- ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX);
+ ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC);
pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index);
}
@@ -329,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq);
session_t *session_alloc (u32 thread_index);
void session_free (session_t * s);
-void session_free_w_fifos (session_t * s);
+void session_cleanup (session_t *s);
+void session_program_cleanup (session_t *s);
void session_cleanup_half_open (session_handle_t ho_handle);
u8 session_is_valid (u32 si, u8 thread_index);
@@ -354,100 +392,53 @@ session_get_if_valid (u64 si, u32 thread_index)
}
always_inline session_t *
-session_get_from_handle (session_handle_t handle)
+session_get_from_handle (session_handle_tu_t handle)
{
session_main_t *smm = &session_main;
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index);
+ return pool_elt_at_index (smm->wrk[handle.thread_index].sessions,
+ handle.session_index);
}
always_inline session_t *
-session_get_from_handle_if_valid (session_handle_t handle)
+session_get_from_handle_if_valid (session_handle_tu_t handle)
{
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return session_get_if_valid (session_index, thread_index);
+ return session_get_if_valid (handle.session_index, handle.thread_index);
}
-u64 session_segment_handle (session_t * s);
-
/**
- * Acquires a lock that blocks a session pool from expanding.
+ * Get session from handle and avoid pool validation if not on same thread
*
- * This is typically used for safely peeking into other threads'
- * pools in order to clone elements. Lock should be dropped as soon
- * as possible by calling @ref session_pool_remove_peeker.
- *
- * NOTE: Avoid using pool_elt_at_index while the lock is held because
- * it may lead to free elt bitmap expansion/contraction!
- */
-always_inline void
-session_pool_add_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_lock (&wrk->peekers_rw_locks);
-}
-
-always_inline void
-session_pool_remove_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_unlock (&wrk->peekers_rw_locks);
-}
-
-/**
- * Get session from handle and 'lock' pool resize if not in same thread
- *
- * Caller should drop the peek 'lock' as soon as possible.
+ * Peekers are fine because pool grows with barrier (see @ref session_alloc)
*/
always_inline session_t *
-session_get_from_handle_safe (u64 handle)
+session_get_from_handle_safe (session_handle_tu_t handle)
{
- u32 thread_index = session_thread_from_handle (handle);
- session_worker_t *wrk = &session_main.wrk[thread_index];
+ session_worker_t *wrk = &session_main.wrk[handle.thread_index];
- if (thread_index == vlib_get_thread_index ())
+ if (handle.thread_index == vlib_get_thread_index ())
{
- return pool_elt_at_index (wrk->sessions,
- session_index_from_handle (handle));
+ return pool_elt_at_index (wrk->sessions, handle.session_index);
}
else
{
- session_pool_add_peeker (thread_index);
- /* Don't use pool_elt_at index. See @ref session_pool_add_peeker */
- return wrk->sessions + session_index_from_handle (handle);
+ /* Don't use pool_elt_at_index to avoid pool bitmap reallocs */
+ return wrk->sessions + handle.session_index;
}
}
-always_inline u32
-session_get_index (session_t * s)
-{
- return (s - session_main.wrk[s->thread_index].sessions);
-}
-
always_inline session_t *
session_clone_safe (u32 session_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
session_t *old_s, *new_s;
- u32 current_thread_index = vlib_get_thread_index ();
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- session_pool_add_peeker (thread_index);
new_s = session_alloc (current_thread_index);
+ new_index = new_s->session_index;
+ /* Session pools are reallocated with barrier (see @ref session_alloc) */
old_s = session_main.wrk[thread_index].sessions + session_index;
clib_memcpy_fast (new_s, old_s, sizeof (*new_s));
- session_pool_remove_peeker (thread_index);
new_s->thread_index = current_thread_index;
- new_s->session_index = session_get_index (new_s);
+ new_s->session_index = new_index;
return new_s;
}
@@ -457,16 +448,19 @@ int session_stop_listen (session_t * s);
void session_half_close (session_t *s);
void session_close (session_t * s);
void session_reset (session_t * s);
+void session_detach_app (session_t *s);
void session_transport_half_close (session_t *s);
void session_transport_close (session_t * s);
void session_transport_reset (session_t * s);
void session_transport_cleanup (session_t * s);
-int session_send_io_evt_to_thread (svm_fifo_t * f,
- session_evt_type_t evt_type);
-int session_enqueue_notify (session_t * s);
+int session_enqueue_notify (session_t *s);
int session_dequeue_notify (session_t * s);
+int session_enqueue_notify_cl (session_t *s);
+int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type);
int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
session_evt_type_t evt_type);
+int session_program_tx_io_evt (session_handle_tu_t sh,
+ session_evt_type_t evt_type);
void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
void *rpc_args);
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
@@ -479,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep,
u8 is_lcl);
int session_transport_attribute (session_t *s, u8 is_get,
transport_endpt_attr_t *attr);
+u64 session_segment_handle (session_t *s);
u8 *format_session (u8 * s, va_list * args);
uword unformat_session (unformat_input_t * input, va_list * args);
@@ -496,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s,
session_dgram_hdr_t * hdr,
vlib_buffer_t * b, u8 proto,
u8 queue_event);
+int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -513,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index, u8 notify);
int session_dgram_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index);
+
/**
* Initialize session layer for given transport proto and ip version
*
@@ -529,10 +532,18 @@ void session_register_transport (transport_proto_t transport_proto,
const transport_proto_vft_t * vft, u8 is_ip4,
u32 output_node);
transport_proto_t session_add_transport_proto (void);
+void session_register_update_time_fn (session_update_time_fn fn, u8 is_add);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+always_inline void
+session_set_state (session_t *s, session_state_t session_state)
+{
+ s->session_state = session_state;
+ SESSION_EVT (SESSION_EVT_STATE_CHANGE, s);
+}
+
always_inline u32
transport_max_rx_enqueue (transport_connection_t * tc)
{
@@ -575,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc)
return svm_fifo_has_ooo_data (s->rx_fifo);
}
+always_inline u32
+transport_tx_fifo_has_dgram (transport_connection_t *tc)
+{
+ session_t *s = session_get (tc->s_index, tc->thread_index);
+ u32 max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo);
+ session_dgram_pre_hdr_t phdr;
+
+ if (max_deq <= sizeof (session_dgram_hdr_t))
+ return 0;
+ svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr);
+ return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t);
+}
+
always_inline void
transport_rx_fifo_req_deq_ntf (transport_connection_t *tc)
{
@@ -615,12 +639,19 @@ transport_cl_thread (void)
return session_main.transport_cl_thread;
}
+always_inline u32
+session_vlib_thread_is_cl_thread (void)
+{
+ return (vlib_get_thread_index () == transport_cl_thread () ||
+ vlib_thread_is_main_w_barrier ());
+}
+
/*
* Listen sessions
*/
-always_inline u64
-listen_session_get_handle (session_t * s)
+always_inline session_handle_t
+listen_session_get_handle (session_t *s)
{
ASSERT (s->session_state == SESSION_STATE_LISTENING ||
session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC);
@@ -667,8 +698,8 @@ always_inline session_t *
ho_session_alloc (void)
{
session_t *s;
- ASSERT (vlib_get_thread_index () == 0);
- s = session_alloc (0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ s = session_alloc (transport_cl_thread ());
s->session_state = SESSION_STATE_CONNECTING;
s->flags |= SESSION_F_HALF_OPEN;
return s;
@@ -677,7 +708,7 @@ ho_session_alloc (void)
always_inline session_t *
ho_session_get (u32 ho_index)
{
- return session_get (ho_index, 0 /* half-open thread */);
+ return session_get (ho_index, transport_cl_thread ());
}
always_inline void
@@ -702,7 +733,7 @@ vnet_get_session_main ()
always_inline session_worker_t *
session_main_get_worker (u32 thread_index)
{
- return &session_main.wrk[thread_index];
+ return vec_elt_at_index (session_main.wrk, thread_index);
}
static inline session_worker_t *
@@ -710,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index)
{
if (thread_index > vec_len (session_main.wrk))
return 0;
- return &session_main.wrk[thread_index];
+ return session_main_get_worker (thread_index);
}
always_inline svm_msg_q_t *
session_main_get_vpp_event_queue (u32 thread_index)
{
- return session_main.wrk[thread_index].vpp_event_queue;
+ return session_main_get_worker (thread_index)->vpp_event_queue;
}
always_inline u8
@@ -725,14 +756,31 @@ session_main_is_enabled ()
return session_main.is_enabled == 1;
}
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if (-(error) >= 0 && -(error) < SESSION_N_ERRORS)
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
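
Session error codes are negative (hence the negation above), so callers
pass them straight through; a minimal sketch:

/* count a failed mq msg alloc against this thread's stats */
session_stat_error_inc (SESSION_E_MQ_MSG_ALLOC, 1);
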
#define session_cli_return_if_not_enabled() \
do { \
if (!session_main.is_enabled) \
return clib_error_return (0, "session layer is not enabled"); \
} while (0)
-int session_main_flush_enqueue_events (u8 proto, u32 thread_index);
-int session_main_flush_all_enqueue_events (u8 transport_proto);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index);
void session_queue_run_on_main_thread (vlib_main_t * vm);
/**
@@ -761,12 +809,116 @@ session_wrk_update_time (session_worker_t *wrk, f64 now)
}
void session_wrk_enable_adaptive_mode (session_worker_t *wrk);
-fifo_segment_t *session_main_get_evt_q_segment (void);
+fifo_segment_t *session_main_get_wrk_mqs_segment (void);
void session_node_enable_disable (u8 is_en);
clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+void session_wrk_handle_evts_main_rpc (void *);
+void session_wrk_program_app_wrk_evts (session_worker_t *wrk,
+ u32 app_wrk_index);
session_t *session_alloc_for_connection (transport_connection_t * tc);
session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto,
+ u32 *original_dst, u16 *original_dst_port);
+
+typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
+
+typedef struct
+{
+ u8 ph[STRUCT_OFFSET_OF (pool_header_t, max_elts) + 4];
+ u32 flag;
+} pool_safe_realloc_header_t;
+
+STATIC_ASSERT_SIZEOF (pool_safe_realloc_header_t, sizeof (pool_header_t));
+
+#define POOL_REALLOC_SAFE_ELT_THRESH 32
+
+#define pool_realloc_flag(PH) \
+ ((pool_safe_realloc_header_t *) pool_header (PH))->flag
+
+typedef struct pool_realloc_rpc_args_
+{
+ void **pool;
+ uword elt_size;
+ uword align;
+} pool_realloc_rpc_args_t;
+
+always_inline void
+pool_program_safe_realloc_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 free_elts, max_elts, n_alloc;
+ pool_realloc_rpc_args_t *pra;
+
+ ASSERT (vlib_get_thread_index () == 0);
+ pra = (pool_realloc_rpc_args_t *) args;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ free_elts = _pool_free_elts (*pra->pool, pra->elt_size);
+ if (free_elts < POOL_REALLOC_SAFE_ELT_THRESH)
+ {
+ max_elts = _vec_max_len (*pra->pool, pra->elt_size);
+ n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH);
+ _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size);
+ }
+ pool_realloc_flag (*pra->pool) = 0;
+ clib_mem_free (args);
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+always_inline void
+pool_program_safe_realloc (void **p, u32 elt_size, u32 align)
+{
+ pool_realloc_rpc_args_t *pra;
+
+ /* Reuse pad as a realloc flag */
+ if (pool_realloc_flag (*p))
+ return;
+
+ pra = clib_mem_alloc (sizeof (*pra));
+ pra->pool = p;
+ pra->elt_size = elt_size;
+ pra->align = align;
+ pool_realloc_flag (*p) = 1;
+
+ session_send_rpc_evt_to_thread (0 /* thread index */,
+ pool_program_safe_realloc_rpc, pra);
+}
+
+#define pool_needs_realloc(P) \
+ ((!P) || \
+ (vec_len (pool_header (P)->free_indices) < POOL_REALLOC_SAFE_ELT_THRESH && \
+ pool_free_elts (P) < POOL_REALLOC_SAFE_ELT_THRESH))
+
+#define pool_get_aligned_safe(P, E, align) \
+ do \
+ { \
+ if (PREDICT_FALSE (pool_needs_realloc (P))) \
+ { \
+ if (PREDICT_FALSE (!(P))) \
+ { \
+ pool_alloc_aligned (P, POOL_REALLOC_SAFE_ELT_THRESH, align); \
+ } \
+ else if (PREDICT_FALSE (!pool_free_elts (P))) \
+ { \
+ vlib_workers_sync (); \
+ pool_alloc_aligned (P, pool_max_len (P), align); \
+ vlib_workers_continue (); \
+ ALWAYS_ASSERT (pool_free_elts (P) > 0); \
+ } \
+ else \
+ { \
+ pool_program_safe_realloc ((void **) &(P), sizeof ((P)[0]), \
+ _vec_align (P, align)); \
+ } \
+ } \
+ pool_get_aligned (P, E, align); \
+ } \
+ while (0)
#endif /* __included_session_h__ */
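
A minimal usage sketch of pool_get_aligned_safe under the assumptions above
(the pool only grows at a barrier or via the main-thread realloc RPC);
caller names hypothetical:

session_t *s;

pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = wrk->vm->thread_index;
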
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 00e67dcd2d0..48eb932a2c9 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -82,40 +82,12 @@ session_send_fds (vl_api_registration_t * reg, int fds[], int n_fds)
}
static int
-mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg)
-{
- int rv;
- u8 try = 0;
- while (try < 100)
- {
- rv = svm_msg_q_lock_and_alloc_msg_w_ring (app_mq,
- SESSION_MQ_CTRL_EVT_RING,
- SVM_Q_NOWAIT, msg);
- if (!rv)
- return 0;
- /*
- * Break the loop if mq is full, usually this is because the
- * app has crashed or is hanging on somewhere.
- */
- if (rv != -1)
- break;
- try++;
- usleep (1);
- }
- clib_warning ("failed to alloc msg");
- return -1;
-}
-
-static int
mq_send_session_accepted_cb (session_t * s)
{
app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_accepted_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
fifo_segment_t *eq_seg;
session_t *listener;
- session_event_t *evt;
application_t *app;
app = application_get (app_wrk->app_index);
@@ -164,15 +136,14 @@ mq_send_session_accepted_cb (session_t * s)
m.mq_index = s->thread_index;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ if (application_original_dst_is_enabled (app))
+ {
+ session_get_original_dst (&m.lcl, &m.rmt,
+ session_get_transport_proto (s),
+ &m.original_dst_ip4, &m.original_dst_port);
+ }
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_ACCEPTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
return 0;
}
@@ -181,21 +152,12 @@ static inline void
mq_send_session_close_evt (app_worker_t * app_wrk, session_handle_t sh,
session_evt_type_t evt_type)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_disconnected_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_disconnected_msg_t m = { 0 };
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = evt_type;
- mp = (session_disconnected_msg_t *) evt->data;
- mp->handle = sh;
- mp->context = app_wrk->api_client_index;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.handle = sh;
+ m.context = app_wrk->api_client_index;
+
+ app_wrk_send_ctrl_evt (app_wrk, evt_type, &m, sizeof (m));
}
static inline void
@@ -249,13 +211,9 @@ int
mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
session_t * s, session_error_t err)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_connected_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_connection_t *tc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_wrk = app_worker_get (app_wrk_index);
@@ -271,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
if (session_has_transport (s))
{
- tc = session_get_transport (s);
- if (!tc)
- {
- clib_warning ("failed to retrieve transport!");
- m.retval = SESSION_E_REFUSED;
- goto snd_msg;
- }
-
m.handle = session_handle (s);
m.vpp_event_queue_address =
fifo_segment_msg_q_offset (eq_seg, s->thread_index);
@@ -293,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
else
{
ct_connection_t *cct;
- session_t *ss;
cct = (ct_connection_t *) session_get_transport (s);
m.handle = session_handle (s);
@@ -304,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo);
m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo);
m.segment_handle = session_segment_handle (s);
- ss = ct_session_get_peer (s);
- m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo);
- m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo);
- m.ct_segment_handle = session_segment_handle (ss);
m.mq_index = s->thread_index;
+ m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo);
+ m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo);
+ m.ct_segment_handle = cct->segment_handle;
}
/* Setup client session index in advance, in case data arrives
@@ -318,31 +266,19 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
snd_msg:
- app_mq = app_wrk->event_queue;
-
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CONNECTED, &m, sizeof (m));
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CONNECTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-int
+static int
mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
session_handle_t handle, int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_bound_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_endpoint_t tep;
+ transport_connection_t *ltc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_listener_t *al;
session_t *ls = 0;
@@ -362,77 +298,60 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
else
ls = app_listener_get_local_session (al);
- session_get_endpoint (ls, &tep, 1 /* is_lcl */);
- m.lcl_port = tep.port;
- m.lcl_is_ip4 = tep.is_ip4;
- clib_memcpy_fast (m.lcl_ip, &tep.ip, sizeof (tep.ip));
+ ltc = session_get_transport (ls);
+ m.lcl_port = ltc->lcl_port;
+ m.lcl_is_ip4 = ltc->is_ip4;
+ clib_memcpy_fast (m.lcl_ip, &ltc->lcl_ip, sizeof (m.lcl_ip));
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index);
m.mq_index = ls->thread_index;
- if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL &&
- ls->rx_fifo)
+ if (transport_connection_is_cless (ltc))
{
- m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo);
- m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo);
- m.segment_handle = session_segment_handle (ls);
+ session_t *wrk_ls;
+ m.mq_index = transport_cl_thread ();
+ m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index);
+ wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo);
+ m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo);
+ m.segment_handle = session_segment_handle (wrk_ls);
}
snd_msg:
- app_mq = app_wrk->event_queue;
-
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_BOUND;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_BOUND, &m, sizeof (m));
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-void
-mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv)
+static void
+mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context,
+ int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_unlisten_reply_msg_t *ump;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_unlisten_reply_msg_t m = { 0 };
+ app_worker_t *app_wrk;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ app_wrk = app_worker_get (app_wrk_index);
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY;
- ump = (session_unlisten_reply_msg_t *) evt->data;
- ump->context = context;
- ump->handle = sh;
- ump->retval = rv;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.context = context;
+ m.handle = sh;
+ m.retval = rv;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_UNLISTEN_REPLY, &m,
+ sizeof (m));
}
static void
mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_migrated_msg_t m = { 0 };
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
application_t *app;
u32 thread_index;
thread_index = session_thread_from_handle (new_sh);
app_wrk = app_worker_get (s->app_wrk_index);
- app_mq = app_wrk->event_queue;
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
@@ -442,27 +361,15 @@ mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, thread_index);
m.segment_handle = SESSION_INVALID_HANDLE;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_MIGRATED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_MIGRATED, &m, sizeof (m));
}
static int
mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -488,29 +395,16 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
}
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
-
- if (n_fds)
- session_send_fds (reg, fds, n_fds);
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -518,12 +412,9 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
reg = vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
@@ -533,17 +424,10 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
return -1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -551,10 +435,7 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
static void
mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_cleanup_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_cleanup_msg_t m = { 0 };
app_worker_t *app_wrk;
/* Propagate transport cleanup notifications only if app didn't close */
@@ -566,17 +447,56 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
if (!app_wrk)
return;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ m.handle = session_handle (s);
+ m.type = ntf;
+
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m));
+}
+
+static int
+mq_send_io_rx_event (session_t *s)
+{
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+ app_worker_t *app_wrk;
+ svm_msg_q_t *mq;
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_wrk = app_worker_get (s->app_wrk_index);
+ mq = app_wrk->event_queue;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_RX;
+ mq_evt->session_index = s->rx_fifo->shr->client_session_index;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
+}
+
+static int
+mq_send_io_tx_event (session_t *s)
+{
+ app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_TX;
+ mq_evt->session_index = s->tx_fifo->shr->client_session_index;
+
+ svm_msg_q_add_raw (mq, &mq_msg);
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CLEANUP;
- mp = (session_cleanup_msg_t *) evt->data;
- mp->handle = session_handle (s);
- mp->type = ntf;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ return 0;
}
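
On the consumer side the usual fifo event handshake applies; a hedged
sketch of an app draining rx after such an event, assuming
svm_fifo_unset_event re-arms the flag checked by svm_fifo_has_event above:

static void
app_drain_rx (svm_fifo_t *rx_fifo)
{
  u8 buf[1024];
  int n;

  svm_fifo_unset_event (rx_fifo); /* re-arm before draining */
  while ((n = svm_fifo_dequeue (rx_fifo, sizeof (buf), buf)) > 0)
    ; /* process n bytes */
}
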
static session_cb_vft_t session_mq_cb_vft = {
@@ -586,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_cb,
.del_segment_callback = mq_send_del_segment_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -602,6 +526,17 @@ vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp)
}
static void
+vl_api_session_sapi_enable_disable_t_handler (
+ vl_api_session_sapi_enable_disable_t *mp)
+{
+ vl_api_session_sapi_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ rv = appns_sapi_enable_disable (mp->is_enable);
+ REPLY_MACRO (VL_API_SESSION_SAPI_ENABLE_DISABLE_REPLY);
+}
+
+static void
vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
{
int rv = 0, *fds = 0, n_fds = 0, n_workers, i;
@@ -642,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
if ((rv = vnet_application_attach (a)))
{
- clib_warning ("attach returned: %d", rv);
+ clib_warning ("attach returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
vec_free (a->namespace_id);
goto done;
}
@@ -684,27 +620,28 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
}
done:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_APP_ATTACH_REPLY, ({
- if (!rv)
- {
- ctrl_thread = n_workers ? 1 : 0;
- segp = (fifo_segment_t *) a->segment;
- rmp->app_index = clib_host_to_net_u32 (a->app_index);
- rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
- rmp->vpp_ctrl_mq = fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
- rmp->vpp_ctrl_mq_thread = ctrl_thread;
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (segp->ssvm.name))
- {
- vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
- }
- rmp->segment_size = segp->ssvm.ssvm_size;
- rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_ATTACH_REPLY,
+ ((!rv) ? vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({
+ if (!rv)
+ {
+ ctrl_thread = n_workers ? 1 : 0;
+ segp = (fifo_segment_t *) a->segment;
+ rmp->app_index = clib_host_to_net_u32 (a->app_index);
+ rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
+ rmp->vpp_ctrl_mq =
+ fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
+ rmp->vpp_ctrl_mq_thread = ctrl_thread;
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (segp->ssvm.name))
+ {
+ vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
+ }
+ rmp->segment_size = segp->ssvm.ssvm_size;
+ rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -746,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
goto done;
}
@@ -767,25 +706,27 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
n_fds += 1;
}
- /* *INDENT-OFF* */
done:
- REPLY_MACRO2 (VL_API_APP_WORKER_ADD_DEL_REPLY, ({
- rmp->is_add = mp->is_add;
- rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
- rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
- if (!rv && mp->is_add)
- {
- rmp->app_event_queue_address =
- fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (args.segment->name))
- {
- vl_api_vec_to_api_string (args.segment->name, &rmp->segment_name);
- }
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_WORKER_ADD_DEL_REPLY,
+ ((!rv && mp->is_add) ? vec_len (args.segment->name) : 0), ({
+ rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
+ if (!rv && mp->is_add)
+ {
+ rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
+ rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
+ rmp->app_event_queue_address =
+ fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (args.segment->name))
+ {
+ vl_api_vec_to_api_string (args.segment->name,
+ &rmp->segment_name);
+ }
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -811,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp)
a->app_index = app->app_index;
a->api_client_index = mp->client_index;
rv = vnet_application_detach (a);
+ if (rv)
+ {
+ clib_warning ("vnet_application_detach: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
}
done:
@@ -834,6 +781,7 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
+ .sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
.ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
@@ -852,13 +800,11 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
}
vec_free (ns_id);
- /* *INDENT-OFF* */
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({
if (!rv)
rmp->appns_index = clib_host_to_net_u32 (appns_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -866,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler (
vl_api_app_namespace_add_del_v2_t *mp)
{
vl_api_app_namespace_add_del_v2_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0;
+ u8 *ns_id = 0;
u32 appns_index = 0;
int rv = 0;
@@ -877,13 +823,11 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
+ .sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
.ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
@@ -896,12 +840,11 @@ vl_api_app_namespace_add_del_v2_t_handler (
appns_index = app_namespace_index_from_id (ns_id);
if (appns_index == APP_NAMESPACE_INVALID_INDEX)
{
- clib_warning ("app ns lookup failed");
+ clib_warning ("app ns lookup failed id:%s", ns_id);
rv = VNET_API_ERROR_UNSPECIFIED;
}
}
vec_free (ns_id);
- vec_free (netns);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({
@@ -911,6 +854,107 @@ done:
}
static void
+vl_api_app_namespace_add_del_v4_t_handler (
+ vl_api_app_namespace_add_del_v4_t *mp)
+{
+ vl_api_app_namespace_add_del_v4_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
+static void
+vl_api_app_namespace_add_del_v3_t_handler (
+ vl_api_app_namespace_add_del_v3_t *mp)
+{
+ vl_api_app_namespace_add_del_v3_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ mp->netns[sizeof (mp->netns) - 1] = 0;
+ if (strlen ((char *) mp->netns) != 0)
+ {
+ sock_name =
+ format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns);
+ }
+ else
+ {
+ sock_name = api_sock_name;
+ api_sock_name = 0; // for vec_free
+ }
+
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+ vec_free (api_sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
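Editor's note: the v3 handler keeps backward compatibility by folding the old netns field into the new sock_name convention: when a netns is given, the socket name becomes an abstract-socket spec that carries the netns, otherwise the plain name is used as-is. A condensed sketch of that branch, with illustrative values:

  u8 *api_sock_name = format (0, "vpp/session/ns1");
  char *netns = "red"; /* illustrative */
  u8 *sock_name;

  if (strlen (netns) != 0)
    sock_name = format (0, "abstract:%v,netns_name=%s", api_sock_name, netns);
  else
    {
      sock_name = api_sock_name; /* take ownership */
      api_sock_name = 0;	 /* so both can be vec_free'd safely */
    }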
+static void
vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
{
vl_api_session_rule_add_del_reply_t *rmp;
@@ -937,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
rv = vnet_session_rule_add_del (&args);
if (rv)
- clib_warning ("rule add del returned: %d", rv);
+ {
+ clib_warning ("rule add del returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
vec_free (table_args->tag);
REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY);
}
@@ -1040,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
if (is_local || fib_proto == FIB_PROTOCOL_IP4)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt16 = &srt->session_rules_tables_16;
pool_foreach (rule16, srt16->rules) {
ri = mma_rules_table_rule_index_16 (srt16, rule16);
@@ -1048,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details4 (rule16, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
if (is_local || fib_proto == FIB_PROTOCOL_IP6)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt40 = &srt->session_rules_tables_40;
pool_foreach (rule40, srt40->rules) {
ri = mma_rules_table_rule_index_40 (srt40, rule40);
@@ -1061,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details6 (rule40, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
}
@@ -1076,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
session_table_foreach (st, ({
for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++)
{
@@ -1086,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
mp->context);
}
}));
- /* *INDENT-ON* */
}
static void
@@ -1131,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp)
rv = vnet_app_add_cert_key_pair (a);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({
if (!rv)
rmp->index = clib_host_to_net_u32 (a->index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1152,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp)
}
ckpair_index = clib_net_to_host_u32 (mp->index);
rv = vnet_app_del_cert_key_pair (ckpair_index);
+ if (rv)
+ {
+ clib_warning ("vnet_app_del_cert_key_pair: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
done:
REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY);
@@ -1177,36 +1222,11 @@ VL_MSG_API_REAPER_FUNCTION (application_reaper_cb);
* Socket api functions
*/
-static void
-sapi_send_fds (app_worker_t * app_wrk, int *fds, int n_fds)
-{
- app_sapi_msg_t smsg = { 0 };
- app_namespace_t *app_ns;
- application_t *app;
- clib_socket_t *cs;
- u32 cs_index;
-
- app = application_get (app_wrk->app_index);
- app_ns = app_namespace_get (app->ns_index);
- cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
- cs = appns_sapi_get_socket (app_ns, cs_index);
- if (PREDICT_FALSE (!cs))
- return;
-
- /* There's no payload for the message only the type */
- smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
- clib_socket_sendmsg (cs, &smsg, sizeof (smsg), fds, n_fds);
-}
-
static int
mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -1218,33 +1238,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
ASSERT (ssvm_type (sp) == SSVM_SEGMENT_MEMFD);
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- /*
- * Send the fd over api socket
- */
- sapi_send_fds (app_wrk, fds, n_fds);
-
- /*
- * Send the actual message over mq
- */
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -1252,25 +1254,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -1282,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_sapi_cb,
.del_segment_callback = mq_send_del_segment_sapi_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
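
Editor's note: with the listen/unlisten and io callbacks added, the socket-api vft now covers the same event set as the binary-api one. For comparison, a hedged sketch of a builtin (in-process) app's callback table; the handler and its body are hypothetical, only the type and field names come from this patch:

  static int
  my_builtin_rx_callback (session_t *s)
  {
    /* drain s->rx_fifo here */
    return 0;
  }

  static session_cb_vft_t my_builtin_cb_vft = {
    .builtin_app_rx_callback = my_builtin_rx_callback,
  };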
static void
@@ -1385,7 +1381,7 @@ done:
vec_free (fds);
}
-static void
+void
sapi_socket_close_w_handle (u32 api_handle)
{
app_namespace_t *app_ns = app_namespace_get (api_handle >> 16);
@@ -1423,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
app = application_get_if_valid (mp->app_index);
if (!app)
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = SESSION_E_INVALID;
goto done;
}
@@ -1438,15 +1434,13 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
goto done;
}
if (!mp->is_add)
- {
- sapi_socket_close_w_handle (sapi_handle);
- goto done;
- }
+ goto done;
/* Send fifo segment fd if needed */
if (ssvm_type (args.segment) == SSVM_SEGMENT_MEMFD)
@@ -1464,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
done:
+  /* With app sock api the socket is expected to be closed, so no reply */
+ if (!mp->is_add && appns_sapi_enabled ())
+ return;
+
msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY;
rmp = &msg.worker_add_del_reply;
rmp->retval = rv;
rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
rmp->api_client_handle = sapi_handle;
- rmp->wrk_index = args.wrk_map_index;
- rmp->segment_handle = args.segment_handle;
if (!rv && mp->is_add)
{
+ rmp->wrk_index = args.wrk_map_index;
+ rmp->segment_handle = args.segment_handle;
/* No segment name and size. This supports only memfds */
rmp->app_event_queue_address =
fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
@@ -1488,6 +1487,108 @@ done:
clib_socket_sendmsg (cs, &msg, sizeof (msg), fds, n_fds);
}
+/* This is a workaround for the case when the session layer starts reading
+ * the socket before the client actually sends the data
+ */
+static clib_error_t *
+sapi_socket_receive_wait (clib_socket_t *cs, u8 *msg, u32 msg_len)
+{
+ clib_error_t *err;
+ int n_tries = 5;
+
+ while (1)
+ {
+ err = clib_socket_recvmsg (cs, msg, msg_len, 0, 0);
+ if (!err)
+ break;
+
+ if (!n_tries)
+ return err;
+
+ n_tries--;
+ usleep (1);
+ }
+
+ return err;
+}
+
+static void
+sapi_add_del_cert_key_handler (app_namespace_t *app_ns, clib_socket_t *cs,
+ app_sapi_cert_key_add_del_msg_t *mp)
+{
+ vnet_app_add_cert_key_pair_args_t _a, *a = &_a;
+ app_sapi_cert_key_add_del_reply_msg_t *rmp;
+ app_sapi_msg_t msg = { 0 };
+ int rv = 0;
+
+ if (mp->is_add)
+ {
+ const u32 max_certkey_len = 2e4, max_cert_len = 1e4, max_key_len = 1e4;
+ clib_error_t *err;
+ u8 *certkey = 0;
+ u32 key_len;
+
+ if (mp->certkey_len > max_certkey_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ vec_validate (certkey, mp->certkey_len - 1);
+
+ err = sapi_socket_receive_wait (cs, certkey, mp->certkey_len);
+ if (err)
+ {
+ clib_error_report (err);
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ if (mp->cert_len > max_cert_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ if (mp->certkey_len < mp->cert_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ key_len = mp->certkey_len - mp->cert_len;
+ if (key_len > max_key_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ clib_memset (a, 0, sizeof (*a));
+ a->cert = certkey;
+ a->key = certkey + mp->cert_len;
+ a->cert_len = mp->cert_len;
+ a->key_len = key_len;
+ rv = vnet_app_add_cert_key_pair (a);
+
+ vec_free (certkey);
+ }
+ else
+ {
+ rv = vnet_app_del_cert_key_pair (mp->index);
+ }
+
+send_reply:
+
+ msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY;
+ rmp = &msg.cert_key_add_del_reply;
+ rmp->retval = rv;
+ rmp->context = mp->context;
+ if (!rv && mp->is_add)
+ rmp->index = a->index;
+
+ clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0);
+}
+
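Editor's note: the handler above implies a two-step client protocol: send the fixed-size message header first, then stream the concatenated cert+key blob of certkey_len bytes, which the server reads via sapi_socket_receive_wait. A hedged client-side sketch, assuming cert_len/key_len and the certkey buffer already exist:

  app_sapi_msg_t msg = { 0 };

  msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
  msg.cert_key_add_del.is_add = 1;
  msg.cert_key_add_del.cert_len = cert_len;
  msg.cert_key_add_del.certkey_len = cert_len + key_len;
  clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0);
  /* server then expects exactly certkey_len raw bytes */
  clib_socket_sendmsg (cs, certkey, cert_len + key_len, 0, 0);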
static void
sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs)
{
@@ -1496,11 +1597,12 @@ sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs)
u32 api_client_handle;
api_client_handle = appns_sapi_socket_handle (app_ns, cs);
- sapi_socket_close_w_handle (api_client_handle);
/* Cleanup everything because app worker closed socket or crashed */
handle = (app_ns_api_handle_t *) & cs->private_data;
- app_wrk = app_worker_get (handle->aah_app_wrk_index);
+ app_wrk = app_worker_get_if_valid (handle->aah_app_wrk_index);
+ if (!app_wrk)
+ return;
vnet_app_worker_add_del_args_t args = {
.app_index = app_wrk->app_index,
@@ -1548,6 +1650,9 @@ sapi_sock_read_ready (clib_file_t * cf)
case APP_SAPI_MSG_TYPE_ADD_DEL_WORKER:
sapi_add_del_worker_handler (app_ns, cs, &msg.worker_add_del);
break;
+ case APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY:
+ sapi_add_del_cert_key_handler (app_ns, cs, &msg.cert_key_add_del);
+ break;
default:
clib_warning ("app wrk %u unknown message type: %u",
handle->aah_app_wrk_index, msg.type);
@@ -1635,6 +1740,23 @@ error:
return err;
}
+void
+appns_sapi_del_ns_socket (app_namespace_t *app_ns)
+{
+ app_ns_api_handle_t *handle;
+ clib_socket_t *cs;
+
+ pool_foreach (cs, app_ns->app_sockets)
+ {
+ handle = (app_ns_api_handle_t *) &cs->private_data;
+ clib_file_del_by_index (&file_main, handle->aah_file_index);
+
+ clib_socket_close (cs);
+ clib_socket_free (cs);
+ }
+ pool_free (app_ns->app_sockets);
+}
+
int
appns_sapi_add_ns_socket (app_namespace_t * app_ns)
{
@@ -1644,49 +1766,42 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
struct stat file_stat;
clib_error_t *err;
clib_socket_t *cs;
- u8 *dir = 0;
- int rv = 0;
+ char dir[4096];
- vec_add (dir, vlib_unix_get_runtime_dir (),
- strlen (vlib_unix_get_runtime_dir ()));
- vec_add (dir, (u8 *) subdir, strlen (subdir));
+ snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir);
- err = vlib_unix_recursive_mkdir ((char *) dir);
- if (err)
- {
- clib_error_report (err);
- rv = -1;
- goto error;
- }
-
- /* Use abstract sockets if a netns was provided */
- if (app_ns->netns)
- app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0);
- else
- app_ns->sock_name = format (0, "%v%v%c", dir, app_ns->ns_id, 0);
+ if (!app_ns->sock_name)
+ app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
/*
* Create and initialize socket to listen on
*/
cs = appns_sapi_alloc_socket (app_ns);
- cs->config = (char *) app_ns->sock_name;
+ cs->config = (char *) vec_dup (app_ns->sock_name);
cs->flags = CLIB_SOCKET_F_IS_SERVER |
CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
- if ((err = clib_socket_init_netns (cs, app_ns->netns)))
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX)
{
- clib_error_report (err);
- rv = -1;
- goto error;
+ err = vlib_unix_recursive_mkdir ((char *) dir);
+ if (err)
+ {
+ clib_error_report (err);
+ return SESSION_E_SYSCALL;
+ }
}
- if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ if ((err = clib_socket_init (cs)))
{
- rv = -1;
- goto error;
+ clib_error_report (err);
+ return -1;
}
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX &&
+ stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ return -1;
+
/*
* Start polling it
*/
@@ -1703,22 +1818,7 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
handle->aah_file_index = clib_file_add (&file_main, &cf);
handle->aah_app_wrk_index = APP_INVALID_INDEX;
-error:
- vec_free (dir);
- return rv;
-}
-
-static void
-vl_api_application_tls_cert_add_t_handler (
- vl_api_application_tls_cert_add_t *mp)
-{
- /* deprecated */
-}
-
-static void
-vl_api_application_tls_key_add_t_handler (vl_api_application_tls_key_add_t *mp)
-{
- /* deprecated */
+ return 0;
}
#include <vnet/session/session.api.c>
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 24d8cfb1e24..569a77bccc1 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args)
else if (ss->session_state == SESSION_STATE_CONNECTING)
{
if (ss->flags & SESSION_F_HALF_OPEN)
- s = format (s, "%U%v", format_transport_half_open_connection, tp,
- ss->connection_index, ss->thread_index, verbose, str);
+ {
+ s = format (s, "%U", format_transport_half_open_connection, tp,
+ ss->connection_index, ss->thread_index, verbose);
+ s = format (s, "%v", str);
+ }
else
s = format (s, "%U", format_transport_connection, tp,
ss->connection_index, ss->thread_index, verbose);
@@ -259,7 +262,6 @@ unformat_session (unformat_input_t * input, va_list * args)
if (s)
{
*result = s;
- session_pool_remove_peeker (s->thread_index);
return 1;
}
return 0;
@@ -340,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
n_closed = 0;
- /* *INDENT-OFF* */
pool_foreach (s, pool) {
if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
{
@@ -349,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
}
vlib_cli_output (vm, "%U", format_session, s, verbose);
}
- /* *INDENT-ON* */
if (!n_closed)
vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index,
@@ -488,7 +488,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
u8 one_session = 0, do_listeners = 0, sst, do_elog = 0, do_filter = 0;
u32 track_index, thread_index = 0, start = 0, end = ~0, session_index;
- unformat_input_t _line_input, *line_input = &_line_input;
transport_proto_t transport_proto = TRANSPORT_PROTO_INVALID;
session_state_t state = SESSION_N_STATES, *states = 0;
session_main_t *smm = &session_main;
@@ -502,26 +501,20 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_return_if_not_enabled ();
- if (!unformat_user (input, unformat_line_input, line_input))
- {
- session_cli_show_all_sessions (vm, 0);
- return 0;
- }
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "verbose %d", &verbose))
+ if (unformat (input, "verbose %d", &verbose))
;
- else if (unformat (line_input, "verbose"))
+ else if (unformat (input, "verbose"))
verbose = 1;
- else if (unformat (line_input, "listeners %U", unformat_transport_proto,
+ else if (unformat (input, "listeners %U", unformat_transport_proto,
&transport_proto))
do_listeners = 1;
- else if (unformat (line_input, "%U", unformat_session, &s))
+ else if (unformat (input, "%U", unformat_session, &s))
{
one_session = 1;
}
- else if (unformat (line_input, "thread %u index %u", &thread_index,
+ else if (unformat (input, "thread %u index %u", &thread_index,
&session_index))
{
s = session_get_if_valid (session_index, thread_index);
@@ -532,19 +525,17 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "thread %u", &thread_index))
+ else if (unformat (input, "thread %u", &thread_index))
{
do_filter = 1;
}
- else
- if (unformat (line_input, "state %U", unformat_session_state, &state))
+ else if (unformat (input, "state %U", unformat_session_state, &state))
{
vec_add1 (states, state);
do_filter = 1;
}
- else if (unformat (line_input, "proto %U index %u",
- unformat_transport_proto, &transport_proto,
- &transport_index))
+ else if (unformat (input, "proto %U index %u", unformat_transport_proto,
+ &transport_proto, &transport_index))
{
transport_connection_t *tc;
tc = transport_get_connection (transport_proto, transport_index,
@@ -565,34 +556,34 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "proto %U", unformat_transport_proto,
+ else if (unformat (input, "proto %U", unformat_transport_proto,
&transport_proto))
do_filter = 1;
- else if (unformat (line_input, "range %u %u", &start, &end))
+ else if (unformat (input, "range %u %u", &start, &end))
do_filter = 1;
- else if (unformat (line_input, "range %u", &start))
+ else if (unformat (input, "range %u", &start))
{
end = start + 50;
do_filter = 1;
}
- else if (unformat (line_input, "elog"))
+ else if (unformat (input, "elog"))
do_elog = 1;
- else if (unformat (line_input, "protos"))
+ else if (unformat (input, "protos"))
{
vlib_cli_output (vm, "%U", format_transport_protos);
goto done;
}
- else if (unformat (line_input, "states"))
+ else if (unformat (input, "states"))
{
session_cli_print_session_states (vm);
goto done;
}
- else if (unformat (line_input, "events"))
+ else if (unformat (input, "events"))
do_events = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ format_unformat_error, input);
goto done;
}
}
@@ -625,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener",
"App");
- /* *INDENT-OFF* */
pool_foreach (s, smm->wrk[0].sessions) {
if (s->session_state != SESSION_STATE_LISTENING
|| s->session_type != sst)
@@ -635,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U%-25v%", format_session, s, 0,
app_name);
}
- /* *INDENT-ON* */
goto done;
}
@@ -661,12 +650,10 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_show_all_sessions (vm, verbose);
done:
- unformat_free (line_input);
vec_free (states);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
{
.path = "show session",
@@ -676,7 +663,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
"[protos] [states] ",
.function = show_session_command_fn,
};
-/* *INDENT-ON* */
static int
clear_session (session_t * s)
@@ -728,27 +714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (clear_all)
{
- /* *INDENT-OFF* */
vec_foreach (wrk, smm->wrk)
{
pool_foreach (session, wrk->sessions) {
clear_session (session);
}
};
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_command, static) =
{
.path = "clear session",
.short_help = "clear session thread <thread> session <index>",
.function = clear_session_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_session_fifo_trace_command_fn (vlib_main_t * vm,
@@ -791,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) =
{
.path = "show session fifo trace",
.short_help = "show session fifo trace <session>",
.function = show_session_fifo_trace_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -838,53 +818,98 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) =
{
.path = "session replay fifo",
.short_help = "session replay fifo <session>",
.function = session_replay_fifo_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_en = 1;
- clib_error_t *error;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return clib_error_return (0, "expected enable | disable");
+ u8 is_en = 2;
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "enable"))
+ if (unformat (input, "enable"))
is_en = 1;
- else if (unformat (line_input, "disable"))
+ else if (unformat (input, "disable"))
is_en = 0;
else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- unformat_free (line_input);
- return error;
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
}
- unformat_free (line_input);
+ if (is_en > 1)
+ return clib_error_return (0, "expected enable | disable");
+
return vnet_session_enable_disable (vm, is_en);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_enable_disable_command, static) =
{
.path = "session",
.short_help = "session [enable|disable]",
.function = session_enable_disable_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+show_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ unsigned int *e;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk);
+ e = wrk->stats.errors;
+#define _(name, str) \
+ if (e[SESSION_EP_##name]) \
+ vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str);
+ foreach_session_error
+#undef _
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_stats_command, static) = {
+ .path = "show session stats",
+ .short_help = "show session stats",
+ .function = show_session_stats_fn,
+};
+
+static clib_error_t *
+clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_session_stats_command, static) = {
+ .path = "clear session stats",
+ .short_help = "clear session stats",
+ .function = clear_session_stats_fn,
+};
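
Editor's note: both commands above iterate the per-worker error counters through the foreach_session_error X-macro. For a hypothetical entry _ (NO_BUFFER, "no buffer"), the loop body in show_session_stats_fn expands to:

  if (e[SESSION_EP_NO_BUFFER])
    vlib_cli_output (vm, "  %lu %s", e[SESSION_EP_NO_BUFFER], "no buffer");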
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c
index 349d1ec9b46..2a50adac5dd 100644
--- a/src/vnet/session/session_debug.c
+++ b/src/vnet/session/session_debug.c
@@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) =
{
.path = "show session dbg clock_cycles",
.short_help = "show session dbg clock_cycles",
.function = show_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
+static_always_inline f64
+session_dbg_time_now (u32 thread)
+{
+ vlib_main_t *vm = vlib_get_main_by_index (thread);
+
+ return clib_time_now (&vm->clib_time) + vm->time_offset;
+}
static clib_error_t *
clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
{
sde = &session_dbg_main.wrk[thread];
clib_memset (sde, 0, sizeof (session_dbg_evts_t));
- sde->last_time = vlib_time_now (vlib_mains[thread]);
+ sde->last_time = session_dbg_time_now (thread);
sde->start_time = sde->last_time;
}
@@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) =
{
.path = "clear session dbg clock_cycles",
.short_help = "clear session dbg clock_cycles",
.function = clear_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
void
session_debug_init (void)
@@ -107,15 +110,99 @@ session_debug_init (void)
for (thread = 0; thread < num_threads; thread++)
{
clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t));
- sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]);
+ sdm->wrk[thread].start_time = session_dbg_time_now (thread);
+ }
+}
+
+static const char *session_evt_grp_str[] = {
+#define _(sym, str) str,
+ foreach_session_evt_grp
+#undef _
+};
+
+static void
+session_debug_show_groups (vlib_main_t *vm)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ int i = 0;
+
+ vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level");
+
+ for (i = 0; i < SESSION_EVT_N_GRP; i++)
+ vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i],
+ sdm->grp_dbg_lvl[i]);
+}
+
+static clib_error_t *
+session_debug_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ u32 group, level = ~0;
+ clib_error_t *error = 0;
+ u8 is_show = 0;
+ uword *bitmap = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "show"))
+ is_show = 1;
+ else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (input, "level %d", &level))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (is_show)
+ {
+ session_debug_show_groups (vm);
+ goto done;
+ }
+ if (level == ~0)
+ {
+ vlib_cli_output (vm, "level must be entered");
+ goto done;
+ }
+
+ group = clib_bitmap_last_set (bitmap);
+ if (group == ~0)
+ {
+ vlib_cli_output (vm, "group must be entered");
+ goto done;
+ }
+ if (group >= SESSION_EVT_N_GRP)
+ {
+ vlib_cli_output (vm, "group out of bounds");
+ goto done;
}
+ clib_bitmap_foreach (group, bitmap)
+ sdm->grp_dbg_lvl[group] = level;
+
+done:
+
+ clib_bitmap_free (bitmap);
+ return error;
}
+
+VLIB_CLI_COMMAND (session_debug_command, static) = {
+ .path = "session debug",
+ .short_help = "session debug {show | debug group <list> level <n>}",
+ .function = session_debug_fn,
+ .is_mp_safe = 1,
+};
+
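Editor's note: typical usage of the new command, with the group list parsed by unformat_bitmap_list (group indices illustrative, see "session debug show" for the real ones):

  vpp# session debug show
  vpp# session debug group 0-2 level 1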
#else
void
session_debug_init (void)
{
}
-#endif
+#endif /* SESSION_DEBUG */
void
dump_thread_0_event_queue (void)
@@ -144,6 +231,8 @@ dump_thread_0_event_queue (void)
{
case SESSION_IO_EVT_TX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index);
break;
@@ -155,6 +244,8 @@ dump_thread_0_event_queue (void)
case SESSION_IO_EVT_BUILTIN_RX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index);
break;
@@ -180,28 +271,18 @@ dump_thread_0_event_queue (void)
static u8
session_node_cmp_event (session_event_t * e, svm_fifo_t * f)
{
- session_t *s;
switch (e->event_type)
{
case SESSION_IO_EVT_RX:
case SESSION_IO_EVT_TX:
case SESSION_IO_EVT_BUILTIN_RX:
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_IO_EVT_TX_FLUSH:
if (e->session_index == f->shr->master_session_index)
return 1;
break;
case SESSION_CTRL_EVT_CLOSE:
- break;
case SESSION_CTRL_EVT_RPC:
- s = session_get_from_handle (e->session_handle);
- if (!s)
- {
- clib_warning ("session has event but doesn't exist!");
- break;
- }
- if (s->rx_fifo == f || s->tx_fifo == f)
- return 1;
break;
default:
break;
@@ -217,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
session_worker_t *wrk;
int i, index, found = 0;
svm_msg_q_msg_t *msg;
- svm_msg_q_ring_t *ring;
svm_msg_q_t *mq;
u8 thread_index;
@@ -234,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
for (i = 0; i < sq->cursize; i++)
{
msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index);
- ring = svm_msg_q_ring (mq, msg->ring_index);
- clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), ring->elsize);
+ clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e));
found = session_node_cmp_event (e, f);
if (found)
return 1;
@@ -245,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
* Search pending events vector
*/
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->new_head),
elt, ({
@@ -256,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->old_head),
elt, ({
@@ -269,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
done:
return found;
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h
index 9e49a35dbe6..d433ef47fb1 100644
--- a/src/vnet/session/session_debug.h
+++ b/src/vnet/session/session_debug.h
@@ -17,49 +17,81 @@
#include <vnet/session/transport.h>
#include <vlib/vlib.h>
-
-#define foreach_session_dbg_evt \
- _(ENQ, "enqueue") \
- _(DEQ, "dequeue") \
- _(DEQ_NODE, "dequeue") \
- _(POLL_GAP_TRACK, "poll gap track") \
- _(POLL_DISPATCH_TIME, "dispatch time") \
- _(DISPATCH_START, "dispatch start") \
- _(DISPATCH_END, "dispatch end") \
- _(FREE, "session free") \
- _(DSP_CNTRS, "dispatch counters") \
- _(IO_EVT_COUNTS, "io evt counts") \
- _(EVT_COUNTS, "ctrl evt counts") \
+#include <vpp/vnet/config.h>
+
+#define foreach_session_dbg_evt \
+ _ (ENQ, DEQ_EVTS, 1, "enqueue") \
+ _ (DEQ, DEQ_EVTS, 1, "dequeue") \
+ _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \
+ _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \
+ _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \
+ _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \
+ _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \
+ _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \
+ _ (STATE_CHANGE, SM, 1, "session state change") \
+ _ (FREE, SM, 1, "session free") \
+ _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \
+ _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts")
typedef enum _session_evt_dbg
{
-#define _(sym, str) SESSION_EVT_##sym,
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym,
foreach_session_dbg_evt
#undef _
} session_evt_dbg_e;
-#define foreach_session_events \
-_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
-_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
-_(CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
-_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
-_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
-_(CLK_TOTAL, 1, 1, "Time Total in Node") \
-_(CLK_START, 1, 1, "Time Since Last Reset") \
- \
-_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \
-_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \
-_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \
-_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \
-_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \
-_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \
- \
-_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
-_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
-_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
-_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
-_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
-_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \
+typedef enum session_evt_lvl_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_lvl_e;
+
+#define foreach_session_evt_grp \
+ _ (DEQ_EVTS, "dequeue/enqueue events") \
+ _ (DISPATCH_DBG, "dispatch") \
+ _ (EVT_POLL_DBG, "event poll") \
+ _ (SM, "state machine") \
+ _ (CLOCKS_EVT_DBG, "clocks events") \
+ _ (COUNTS_EVT_DBG, "counts events")
+
+typedef enum session_evt_grp_
+{
+#define _(sym, str) SESSION_EVT_GRP_##sym,
+ foreach_session_evt_grp
+#undef _
+ SESSION_EVT_N_GRP
+} session_evt_grp_e;
+
+typedef enum session_evt_to_grp_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_to_grp_e;
+
+#define foreach_session_events \
+ _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
+ _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
+ _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
+ _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
+ _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
+ _ (CLK_TOTAL, 1, 1, "Time Total in Node") \
+ _ (CLK_START, 1, 1, "Time Since Last Reset") \
+ \
+ _ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \
+ _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \
+ _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \
+ _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \
+ _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \
+ _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \
+ \
+ _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
+ _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
+ _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
+ _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
+ _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
+ _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main")
typedef enum
{
@@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t
typedef struct session_dbg_main_
{
session_dbg_evts_t *wrk;
+ u8 grp_dbg_lvl[SESSION_EVT_N_GRP];
} session_dbg_main_t;
extern session_dbg_main_t session_dbg_main;
-#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0)
-#define SESSION_DEQ_EVTS (0)
-#define SESSION_DISPATCH_DBG (0)
-#define SESSION_EVT_POLL_DBG (0)
-#define SESSION_SM (0)
+#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0)
+#define SESSION_DEBUG (1)
+#define SESSION_DEQ_EVTS (1)
+#define SESSION_DISPATCH_DBG (1)
+#define SESSION_EVT_POLL_DBG (1)
+#define SESSION_SM (1)
+#define SESSION_CLOCKS_EVT_DBG (1)
+#define SESSION_COUNTS_EVT_DBG (1)
+#else
+#define SESSION_DEBUG (0)
+#define SESSION_DEQ_EVTS (0)
+#define SESSION_DISPATCH_DBG (0)
+#define SESSION_EVT_POLL_DBG (0)
+#define SESSION_SM (0)
#define SESSION_CLOCKS_EVT_DBG (0)
#define SESSION_COUNTS_EVT_DBG (0)
+#endif
#if SESSION_DEBUG
@@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main;
ed = ELOG_DATA (&vlib_global_main.elog_main, _e)
#if SESSION_SM
-#define SESSION_EVT_FREE_HANDLER(_s) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "free: idx %u", \
- .format_args = "i4", \
- }; \
- DEC_SESSION_ETD(_s, _e, 1); \
- ed->data[0] = _s->session_index; \
-}
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "%s: idx %u", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 12, \
+ .enum_strings = { \
+ "created", \
+ "listening", \
+ "connecting", \
+ "accepting", \
+ "ready", \
+ "opened", \
+ "transport closing", \
+ "closing", \
+ "app closed", \
+ "transport closed", \
+ "closed", \
+ "transport deleted", \
+ }, \
+ }; \
+ DEC_SESSION_ETD (_s, _e, 2); \
+ ed->data[0] = _s->session_state; \
+ ed->data[1] = _s->session_index; \
+ }
+
+#define SESSION_EVT_FREE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "free: idx %u", \
+ .format_args = "i4", \
+ }; \
+ DEC_SESSION_ED (_e, 1); \
+ ed->data[0] = _s->session_index; \
+ }
#else
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s)
#define SESSION_EVT_FREE_HANDLER(_s)
#endif
@@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main;
counters[SESS_Q_##_node_evt].u64 += _cnt; \
}
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
-{ \
- u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
- session_dbg_evts_t *sde; \
- sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
- sde->counters[type].u64 += _cnt; \
- sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \
-}
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
+ { \
+ u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
+ session_dbg_evts_t *sde; \
+ sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
+ sde->counters[type].u64 += _cnt; \
+ sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \
+ }
#else
#define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
#endif /*SESSION_COUNTS_EVT_DBG */
@@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main;
#define CONCAT_HELPER(_a, _b) _a##_b
#define CC(_a, _b) CONCAT_HELPER(_a, _b)
-#define SESSION_EVT(_evt, _args...) CC(_evt, _HANDLER)(_args)
-
+#define session_evt_lvl(_evt) CC (_evt, _LVL)
+#define session_evt_grp(_evt) CC (_evt, _GRP)
+#define session_evt_grp_dbg_lvl(_evt) \
+ session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)]
+#define SESSION_EVT(_evt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \
+ session_evt_lvl (_evt))) \
+ CC (_evt, _HANDLER) (_args); \
+ } \
+ while (0)
#else
#define SESSION_EVT(_evt, _args...)
#define SESSION_DBG(_fmt, _args...)
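
Editor's note: concretely, with the definitions above, SESSION_EVT (SESSION_EVT_FREE, s) now expands via the CC concatenation helpers to roughly:

  do
    {
      if (PREDICT_FALSE (session_dbg_main.grp_dbg_lvl[SESSION_EVT_FREE_GRP] >=
			 SESSION_EVT_FREE_LVL))
	SESSION_EVT_FREE_HANDLER (s);
    }
  while (0);

where SESSION_EVT_FREE_GRP evaluates to SESSION_EVT_GRP_SM and SESSION_EVT_FREE_LVL to 1, so a handler only runs when its group's runtime level, set through the new "session debug" CLI, reaches the event's compile-time level.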
diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c
new file mode 100644
index 00000000000..73b777127fd
--- /dev/null
+++ b/src/vnet/session/session_input.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+
+static inline int
+mq_try_lock (svm_msg_q_t *mq)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 100)
+ {
+ rv = svm_msg_q_try_lock (mq);
+ if (!rv)
+ return 0;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+always_inline u8
+mq_event_ring_index (session_evt_type_t et)
+{
+ return (et >= SESSION_CTRL_EVT_RPC ? SESSION_MQ_CTRL_EVT_RING :
+ SESSION_MQ_IO_EVT_RING);
+}
+
+void
+app_worker_del_all_events (app_worker_t *app_wrk)
+{
+ session_worker_t *wrk;
+ session_event_t *evt;
+ u32 thread_index;
+ session_t *s;
+
+ for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts);
+ thread_index++)
+ {
+ while (clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ {
+ clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt);
+ switch (evt->event_type)
+ {
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ default:
+ break;
+ }
+ }
+ wrk = session_main_get_worker (thread_index);
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+ }
+}
+
+always_inline int
+app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
+ u8 is_builtin)
+{
+ application_t *app = application_get (app_wrk->app_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ u8 ring_index, mq_is_cong;
+ session_state_t old_state;
+ session_event_t *evt;
+ u32 n_evts = 128, i;
+ session_t *s;
+ int rv;
+
+ n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index]));
+
+ if (!is_builtin)
+ {
+ mq_is_cong = app_worker_mq_is_congested (app_wrk);
+ if (mq_try_lock (mq))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ return 0;
+ }
+ }
+
+ for (i = 0; i < n_evts; i++)
+ {
+ evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]);
+ if (!is_builtin)
+ {
+ ring_index = mq_event_ring_index (evt->event_type);
+ if (svm_msg_q_or_ring_is_full (mq, ring_index))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ break;
+ }
+ }
+
+ switch (evt->event_type)
+ {
+ case SESSION_IO_EVT_RX:
+ s = session_get (evt->session_index, thread_index);
+ s->flags &= ~SESSION_F_RX_EVT;
+ /* Application didn't confirm accept yet */
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ /* Handle sessions that might not be on current thread */
+ case SESSION_IO_EVT_BUILTIN_RX:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ s->flags &= ~SESSION_F_RX_EVT;
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_CTRL_EVT_BOUND:
+ /* No app cb function currently */
+ if (is_builtin)
+ break;
+ app->cb_fns.session_listened_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED:
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ if (app->cb_fns.session_accept_callback (s))
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (is_builtin)
+ {
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s,
+ clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ }
+ break;
+ case SESSION_CTRL_EVT_CONNECTED:
+ if (!(evt->as_u64[1] & 0xffffffff))
+ {
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ }
+ else
+ s = 0;
+ rv = app->cb_fns.session_connected_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, s,
+ evt->as_u64[1] & 0xffffffff);
+ if (!s)
+ break;
+ if (rv)
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s, clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ break;
+ case SESSION_CTRL_EVT_DISCONNECTED:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ break;
+ case SESSION_CTRL_EVT_RESET:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_reset_callback (s);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN_REPLY:
+ if (is_builtin)
+ break;
+ app->cb_fns.session_unlistened_callback (
+ app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.session_migrate_callback (s, evt->as_u64[1]);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ /* Notify app that it has data on the new session */
+ s = session_get_from_handle (evt->as_u64[1]);
+ session_send_io_evt_to_thread (s->rx_fifo,
+ SESSION_IO_EVT_BUILTIN_RX);
+ break;
+ case SESSION_CTRL_EVT_TRANSPORT_CLOSED:
+ s = session_get (evt->session_index, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX))
+ break;
+ if (app->cb_fns.session_transport_closed_callback)
+ app->cb_fns.session_transport_closed_callback (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX))
+ {
+ if (app->cb_fns.session_cleanup_callback)
+ app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32);
+ }
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ if (app->cb_fns.half_open_cleanup_callback)
+ app->cb_fns.half_open_cleanup_callback (s);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_APP_ADD_SEGMENT:
+ app->cb_fns.add_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ case SESSION_CTRL_EVT_APP_DEL_SEGMENT:
+ app->cb_fns.del_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ default:
+ clib_warning ("unexpected event: %u", evt->event_type);
+ ASSERT (0);
+ break;
+ }
+ clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1);
+ }
+
+ if (!is_builtin)
+ {
+ svm_msg_q_unlock (mq);
+ if (mq_is_cong && i == n_evts)
+ app_worker_unset_wrk_mq_congested (app_wrk, thread_index);
+ }
+
+ return 0;
+}
+
+int
+app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index)
+{
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 1 /* is_builtin */);
+ else
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 0 /* is_builtin */);
+}
+
+static inline int
+session_wrk_flush_events (session_worker_t *wrk)
+{
+ app_worker_t *app_wrk;
+ uword app_wrk_index;
+ u32 thread_index;
+
+ thread_index = wrk->vm->thread_index;
+ app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf);
+
+ while (app_wrk_index != ~0)
+ {
+ app_wrk = app_worker_get_if_valid (app_wrk_index);
+ /* app_wrk events are flushed on free, so should be valid here */
+ ASSERT (app_wrk != 0);
+ app_wrk_flush_wrk_events (app_wrk, thread_index);
+
+ if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+
+ app_wrk_index =
+ clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1);
+ }
+
+ if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf))
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+
+ return 0;
+}
+
+VLIB_NODE_FN (session_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 thread_index = vm->thread_index;
+ session_worker_t *wrk;
+
+ wrk = session_main_get_worker (thread_index);
+ session_wrk_flush_events (wrk);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (session_input_node) = {
+ .name = "session-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 6e060cb119d..ff20bc2d835 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -29,13 +29,14 @@
#include <vnet/session/session.h>
#include <vnet/session/application.h>
+static session_lookup_main_t sl_main;
+
/**
* Network namespace index (i.e., fib index) to session lookup table. We
* should have one per network protocol type but for now we only support IP4/6
*/
static u32 *fib_index_to_table_index[2];
-/* *INDENT-OFF* */
/* 16 octets */
typedef CLIB_PACKED (struct {
union
@@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct {
u64 as_u64[6];
};
}) v6_connection_key_t;
-/* *INDENT-ON* */
typedef clib_bihash_kv_16_8_t session_kv4_t;
typedef clib_bihash_kv_48_8_t session_kv6_t;
@@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc)
tc->rmt_port, tc->proto);
}
+static inline u8
+session_table_alloc_needs_sync (void)
+{
+ return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1);
+}
+
+static_always_inline u8
+session_table_is_alloced (u8 fib_proto, u32 fib_index)
+{
+ return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
+ fib_index_to_table_index[fib_proto][fib_index] != ~0);
+}
+
static session_table_t *
session_table_get_or_alloc (u8 fib_proto, u32 fib_index)
{
session_table_t *st;
u32 table_index;
+
ASSERT (fib_index != ~0);
- if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
- fib_index_to_table_index[fib_proto][fib_index] != ~0)
+
+ if (session_table_is_alloced (fib_proto, fib_index))
{
table_index = fib_index_to_table_index[fib_proto][fib_index];
return session_table_get (table_index);
}
+
+ u8 needs_sync = session_table_alloc_needs_sync ();
+ session_lookup_main_t *slm = &sl_main;
+
+ /* Stop workers, otherwise consumers might be affected. This is
+ * acceptable because new tables should seldom be allocated */
+ if (needs_sync)
+ {
+ vlib_workers_sync ();
+
+ /* We might have a race, only one worker allowed at once */
+ clib_spinlock_lock (&slm->st_alloc_lock);
+ }
+
+ /* Another worker just allocated this table */
+ if (session_table_is_alloced (fib_proto, fib_index))
+ {
+ table_index = fib_index_to_table_index[fib_proto][fib_index];
+ st = session_table_get (table_index);
+ }
else
{
st = session_table_alloc ();
- table_index = session_table_index (st);
+ st->active_fib_proto = fib_proto;
+ session_table_init (st, fib_proto);
vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index,
~0);
+ table_index = session_table_index (st);
fib_index_to_table_index[fib_proto][fib_index] = table_index;
- st->active_fib_proto = fib_proto;
- session_table_init (st, fib_proto);
- return st;
}
+
+ if (needs_sync)
+ {
+ clib_spinlock_unlock (&slm->st_alloc_lock);
+ vlib_workers_continue ();
+ }
+
+ return st;
}
static session_table_t *
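Editor's note: the allocation path above is a double-checked pattern: the cheap lookup runs lock-free, and only a miss pays for the worker barrier plus spinlock, after which the check is repeated because another worker may have won the race. In miniature, reusing the function's own names:

  if (!session_table_is_alloced (fib_proto, fib_index))
    {
      vlib_workers_sync ();
      clib_spinlock_lock (&slm->st_alloc_lock);
      if (!session_table_is_alloced (fib_proto, fib_index))
	{ /* allocate, init and publish the table index */ }
      clib_spinlock_unlock (&slm->st_alloc_lock);
      vlib_workers_continue ();
    }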
@@ -1046,9 +1087,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl,
/**
* Lookup session with ip4 and transport layer information
*
- * Important note: this may look into another thread's pool table and
- * register as 'peeker'. Caller should call @ref session_pool_remove_peeker as
- * if needed as soon as possible.
+ * Important note: this may look into another thread's pool table
*
* Lookup logic is similar to that of @ref session_lookup_connection_wt4 but
* this returns a session as opposed to a transport connection and it does not
@@ -1145,7 +1184,6 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl,
rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
if (rv == 0)
{
- ASSERT ((u32) (kv6.value >> 32) == thread_index);
if (PREDICT_FALSE ((u32) (kv6.value >> 32) != thread_index))
{
*result = SESSION_LOOKUP_RESULT_WRONG_THREAD;
@@ -1313,8 +1351,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
-int
-vnet_session_rule_add_del (session_rule_add_del_args_t * args)
+session_error_t
+vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
app_namespace_t *app_ns = app_namespace_get (args->appns_index);
session_rules_table_t *srt;
@@ -1324,14 +1362,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args)
int rv = 0;
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (args->scope > 3)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (args->transport_proto != TRANSPORT_PROTO_TCP
&& args->transport_proto != TRANSPORT_PROTO_UDP)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0)
{
@@ -1452,6 +1490,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen = 0, rmt_plen = 0;
+ clib_error_t *error = 0;
u32 appns_index, scope = 0;
ip46_address_t lcl_ip, rmt_ip;
u8 is_ip4 = 1, conn_set = 0;
@@ -1501,29 +1540,32 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (input, "tag %_%v%_", &tag))
;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
}
if (proto == ~0)
{
vlib_cli_output (vm, "proto must be set");
- return 0;
+ goto done;
}
if (is_add && !conn_set && action == ~0)
{
vlib_cli_output (vm, "connection and action must be set for add");
- return 0;
+ goto done;
}
if (!is_add && !tag && !conn_set)
{
vlib_cli_output (vm, "connection or tag must be set for delete");
- return 0;
+ goto done;
}
if (vec_len (tag) > SESSION_RULE_TAG_MAX_LEN)
{
vlib_cli_output (vm, "tag too long (max u64)");
- return 0;
+ goto done;
}
if (ns_id)
@@ -1532,7 +1574,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!app_ns)
{
vlib_cli_output (vm, "namespace %v does not exist", ns_id);
- return 0;
+ goto done;
}
}
else
@@ -1559,13 +1601,14 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
.scope = scope,
};
if ((rv = vnet_session_rule_add_del (&args)))
- return clib_error_return (0, "rule add del returned %u", rv);
+ error = clib_error_return (0, "rule add del returned %u", rv);
+done:
+ vec_free (ns_id);
vec_free (tag);
- return 0;
+ return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_rule_command, static) =
{
.path = "session rule",
@@ -1573,7 +1616,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) =
"<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>",
.function = session_rule_command_fn,
};
-/* *INDENT-ON* */
void
session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto,
@@ -1696,7 +1738,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_rules_command, static) =
{
.path = "show session rules",
@@ -1704,11 +1745,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) =
"<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]",
.function = show_session_rules_command_fn,
};
-/* *INDENT-ON* */
+
+u8 *
+format_session_lookup_tables (u8 *s, va_list *args)
+{
+ u32 fib_proto = va_arg (*args, u32);
+ u32 *fibs, num_fibs = 0, fib_index, indent;
+ session_table_t *st;
+ u64 total_mem = 0;
+
+ fibs = fib_index_to_table_index[fib_proto];
+
+ for (fib_index = 0; fib_index < vec_len (fibs); fib_index++)
+ {
+ if (fibs[fib_index] == ~0)
+ continue;
+
+ num_fibs += 1;
+ st = session_table_get (fibs[fib_index]);
+ total_mem += session_table_memory_size (st);
+ }
+
+ indent = format_get_indent (s);
+ s = format (s, "active fibs:\t%u\n", num_fibs);
+ s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent,
+ vec_len (fibs) - 1);
+ s = format (s, "%Utable memory:\t%U\n", format_white_space, indent,
+ format_memory_size, total_mem);
+ s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent,
+ format_memory_size, vec_mem_size (fibs));
+
+ return s;
+}
+
+static clib_error_t *
+show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_table_t *st;
+ u32 fib_index = ~0;
+
+ session_cli_return_if_not_enabled ();
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %u", &fib_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (fib_index != ~0)
+ {
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index);
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "ip4 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP4);
+ vlib_cli_output (vm, "ip6 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP6);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_lookup_command, static) = {
+ .path = "show session lookup",
+ .short_help = "show session lookup [table <fib-index>]",
+ .function = show_session_lookup_command_fn,
+};
void
session_lookup_init (void)
{
+ session_lookup_main_t *slm = &sl_main;
+
+ clib_spinlock_init (&slm->st_alloc_lock);
+
/*
* Allocate default table and map it to fib_index 0
*/
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index c1037dff8c9..f9ffc15165a 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -29,6 +29,11 @@ typedef enum session_lookup_result_
SESSION_LOOKUP_RESULT_FILTERED
} session_lookup_result_t;
+typedef struct session_lookup_main_
+{
+ clib_spinlock_t st_alloc_lock;
+} session_lookup_main_t;
+
session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
ip4_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto);
@@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args
u8 transport_proto;
} session_rule_add_del_args_t;
-int vnet_session_rule_add_del (session_rule_add_del_args_t * args);
+session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args);
void session_lookup_set_tables_appns (app_namespace_t * app_ns);
void session_lookup_init (void);
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index b8b5ce2d8de..0ec158fb429 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -26,12 +26,28 @@
#include <svm/queue.h>
#include <sys/timerfd.h>
-#define app_check_thread_and_barrier(_fn, _arg) \
- if (!vlib_thread_is_main_w_barrier ()) \
- { \
- vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \
- return; \
- }
+static inline void
+session_wrk_send_evt_to_main (session_worker_t *wrk, session_evt_elt_t *elt)
+{
+ session_evt_elt_t *he;
+ uword thread_index;
+ u8 is_empty;
+
+ thread_index = wrk->vm->thread_index;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+ is_empty = clib_llist_is_empty (wrk->event_elts, evt_list, he);
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ if (is_empty)
+ session_send_rpc_evt_to_thread (0, session_wrk_handle_evts_main_rpc,
+ uword_to_pointer (thread_index, void *));
+}
+
+#define app_check_thread_and_barrier(_wrk, _elt) \
+ if (!vlib_thread_is_main_w_barrier ()) \
+ { \
+      session_wrk_send_evt_to_main (_wrk, _elt);                            \
+ return; \
+ }
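
(Editor's note: expanded at a call site, the macro turns each control-event handler into an early-out that parks the element on the per-worker evts_pending_main list and pokes main via RPC. Conceptually:

/* Sketch: what app_check_thread_and_barrier () expands to in a handler */
static void
session_mq_example_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
  if (!vlib_thread_is_main_w_barrier ())
    {
      /* queue on wrk->evts_pending_main; the first enqueue schedules
       * session_wrk_handle_evts_main_rpc () on main */
      session_wrk_send_evt_to_main (wrk, elt);
      return;
    }
  /* main thread with barrier held: safe to touch app/listener state */
})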
static void
session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns)
@@ -93,16 +109,17 @@ session_mq_free_ext_config (application_t *app, uword offset)
}
static void
-session_mq_listen_handler (void *data)
+session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_msg_t *mp = (session_listen_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -122,26 +139,31 @@ session_mq_listen_handler (void *data)
a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config);
if ((rv = vnet_listen (a)))
- clib_warning ("listen returned: %U", format_session_error, rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
if (mp->ext_config)
session_mq_free_ext_config (app, mp->ext_config);
+
+  /* Make sure events are flushed before releasing the barrier, to avoid a
+   * potential race with accept. */
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
-session_mq_listen_uri_handler (void *data)
+session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_uri_msg_t *mp = (session_listen_uri_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -152,7 +174,8 @@ session_mq_listen_uri_handler (void *data)
rv = vnet_bind_uri (a);
app_wrk = application_get_worker (app, 0);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
@@ -160,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
{
vnet_connect_args_t _a, *a = &_a;
app_worker_t *app_wrk;
+ session_worker_t *wrk;
application_t *app;
int rv;
@@ -173,6 +197,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
a->sep.port = mp->port;
a->sep.transport_proto = mp->proto;
a->sep.peer.fib_index = mp->vrf;
+ a->sep.dscp = mp->dscp;
clib_memcpy_fast (&a->sep.peer.ip, &mp->lcl_ip, sizeof (mp->lcl_ip));
if (mp->is_ip4)
{
@@ -192,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp)
if ((rv = vnet_connect (a)))
{
- clib_warning ("connect returned: %U", format_session_error, rv);
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
if (mp->ext_config)
@@ -205,23 +231,20 @@ static void
session_mq_handle_connects_rpc (void *arg)
{
u32 max_connects = 32, n_connects = 0;
- vlib_main_t *vm = vlib_get_main ();
session_evt_elt_t *he, *elt, *next;
- session_worker_t *fwrk, *wrk;
+ session_worker_t *fwrk;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
/* Pending connects on linked list pertaining to first worker */
- fwrk = session_main_get_worker (1);
+ fwrk = session_main_get_worker (transport_cl_thread ());
if (!fwrk->n_pending_connects)
- goto update_state;
-
- vlib_worker_thread_barrier_sync (vm);
+ return;
he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects);
elt = clib_llist_next (fwrk->event_elts, evt_list, he);
- /* Avoid holding the barrier for too long */
+ /* Avoid holding the worker for too long */
while (n_connects < max_connects && elt != he)
{
next = clib_llist_next (fwrk->event_elts, evt_list, elt);
@@ -235,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg)
/* Decrement with worker barrier */
fwrk->n_pending_connects -= n_connects;
-
- vlib_worker_thread_barrier_release (vm);
-
-update_state:
-
- /* Switch worker to poll mode if it was in interrupt mode and had work or
- * back to interrupt if threshold of loops without a connect is passed.
- * While in poll mode, reprogram connects rpc */
- wrk = session_main_get_worker (0);
- if (wrk->state != SESSION_WRK_POLLING)
+ if (fwrk->n_pending_connects > 0)
{
- if (n_connects)
- {
- session_wrk_set_state (wrk, SESSION_WRK_POLLING);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
- wrk->no_connect_loops = 0;
- }
- }
- else
- {
- if (!n_connects)
- {
- if (++wrk->no_connect_loops > 1e5)
- {
- session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }
- }
- else
- wrk->no_connect_loops = 0;
- }
-
- if (wrk->state == SESSION_WRK_POLLING)
- {
- elt = session_evt_alloc_ctrl (wrk);
- elt->evt.event_type = SESSION_CTRL_EVT_RPC;
- elt->evt.rpc_args.fp = session_mq_handle_connects_rpc;
+ session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
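
(Editor's note: two things are worth calling out in the rewritten RPC: it is bounded, handling at most max_connects per invocation, and it self-reschedules when backlog remains, so no barrier is held across the whole queue. A stripped-down skeleton of that pattern, with have_work ()/do_one () as hypothetical stand-ins:

/* Bounded, self-rescheduling RPC: drain a little, re-post if needed */
static void
bounded_rpc_sketch (void *arg)
{
  session_worker_t *fwrk = session_main_get_worker (transport_cl_thread ());
  u32 budget = 32, n_done = 0;

  while (n_done < budget && have_work (fwrk)) /* hypothetical helper */
    {
      do_one (fwrk); /* hypothetical helper */
      n_done++;
    }

  if (fwrk->n_pending_connects > 0)
    session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
					  bounded_rpc_sketch, 0);
})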
@@ -283,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
u32 thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
- /* No workers, so just deal with the connect now */
- if (PREDICT_FALSE (!thread_index))
+ if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
{
- session_mq_connect_one (session_evt_ctrl_data (wrk, elt));
+ clib_warning ("Connect on wrong thread. Dropping");
return;
}
- if (PREDICT_FALSE (thread_index != 1))
+  /* If on a worker, check whether main has pending messages; this avoids
+   * reordering with other control messages that must be handled by main.
+   */
+ if (thread_index)
{
- clib_warning ("Connect on wrong thread. Dropping");
- return;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+
+ /* Events pending on main, postpone to avoid reordering */
+ if (!clib_llist_is_empty (wrk->event_elts, evt_list, he))
+ {
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ return;
+ }
}
- /* Add to pending list to be handled by main thread */
+ /* Add to pending list to be handled by first worker */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
@@ -304,23 +300,23 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
wrk->n_pending_connects += 1;
if (wrk->n_pending_connects == 1)
{
- vlib_node_set_interrupt_pending (vlib_get_main_by_index (0),
- session_queue_node.index);
- session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0);
+ session_send_rpc_evt_to_thread_force (thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
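
(Editor's note: the pending lists used here are clib_llist's: anchors embedded in pool elements and addressed by pool index rather than pointer, which is what keeps them valid across pool reallocation. The basic enqueue/dequeue idiom, as used with the evt_list anchor of session_evt_elt_t:

/* he is a sentinel element; the list links pool indices, not pointers */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);  /* enqueue */
/* ... later, on the draining side ... */
clib_llist_remove (wrk->event_elts, evt_list, elt);	   /* dequeue */)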
static void
-session_mq_connect_uri_handler (void *data)
+session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_connect_uri_msg_t *mp = (session_connect_uri_msg_t *) data;
vnet_connect_args_t _a, *a = &_a;
+ session_connect_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_connect_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -331,9 +327,9 @@ session_mq_connect_uri_handler (void *data)
a->app_index = app->app_index;
if ((rv = vnet_connect_uri (a)))
{
- clib_warning ("connect_uri returned: %d", rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, 0 /* default wrk only */ );
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
}
@@ -370,14 +366,15 @@ session_mq_disconnect_handler (void *data)
}
static void
-app_mq_detach_handler (void *data)
+app_mq_detach_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_app_detach_msg_t *mp = (session_app_detach_msg_t *) data;
vnet_app_detach_args_t _a, *a = &_a;
+ session_app_detach_msg_t *mp;
application_t *app;
- app_check_thread_and_barrier (app_mq_detach_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -388,18 +385,19 @@ app_mq_detach_handler (void *data)
}
static void
-session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
+session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- vlib_main_t *vm = vlib_get_main ();
vnet_unlisten_args_t _a, *a = &_a;
+ session_unlisten_msg_t *mp;
app_worker_t *app_wrk;
session_handle_t sh;
application_t *app;
- u32 context;
int rv;
+ app_check_thread_and_barrier (wrk, elt);
+
+ mp = session_evt_ctrl_data (wrk, elt);
sh = mp->handle;
- context = mp->context;
app = application_lookup (mp->client_index);
if (!app)
@@ -410,65 +408,34 @@ session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
a->handle = sh;
a->wrk_map_index = mp->wrk_index;
- vlib_worker_thread_barrier_sync (vm);
-
if ((rv = vnet_unlisten (a)))
- clib_warning ("unlisten returned: %d", rv);
-
- vlib_worker_thread_barrier_release (vm);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, a->wrk_map_index);
if (!app_wrk)
return;
- mq_send_unlisten_reply (app_wrk, sh, context, rv);
- clib_mem_free (mp);
-}
-
-static void
-session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
-{
- u32 thread_index = wrk - session_main.wrk;
- session_unlisten_msg_t *mp, *arg;
-
- mp = session_evt_ctrl_data (wrk, elt);
- arg = clib_mem_alloc (sizeof (session_unlisten_msg_t));
- clib_memcpy_fast (arg, mp, sizeof (*arg));
-
- if (PREDICT_FALSE (!thread_index))
- {
- session_mq_unlisten_rpc (arg);
- return;
- }
-
- session_send_rpc_evt_to_thread_force (0, session_mq_unlisten_rpc, arg);
+ app_worker_unlisten_reply (app_wrk, sh, mp->context, rv);
}
static void
-session_mq_accepted_reply_handler (void *data)
+session_mq_accepted_reply_handler (session_worker_t *wrk,
+ session_evt_elt_t *elt)
{
- session_accepted_reply_msg_t *mp = (session_accepted_reply_msg_t *) data;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ session_accepted_reply_msg_t *mp;
session_state_t old_state;
app_worker_t *app_wrk;
session_t *s;
- /* Server isn't interested, kill the session */
- if (mp->retval)
- {
- a->app_index = mp->context;
- a->handle = mp->handle;
- vnet_disconnect_session (a);
- return;
- }
+ mp = session_evt_ctrl_data (wrk, elt);
/* Mail this back from the main thread. We're not polling in main
* thread so we're using other workers for notifications. */
- if (vlib_num_workers () && vlib_get_thread_index () != 0
- && session_thread_from_handle (mp->handle) == 0)
+ if (session_thread_from_handle (mp->handle) == 0 && vlib_num_workers () &&
+ vlib_get_thread_index () != 0)
{
- vlib_rpc_call_main_thread (session_mq_accepted_reply_handler,
- (u8 *) mp, sizeof (*mp));
+ session_wrk_send_evt_to_main (wrk, elt);
return;
}
@@ -483,27 +450,36 @@ session_mq_accepted_reply_handler (void *data)
return;
}
- if (!session_has_transport (s))
+ /* Server isn't interested, disconnect the session */
+ if (mp->retval)
{
- s->session_state = SESSION_STATE_READY;
- if (ct_session_connect_notify (s, SESSION_E_NONE))
- return;
+ a->app_index = mp->context;
+ a->handle = mp->handle;
+ vnet_disconnect_session (a);
+ s->app_wrk_index = SESSION_INVALID_INDEX;
+ return;
}
- else
+
+ /* Special handling for cut-through sessions */
+ if (!session_has_transport (s))
{
- old_state = s->session_state;
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
+ ct_session_connect_notify (s, SESSION_E_NONE);
+ return;
+ }
- if (!svm_fifo_is_empty_prod (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ old_state = s->session_state;
+ session_set_state (s, SESSION_STATE_READY);
- /* Closed while waiting for app to reply. Resend disconnect */
- if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
- {
- app_worker_close_notify (app_wrk, s);
- s->session_state = old_state;
- return;
- }
+ if (!svm_fifo_is_empty_prod (s->rx_fifo))
+ app_worker_rx_notify (app_wrk, s);
+
+ /* Closed while waiting for app to reply. Resend disconnect */
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ app_worker_close_notify (app_wrk, s);
+ session_set_state (s, old_state);
+ return;
}
}
@@ -515,15 +491,13 @@ session_mq_reset_reply_handler (void *data)
app_worker_t *app_wrk;
session_t *s;
application_t *app;
- u32 index, thread_index;
mp = (session_reset_reply_msg_t *) data;
app = application_lookup (mp->context);
if (!app)
return;
- session_parse_handle (mp->handle, &index, &thread_index);
- s = session_get_if_valid (index, thread_index);
+ s = session_get_from_handle_if_valid (mp->handle);
/* No session or not the right session */
if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
@@ -633,6 +607,7 @@ session_mq_worker_update_handler (void *data)
session_event_t *evt;
session_t *s;
application_t *app;
+ int rv;
app = application_lookup (mp->client_index);
if (!app)
@@ -669,7 +644,9 @@ session_mq_worker_update_handler (void *data)
return;
}
- app_worker_own_session (app_wrk, s);
+ rv = app_worker_own_session (app_wrk, s);
+ if (rv)
+ session_stat_error_inc (rv, 1);
/*
* Send reply
@@ -696,7 +673,7 @@ session_mq_worker_update_handler (void *data)
session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, s);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
app_worker_close_notify (app_wrk, s);
@@ -774,6 +751,67 @@ session_mq_transport_attr_handler (void *data)
svm_msg_q_add_and_unlock (app_wrk->event_queue, msg);
}
+void
+session_wrk_handle_evts_main_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ clib_llist_index_t ei, next_ei;
+ session_evt_elt_t *he, *elt;
+ session_worker_t *fwrk;
+ u32 thread_index;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ thread_index = pointer_to_uword (args);
+ fwrk = session_main_get_worker (thread_index);
+
+ he = clib_llist_elt (fwrk->event_elts, fwrk->evts_pending_main);
+ ei = clib_llist_next_index (he, evt_list);
+
+ while (ei != fwrk->evts_pending_main)
+ {
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ next_ei = clib_llist_next_index (elt, evt_list);
+ clib_llist_remove (fwrk->event_elts, evt_list, elt);
+ switch (elt->evt.event_type)
+ {
+ case SESSION_CTRL_EVT_LISTEN:
+ session_mq_listen_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN:
+ session_mq_unlisten_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_APP_DETACH:
+ app_mq_detach_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT_URI:
+ session_mq_connect_uri_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED_REPLY:
+ session_mq_accepted_reply_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT:
+ session_mq_connect_handler (fwrk, elt);
+ break;
+ default:
+ clib_warning ("unhandled %u", elt->evt.event_type);
+ ALWAYS_ASSERT (0);
+ break;
+ }
+
+ /* Regrab element in case pool moved */
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ if (!clib_llist_elt_is_linked (elt, evt_list))
+ {
+ session_evt_ctrl_data_free (fwrk, elt);
+ clib_llist_put (fwrk->event_elts, elt);
+ }
+ ei = next_ei;
+ }
+
+ vlib_worker_thread_barrier_release (vm);
+}
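
(Editor's note: the re-fetch before freeing, commented "Regrab element in case pool moved", is load-bearing: handlers may allocate new event elements, growing the pool and relocating it, which invalidates any pointer taken before the call. The hazard in three lines:

/* pointers into a clib pool do not survive a pool_get () that grows it */
elt = clib_llist_elt (fwrk->event_elts, ei);  /* pointer into pool */
handler (fwrk, elt);			      /* may pool_get => realloc */
elt = clib_llist_elt (fwrk->event_elts, ei);  /* re-derive from index */)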
+
vlib_node_registration_t session_queue_node;
typedef struct
@@ -795,21 +833,21 @@ format_session_queue_trace (u8 * s, va_list * args)
return s;
}
-#define foreach_session_queue_error \
-_(TX, "Packets transmitted") \
-_(TIMER, "Timer events") \
-_(NO_BUFFER, "Out of buffers")
+#define foreach_session_queue_error \
+ _ (TX, tx, INFO, "Packets transmitted") \
+ _ (TIMER, timer, INFO, "Timer events") \
+ _ (NO_BUFFER, no_buffer, ERROR, "Out of buffers")
typedef enum
{
-#define _(sym,str) SESSION_QUEUE_ERROR_##sym,
+#define _(f, n, s, d) SESSION_QUEUE_ERROR_##f,
foreach_session_queue_error
#undef _
SESSION_QUEUE_N_ERROR,
} session_queue_error_t;
-static char *session_queue_error_strings[] = {
-#define _(sym,string) string,
+static vlib_error_desc_t session_error_counters[] = {
+#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
foreach_session_queue_error
#undef _
};
@@ -822,36 +860,134 @@ enum
};
static void
-session_tx_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- u32 next_index, u32 * to_next, u16 n_segs,
- session_t * s, u32 n_trace)
+session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 next_index, vlib_buffer_t **bufs, u16 n_segs,
+ session_t *s, u32 n_trace)
{
+ vlib_buffer_t **b = bufs;
+
while (n_trace && n_segs)
{
- vlib_buffer_t *b = vlib_get_buffer (vm, to_next[0]);
- if (PREDICT_TRUE
- (vlib_trace_buffer
- (vm, node, next_index, b, 1 /* follow_chain */ )))
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0],
+ 1 /* follow_chain */)))
{
session_queue_trace_t *t =
- vlib_add_trace (vm, node, b, sizeof (*t));
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
t->session_index = s->session_index;
t->server_thread_index = s->thread_index;
n_trace--;
}
- to_next++;
+ b++;
n_segs--;
}
vlib_set_trace_count (vm, node, n_trace);
}
+always_inline int
+session_tx_fill_dma_transfers (session_worker_t *wrk,
+ session_tx_context_t *ctx, vlib_buffer_t *b)
+{
+ vlib_main_t *vm = wrk->vm;
+ u32 len_to_deq;
+ u8 *data0 = NULL;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+ /*
+ * Start with the first buffer in chain
+ */
+ b->error = 0;
+ b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->current_data = 0;
+ data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
+ len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf);
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data0 += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+ return len_write;
+}
+
+always_inline int
+session_tx_fill_dma_transfers_tail (session_worker_t *wrk,
+ session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ vlib_main_t *vm = wrk->vm;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+
+ return len_write;
+}
+
+always_inline int
+session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data0)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0);
+ else
+ n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b);
+ return n_bytes_read;
+}
+
+always_inline int
+session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data);
+ else
+ n_bytes_read =
+ session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data);
+ return n_bytes_read;
+}
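
(Editor's note: both DMA fill helpers lean on svm_fifo_segments (), which may return the requested span as up to two segments because the fifo is a ring and the data can wrap. A minimal CPU-side consumer of the same API, for comparison:

/* Sketch: walk the (at most two) segments covering len bytes at offset */
static void
fifo_copy_sketch (svm_fifo_t *f, u32 offset, u32 len, u8 *dst)
{
  svm_fifo_seg_t segs[2];
  u32 n_segs = 2, i;
  int n_read;

  n_read = svm_fifo_segments (f, offset, segs, &n_segs, len);
  for (i = 0; i < n_segs && n_read > 0; i++)
    {
      clib_memcpy_fast (dst, segs[i].data, segs[i].len);
      dst += segs[i].len;
      n_read -= segs[i].len;
    }
})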
+
always_inline void
-session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
+ vlib_main_t *vm = wrk->vm;
vlib_buffer_t *chain_b, *prev_b;
u32 chain_bi0, to_deq, left_from_seg;
- u16 len_to_deq, n_bytes_read;
+ int len_to_deq, n_bytes_read;
u8 *data, j;
b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -873,8 +1009,8 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
data = vlib_buffer_get_current (chain_b);
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo,
- ctx->sp.tx_offset, len_to_deq, data);
+ n_bytes_read =
+ session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data);
ctx->sp.tx_offset += n_bytes_read;
}
else
@@ -931,13 +1067,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
}
always_inline void
-session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
u32 len_to_deq;
u8 *data0;
int n_bytes_read;
-
/*
* Start with the first buffer in chain
*/
@@ -950,8 +1085,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset,
- len_to_deq, data0);
+ n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0);
ASSERT (n_bytes_read > 0);
/* Keep track of progress locally, transport is also supposed to
* increment it independently when pushing the header */
@@ -973,10 +1107,10 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
n_bytes_read = svm_fifo_peek (f, offset, deq_now, data0);
ASSERT (n_bytes_read > 0);
- if (ctx->s->session_state == SESSION_STATE_LISTENING)
+ if (transport_connection_is_cless (ctx->tc))
{
- ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4);
- ctx->tc->rmt_port = hdr->rmt_port;
+ clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr,
+ sizeof (*hdr));
}
hdr->data_offset += n_bytes_read;
if (hdr->data_offset == hdr->data_length)
@@ -998,6 +1132,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
ASSERT (n_bytes_read > 0);
}
}
+
b->current_length = n_bytes_read;
ctx->left_to_snd -= n_bytes_read;
@@ -1005,7 +1140,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
* Fill in the remaining buffers in the chain, if any
*/
if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd))
- session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data);
+ session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data);
}
always_inline u8
@@ -1018,7 +1153,15 @@ session_tx_not_ready (session_t * s, u8 peek_data)
/* Can retransmit for closed sessions but can't send new data if
* session is not ready or closed */
else if (s->session_state < SESSION_STATE_READY)
- return 1;
+ {
+      /* Allow accepting sessions to send custom packets. For instance,
+       * tcp may need to send acks in established even though the app
+       * has not yet called accept() */
+ if (s->session_state == SESSION_STATE_ACCEPTING &&
+ (s->flags & SESSION_F_CUSTOM_TX))
+ return 0;
+ return 1;
+ }
else if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
{
/* Allow closed transports to still send custom packets.
@@ -1029,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data)
return 2;
}
}
+ else
+ {
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ return 2;
+ }
return 0;
}
@@ -1085,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr),
(u8 *) & ctx->hdr);
+ /* Zero length dgrams not supported */
+ if (PREDICT_FALSE (ctx->hdr.data_length == 0))
+ {
+ svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr));
+ ctx->max_len_to_snd = 0;
+ return;
+ }
+      /* Apps may enqueue a dgram header before its full payload, so
+       * tolerate incomplete dgrams */
+ if (PREDICT_FALSE (ctx->max_dequeue <
+ ctx->hdr.data_length + sizeof (ctx->hdr)))
+ {
+ ctx->max_len_to_snd = 0;
+ return;
+ }
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset);
len = ctx->hdr.data_length - ctx->hdr.data_offset;
+ if (ctx->hdr.gso_size)
+ {
+ ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size);
+ }
+
/* Process multiple dgrams if smaller than min (buf_space, mss).
* This avoids handling multiple dgrams if they require buffer
* chains */
@@ -1107,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
{
svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr),
(u8 *) & hdr);
- ASSERT (hdr.data_length > hdr.data_offset);
dgram_len = hdr.data_length - hdr.data_offset;
- if (len + dgram_len > ctx->max_dequeue
- || first_dgram_len != dgram_len)
+ if (offset + sizeof (hdr) + hdr.data_length >
+ ctx->max_dequeue ||
+ first_dgram_len != dgram_len)
break;
+	  /* Assert only after the break test above so zero-length dgrams
+	   * are caught there */
+ ASSERT (hdr.data_length > hdr.data_offset);
len += dgram_len;
offset += sizeof (hdr) + hdr.data_length;
}
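
(Editor's note: for context on these bounds checks, datagram transports store each dgram in the tx fifo as a session_dgram_hdr_t followed by its payload, so a full dgram occupies sizeof (hdr) + data_length bytes. Conceptually:

/* tx fifo layout for datagram transports (conceptual):
 *
 *   +---------------------+---------------+---------------------+----
 *   | session_dgram_hdr_t | payload bytes | session_dgram_hdr_t | ...
 *   +---------------------+---------------+---------------------+----
 *
 * hdr.data_offset tracks how much of the current dgram was already sent;
 * a dgram is complete in the fifo only if sizeof (hdr) + data_length fits
 * within max_dequeue, which is exactly what the new check enforces */)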
@@ -1180,8 +1349,30 @@ session_tx_maybe_reschedule (session_worker_t * wrk,
svm_fifo_unset_event (s->tx_fifo);
if (svm_fifo_max_dequeue_cons (s->tx_fifo) > ctx->sp.tx_offset)
- if (svm_fifo_set_event (s->tx_fifo))
- session_evt_add_head_old (wrk, elt);
+ {
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_evt_add_head_old (wrk, elt);
+ }
+ else
+ {
+ transport_connection_deschedule (ctx->tc);
+ }
+}
+
+always_inline void
+session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index)
+{
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ {
+ vec_add1 (wrk->pending_tx_buffers, bi);
+ vec_add1 (wrk->pending_tx_nexts, next_index);
+ }
+ else
+ {
+ session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail];
+ vec_add1 (dma_transfer->pending_tx_buffers, bi);
+ vec_add1 (dma_transfer->pending_tx_nexts, next_index);
+ }
}
always_inline int
@@ -1227,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->sp.max_burst_size = max_burst;
n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp);
*n_tx_packets += n_custom_tx;
- if (PREDICT_FALSE
- (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
- return SESSION_TX_OK;
+ if (PREDICT_FALSE (ctx->s->session_state >=
+ SESSION_STATE_TRANSPORT_CLOSED))
+ {
+ svm_fifo_unset_event (ctx->s->tx_fifo);
+ return SESSION_TX_OK;
+ }
max_burst -= n_custom_tx;
if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX))
{
@@ -1238,6 +1432,11 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
}
}
+ /* Connection previously descheduled because it had no data to send.
+ * Clear descheduled flag and reset pacer if in use */
+ if (transport_connection_is_descheduled (ctx->tc))
+ transport_connection_clear_descheduled (ctx->tc);
+
transport_connection_snd_params (ctx->tc, &ctx->sp);
if (!ctx->sp.snd_space)
@@ -1300,6 +1499,8 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->left_to_snd = ctx->max_len_to_snd;
n_left = ctx->n_segs_per_evt;
+ vec_validate (ctx->transport_pending_bufs, n_left);
+
while (n_left >= 4)
{
vlib_buffer_t *b0, *b1;
@@ -1318,18 +1519,15 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
- session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data);
-
- ctx->transport_vft->push_header (ctx->tc, b0);
- ctx->transport_vft->push_header (ctx->tc, b1);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1;
n_left -= 2;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_buffers, bi1);
- vec_add1 (wrk->pending_tx_nexts, next_index);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
+ session_tx_add_pending_buffer (wrk, bi1, next_index);
}
while (n_left)
{
@@ -1345,20 +1543,20 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
bi0 = ctx->tx_buffers[--n_bufs];
b0 = vlib_get_buffer (vm, bi0);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
-
- /* Ask transport to push header after current_length and
- * total_length_not_including_first_buffer are updated */
- ctx->transport_vft->push_header (ctx->tc, b0);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
n_left -= 1;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
}
+ /* Ask transport to push headers */
+ ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs,
+ ctx->n_segs_per_evt);
+
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0))
- session_tx_trace_frame (vm, node, next_index, wrk->pending_tx_buffers,
+ session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs,
ctx->n_segs_per_evt, ctx->s, n_trace);
if (PREDICT_FALSE (n_bufs))
@@ -1367,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
*n_tx_packets += ctx->n_segs_per_evt;
SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue,
- ctx->s->tx_fifo->has_event, wrk->last_vlib_time);
+ ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time);
ASSERT (ctx->left_to_snd == 0);
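
(Editor's note: a transport VFT contract change is buried here: push_header is now handed the whole frame of buffers at once, collected in ctx->transport_pending_bufs, instead of being invoked per buffer, letting transports amortize per-call costs. A transport-side sketch, assuming the new signature and a hypothetical my_push_one () helper:

/* Hypothetical transport: headers pushed for a whole frame per call */
static u32
my_push_header (transport_connection_t *tc, vlib_buffer_t **bufs, u32 n_bufs)
{
  u32 i;

  for (i = 0; i < n_bufs; i++)
    {
      /* current_length / total_length_not_including_first_buffer are
       * already final here, so header fields can be computed directly */
      my_push_one (tc, bufs[i]); /* hypothetical helper */
    }
  return 0;
})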
@@ -1412,20 +1610,30 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
{
transport_send_params_t *sp = &wrk->ctx.sp;
session_t *s = wrk->ctx.s;
+ clib_llist_index_t ei;
u32 n_packets;
- if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
+ if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) ||
+ (s->session_state == SESSION_STATE_CONNECTING &&
+ (s->flags & SESSION_F_HALF_OPEN))))
return 0;
/* Clear custom-tx flag used to request reschedule for tx */
s->flags &= ~SESSION_F_CUSTOM_TX;
+ sp->flags = 0;
+ sp->bytes_dequeued = 0;
sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets,
TRANSPORT_PACER_MAX_BURST_PKTS);
+ /* Grab elt index since app transports can enqueue events on tx */
+ ei = clib_llist_entry_index (wrk->event_elts, elt);
+
n_packets = transport_custom_tx (session_get_transport_proto (s), s, sp);
*n_tx_packets += n_packets;
+ elt = clib_llist_elt (wrk->event_elts, ei);
+
if (s->flags & SESSION_F_CUSTOM_TX)
{
session_evt_add_old (wrk, elt);
@@ -1438,8 +1646,8 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
session_evt_add_head_old (wrk, elt);
}
- if (sp->max_burst_size &&
- svm_fifo_needs_deq_ntf (s->tx_fifo, sp->max_burst_size))
+ if (sp->bytes_dequeued &&
+ svm_fifo_needs_deq_ntf (s->tx_fifo, sp->bytes_dequeued))
session_dequeue_notify (s);
return n_packets;
@@ -1491,10 +1699,10 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_transport_reset (s);
break;
case SESSION_CTRL_EVT_LISTEN:
- session_mq_listen_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_LISTEN_URI:
- session_mq_listen_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_UNLISTEN:
session_mq_unlisten_handler (wrk, elt);
@@ -1503,7 +1711,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_connect_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_CONNECT_URI:
- session_mq_connect_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_connect_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_SHUTDOWN:
session_mq_shutdown_handler (session_evt_ctrl_data (wrk, elt));
@@ -1515,7 +1723,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_disconnected_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_ACCEPTED_REPLY:
- session_mq_accepted_reply_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_accepted_reply_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_DISCONNECTED_REPLY:
session_mq_disconnected_reply_handler (session_evt_ctrl_data (wrk,
@@ -1528,7 +1736,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_worker_update_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_APP_DETACH:
- app_mq_detach_handler (session_evt_ctrl_data (wrk, elt));
+ app_mq_detach_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_APP_WRK_RPC:
session_mq_app_wrk_rpc_handler (session_evt_ctrl_data (wrk, elt));
@@ -1572,7 +1780,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
s = session_event_get_session (wrk, e);
if (PREDICT_FALSE (!s))
break;
- CLIB_PREFETCH (s->tx_fifo, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (s->tx_fifo, sizeof (*(s->tx_fifo)), LOAD);
wrk->ctx.s = s;
/* Spray packets in per session type frames, since they go to
* different nodes */
@@ -1580,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
case SESSION_IO_EVT_RX:
s = session_event_get_session (wrk, e);
- if (!s)
+ if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
break;
transport_app_rx_evt (session_get_transport_proto (s),
s->connection_index, s->thread_index);
@@ -1591,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
svm_fifo_unset_event (s->rx_fifo);
app_wrk = app_worker_get (s->app_wrk_index);
- app_worker_builtin_rx (app_wrk, s);
+ app_worker_rx_notify (app_wrk, s);
break;
- case SESSION_IO_EVT_BUILTIN_TX:
- s = session_get_from_handle_if_valid (e->session_handle);
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_if_valid (e->session_index, 0 /* main thread */);
+ if (PREDICT_FALSE (!s))
+ break;
wrk->ctx.s = s;
if (PREDICT_TRUE (s != 0))
- session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets);
+ (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets);
break;
default:
clib_warning ("unhandled event type %d", e->event_type);
}
- SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk);
+ SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk);
/* Regrab elements in case pool moved */
elt = clib_llist_elt (wrk->event_elts, ei);
@@ -1611,14 +1821,22 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
clib_llist_put (wrk->event_elts, elt);
}
-/* *INDENT-OFF* */
static const u32 session_evt_msg_sizes[] = {
#define _(symc, sym) \
[SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t),
foreach_session_ctrl_evt
#undef _
};
-/* *INDENT-ON* */
+
+always_inline void
+session_update_time_subscribers (session_main_t *smm, clib_time_type_t now,
+ u32 thread_index)
+{
+ session_update_time_fn *fn;
+
+ vec_foreach (fn, smm->update_time_fns)
+ (*fn) (now, thread_index);
+}
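
(Editor's note: with this, the session node no longer calls transport_update_time () directly; consumers register callbacks that are collected in smm->update_time_fns. Assuming a registration helper of the shape session_register_update_time_fn (fn, is_add), added elsewhere in this series and not shown in this hunk, usage would be:

/* Hypothetical subscriber; exact typedef assumed from the call above */
static void
my_update_time (f64 now, u8 thread_index)
{
  /* advance per-thread timer wheels, RTT estimators, etc. */
}

/* at init */
session_register_update_time_fn (my_update_time, 1 /* is_add */);)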
always_inline void
session_evt_add_to_list (session_worker_t * wrk, session_event_t * evt)
@@ -1652,9 +1870,9 @@ static void
session_flush_pending_tx_buffers (session_worker_t * wrk,
vlib_node_runtime_t * node)
{
- vlib_buffer_enqueue_to_next (wrk->vm, node, wrk->pending_tx_buffers,
- wrk->pending_tx_nexts,
- vec_len (wrk->pending_tx_nexts));
+ vlib_buffer_enqueue_to_next_vec (wrk->vm, node, &wrk->pending_tx_buffers,
+ &wrk->pending_tx_nexts,
+ vec_len (wrk->pending_tx_nexts));
vec_reset_length (wrk->pending_tx_buffers);
vec_reset_length (wrk->pending_tx_nexts);
}
@@ -1685,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk)
if (wrk->state == SESSION_WRK_POLLING)
{
- if (clib_llist_elts (wrk->event_elts) == 4 &&
+ if (clib_llist_elts (wrk->event_elts) == 5 &&
vlib_last_vectors_per_main_loop (vm) < 1)
{
session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
@@ -1695,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk)
}
else if (wrk->state == SESSION_WRK_INTERRUPT)
{
- if (clib_llist_elts (wrk->event_elts) > 4 ||
+ if (clib_llist_elts (wrk->event_elts) > 5 ||
vlib_last_vectors_per_main_loop (vm) > 1)
{
session_wrk_set_state (wrk, SESSION_WRK_POLLING);
@@ -1734,10 +1952,19 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/*
* Update transport time
*/
- transport_update_time (wrk->last_vlib_time, thread_index);
+ session_update_time_subscribers (smm, wrk->last_vlib_time, thread_index);
n_tx_packets = vec_len (wrk->pending_tx_buffers);
SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk);
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1)))
+ return 0;
+ wrk->batch = vlib_dma_batch_new (vm, wrk->config_index);
+ if (!wrk->batch)
+ return 0;
+ }
+
/*
* Dequeue new internal mq events
*/
@@ -1807,6 +2034,20 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
};
}
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->batch_num)
+ {
+ vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail);
+ wrk->batch_num = 0;
+ wrk->trans_tail++;
+ if (wrk->trans_tail == wrk->trans_size)
+ wrk->trans_tail = 0;
+ }
+
+ vlib_dma_batch_submit (vm, wrk->batch);
+ }
+
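
(Editor's note: putting the DMA pieces together, each dispatch of the node brackets the tx work with one batch: opened at the top, filled during the tx path, stamped with the ring slot, submitted at the bottom. A lifecycle sketch using the same vlib_dma calls as above; completion handling, which presumably advances trans_head, is outside this hunk:

/* per-dispatch DMA batch lifecycle, as wired in session_queue_node_fn () */
batch = vlib_dma_batch_new (vm, wrk->config_index);
/* ... tx path adds copies: vlib_dma_batch_add (vm, batch, dst, src, len) */
if (wrk->batch_num)
  {
    vlib_dma_batch_set_cookie (vm, batch, wrk->trans_tail); /* ring slot */
    wrk->trans_tail = (wrk->trans_tail + 1 == wrk->trans_size) ?
			0 : wrk->trans_tail + 1;
  }
/* submitted even when empty; assumed safe, as the code above does it too */
vlib_dma_batch_submit (vm, batch);)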
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))
@@ -1823,19 +2064,16 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_tx_packets;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (session_queue_node) =
-{
+VLIB_REGISTER_NODE (session_queue_node) = {
.function = session_queue_node_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
.name = "session-queue",
.format_trace = format_session_queue_trace,
.type = VLIB_NODE_TYPE_INPUT,
- .n_errors = ARRAY_LEN (session_queue_error_strings),
- .error_strings = session_queue_error_strings,
+ .n_errors = SESSION_QUEUE_N_ERROR,
+ .error_counters = session_error_counters,
.state = VLIB_NODE_STATE_DISABLED,
};
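
(Editor's note: the error-string to error-counter conversion above follows a fixed three-part recipe, an X-macro, an enum, and a descriptor table, that any node migration repeats. A minimal template:

#define foreach_my_node_error                                                \
  _ (DROPPED, dropped, ERROR, "Packets dropped")

typedef enum
{
#define _(f, n, s, d) MY_NODE_ERROR_##f,
  foreach_my_node_error
#undef _
    MY_NODE_N_ERROR,
} my_node_error_t;

static vlib_error_desc_t my_node_error_counters[] = {
#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
  foreach_my_node_error
#undef _
};

/* node registration then uses:
 *   .n_errors = MY_NODE_N_ERROR,
 *   .error_counters = my_node_error_counters,  */)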
-/* *INDENT-ON* */
static clib_error_t *
session_wrk_tfd_read_ready (clib_file_t *cf)
@@ -1939,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_process_node) =
{
.function = session_queue_process,
@@ -1947,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) =
.name = "session-queue-process",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static_always_inline uword
session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -1960,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
return session_queue_node_fn (vm, node, frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_pre_input_node) =
{
.function = session_queue_pre_input_inline,
@@ -1968,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) =
.name = "session-queue-main",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c
index 34bd6a38676..70a702cf55c 100644
--- a/src/vnet/session/session_rules_table.c
+++ b/src/vnet/session/session_rules_table.c
@@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt,
* @param srt table where rule should be added
* @param args rule arguments
*
- * @return 0 if success, clib_error_t error otherwise
+ * @return 0 if success, session_error_t error otherwise
*/
-int
-session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args)
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args)
{
u8 fib_proto = args->rmt.fp_proto, *rt;
u32 ri_from_tag, ri;
@@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag);
if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (fib_proto == FIB_PROTOCOL_IP4)
{
@@ -509,11 +509,18 @@ session_rules_table_add_del (session_rules_table_t * srt,
}
}
else
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return SESSION_E_INVALID;
return 0;
}
void
+session_rules_table_free (session_rules_table_t *srt)
+{
+ mma_rules_table_free_16 (&srt->session_rules_tables_16);
+ mma_rules_table_free_40 (&srt->session_rules_tables_40);
+}
+
+void
session_rules_table_init (session_rules_table_t * srt)
{
mma_rules_table_16_t *srt4;
@@ -598,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt4 = &srt->session_rules_tables_16;
vlib_cli_output (vm, "IP4 rules");
- /* *INDENT-OFF* */
pool_foreach (sr4, srt4->rules) {
vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4);
}
- /* *INDENT-ON* */
}
else if (fib_proto == FIB_PROTOCOL_IP6)
@@ -612,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt6 = &srt->session_rules_tables_40;
vlib_cli_output (vm, "IP6 rules");
- /* *INDENT-OFF* */
pool_foreach (sr6, srt6->rules) {
vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6);
}
- /* *INDENT-ON* */
}
}
diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h
index 8679cb8a0c7..010d50a6398 100644
--- a/src/vnet/session/session_rules_table.h
+++ b/src/vnet/session/session_rules_table.h
@@ -18,11 +18,11 @@
#include <vnet/vnet.h>
#include <vnet/fib/fib.h>
+#include <vnet/session/session_types.h>
#include <vnet/session/transport.h>
#include <vnet/session/mma_16.h>
#include <vnet/session/mma_40.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[5];
};
}) session_mask_or_match_6_t;
-/* *INDENT-ON* */
#define SESSION_RULE_TAG_MAX_LEN 64
#define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX
@@ -111,11 +110,13 @@ void session_rules_table_show_rule (vlib_main_t * vm,
ip46_address_t * lcl_ip, u16 lcl_port,
ip46_address_t * rmt_ip, u16 rmt_port,
u8 is_ip4);
-int session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args);
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args);
u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri,
u8 is_ip4);
void session_rules_table_init (session_rules_table_t * srt);
+void session_rules_table_free (session_rules_table_t *srt);
#endif /* SRC_VNET_SESSION_SESSION_RULES_TABLE_H_ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c
index d0b576fda7b..dbbe771979c 100644
--- a/src/vnet/session/session_table.c
+++ b/src/vnet/session/session_table.c
@@ -60,6 +60,31 @@ session_table_get (u32 table_index)
_(v6,halfopen,buckets,20000) \
_(v6,halfopen,memory,(64<<20))
+void
+session_table_free (session_table_t *slt, u8 fib_proto)
+{
+ u8 all = fib_proto > FIB_PROTOCOL_IP6 ? 1 : 0;
+ int i;
+
+ for (i = 0; i < TRANSPORT_N_PROTOS; i++)
+ session_rules_table_free (&slt->session_rules[i]);
+
+ vec_free (slt->session_rules);
+
+ if (fib_proto == FIB_PROTOCOL_IP4 || all)
+ {
+ clib_bihash_free_16_8 (&slt->v4_session_hash);
+ clib_bihash_free_16_8 (&slt->v4_half_open_hash);
+ }
+ if (fib_proto == FIB_PROTOCOL_IP6 || all)
+ {
+ clib_bihash_free_48_8 (&slt->v6_session_hash);
+ clib_bihash_free_48_8 (&slt->v6_half_open_hash);
+ }
+
+ pool_put (lookup_tables, slt);
+}
+
/**
* Initialize session table hash tables
*
@@ -160,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash,
&ctx);
}
-/* *INDENT-ON* */
+u64
+session_table_memory_size (session_table_t *st)
+{
+ u64 total_size = 0;
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v4_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v6_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ return total_size;
+}
+
+u8 *
+format_session_table (u8 *s, va_list *args)
+{
+ session_table_t *st = va_arg (*args, session_table_t *);
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0);
+ s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0);
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0);
+ s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0);
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h
index ead3c302681..636b8d77bee 100644
--- a/src/vnet/session/session_table.h
+++ b/src/vnet/session/session_table.h
@@ -67,6 +67,10 @@ session_table_t *session_table_alloc (void);
session_table_t *session_table_get (u32 table_index);
u32 session_table_index (session_table_t * slt);
void session_table_init (session_table_t * slt, u8 fib_proto);
+void session_table_free (session_table_t *slt, u8 fib_proto);
+
+u64 session_table_memory_size (session_table_t *st);
+u8 *format_session_table (u8 *s, va_list *args);
/* Internal, try not to use it! */
session_table_t *_get_session_tables ();
@@ -75,7 +79,6 @@ session_table_t *_get_session_tables ();
pool_foreach (VAR, _get_session_tables ()) BODY
#endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c
new file mode 100644
index 00000000000..770e7263024
--- /dev/null
+++ b/src/vnet/session/session_test.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base session_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/session/session.api_enum.h>
+#include <vnet/session/session.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/session/session.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} session_test_main_t;
+
+static session_test_main_t session_test_main;
+
+static int
+api_session_rule_add_del (vat_main_t *vam)
+{
+ vl_api_session_rule_add_del_t *mp;
+ unformat_input_t *i = vam->input;
+ u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen, rmt_plen;
+ u32 appns_index = 0, scope = 0;
+ ip4_address_t lcl_ip4, rmt_ip4;
+ ip6_address_t lcl_ip6, rmt_ip6;
+ u8 is_ip4 = 1, conn_set = 0;
+ u8 is_add = 1, *tag = 0;
+ int ret;
+ fib_prefix_t lcl, rmt;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ ;
+ else if (unformat (i, "proto tcp"))
+ proto = 0;
+ else if (unformat (i, "proto udp"))
+ proto = 1;
+ else if (unformat (i, "appns %d", &appns_index))
+ ;
+ else if (unformat (i, "scope %d", &scope))
+ ;
+ else if (unformat (i, "tag %_%v%_", &tag))
+ ;
+ else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip4_address,
+ &lcl_ip4, &lcl_plen, &lcl_port, unformat_ip4_address,
+ &rmt_ip4, &rmt_plen, &rmt_port))
+ {
+ is_ip4 = 1;
+ conn_set = 1;
+ }
+ else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip6_address,
+ &lcl_ip6, &lcl_plen, &lcl_port, unformat_ip6_address,
+ &rmt_ip6, &rmt_plen, &rmt_port))
+ {
+ is_ip4 = 0;
+ conn_set = 1;
+ }
+ else if (unformat (i, "action %d", &action))
+ ;
+ else
+ break;
+ }
+ if (proto == ~0 || !conn_set || action == ~0)
+ {
+ errmsg ("transport proto, connection and action must be set");
+ return -99;
+ }
+
+ if (scope > 3)
+ {
+ errmsg ("scope should be 0-3");
+ return -99;
+ }
+
+ M (SESSION_RULE_ADD_DEL, mp);
+
+ clib_memset (&lcl, 0, sizeof (lcl));
+ clib_memset (&rmt, 0, sizeof (rmt));
+ if (is_ip4)
+ {
+ ip_set (&lcl.fp_addr, &lcl_ip4, 1);
+ ip_set (&rmt.fp_addr, &rmt_ip4, 1);
+ lcl.fp_len = lcl_plen;
+ rmt.fp_len = rmt_plen;
+ }
+ else
+ {
+ ip_set (&lcl.fp_addr, &lcl_ip6, 0);
+ ip_set (&rmt.fp_addr, &rmt_ip6, 0);
+ lcl.fp_len = lcl_plen;
+ rmt.fp_len = rmt_plen;
+ }
+
+ ip_prefix_encode (&lcl, &mp->lcl);
+ ip_prefix_encode (&rmt, &mp->rmt);
+ mp->lcl_port = clib_host_to_net_u16 ((u16) lcl_port);
+ mp->rmt_port = clib_host_to_net_u16 ((u16) rmt_port);
+ mp->transport_proto =
+ proto ? TRANSPORT_PROTO_API_UDP : TRANSPORT_PROTO_API_TCP;
+ mp->action_index = clib_host_to_net_u32 (action);
+ mp->appns_index = clib_host_to_net_u32 (appns_index);
+ mp->scope = scope;
+ mp->is_add = is_add;
+ if (tag)
+ {
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
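
(Editor's note: the handler above follows the stock VAT plugin sequence: M () allocates the message and stamps its id from the plugin's msg_id_base, S () sends it, W () blocks until the reply handler fills vam->retval. A skeleton for any new request, where FOO is a hypothetical message name:

static int
api_foo (vat_main_t *vam)
{
  vl_api_foo_t *mp; /* hypothetical request type */
  int ret;

  M (FOO, mp);	 /* alloc + msg id from __plugin_msg_base */
  /* ... fill request fields, converting to network byte order ... */
  S (mp);	 /* send to vpp */
  W (ret);	 /* wait; reply handler sets vam->retval/result_ready */
  return ret;
})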
+
+static void
+vl_api_app_attach_reply_t_handler (vl_api_app_attach_reply_t *mp)
+{
+}
+
+static void
+vl_api_app_add_cert_key_pair_reply_t_handler (
+ vl_api_app_add_cert_key_pair_reply_t *mp)
+{
+}
+
+static int
+api_app_attach (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_application_detach (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_del_cert_key_pair (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_add_cert_key_pair (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_rules_dump (vat_main_t *vam)
+{
+ vl_api_session_rules_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "Session Rules");
+ }
+
+ M (SESSION_RULES_DUMP, mp);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&session_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_session_rules_details_t_handler (vl_api_session_rules_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ fib_prefix_t lcl, rmt;
+
+ ip_prefix_decode (&mp->lcl, &lcl);
+ ip_prefix_decode (&mp->rmt, &rmt);
+
+ if (lcl.fp_proto == FIB_PROTOCOL_IP4)
+ {
+ print (vam->ofp,
+ "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s",
+ clib_net_to_host_u32 (mp->appns_index), mp->transport_proto,
+ mp->scope, format_ip4_address, &lcl.fp_addr.ip4, lcl.fp_len,
+ clib_net_to_host_u16 (mp->lcl_port), format_ip4_address,
+ &rmt.fp_addr.ip4, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port),
+ clib_net_to_host_u32 (mp->action_index), mp->tag);
+ }
+ else
+ {
+ print (vam->ofp,
+ "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s",
+ clib_net_to_host_u32 (mp->appns_index), mp->transport_proto,
+ mp->scope, format_ip6_address, &lcl.fp_addr.ip6, lcl.fp_len,
+ clib_net_to_host_u16 (mp->lcl_port), format_ip6_address,
+ &rmt.fp_addr.ip6, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port),
+ clib_net_to_host_u32 (mp->action_index), mp->tag);
+ }
+}
+
+static void
+vl_api_app_namespace_add_del_reply_t_handler (
+ vl_api_app_namespace_add_del_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("app ns index %d\n", ntohl (mp->appns_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void
+vl_api_app_namespace_add_del_v2_reply_t_handler (
+ vl_api_app_namespace_add_del_v2_reply_t *mp)
+{
+}
+
+static void
+vl_api_app_worker_add_del_reply_t_handler (
+ vl_api_app_worker_add_del_reply_t *mp)
+{
+}
+
+static int
+api_app_namespace_add_del_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_worker_add_del (vat_main_t *vat)
+{
+ return -1;
+}
+
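+/* Input sketch, derived from the unformat rules below (values illustrative):
+ *   app_namespace_add_del id foo secret 1234 sw_if_index 1
+ */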
+static int
+api_app_namespace_add_del (vat_main_t *vam)
+{
+ vl_api_app_namespace_add_del_t *mp;
+ unformat_input_t *i = vam->input;
+ u8 *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
+ u32 sw_if_index, ip4_fib_id = ~0, ip6_fib_id = ~0;
+ u64 secret;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %_%v%_", &ns_id))
+ ;
+ else if (unformat (i, "secret %lu", &secret))
+ secret_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ip4_fib_id %d", &ip4_fib_id))
+ ;
+ else if (unformat (i, "ip6_fib_id %d", &ip6_fib_id))
+ ;
+ else
+ break;
+ }
+ if (!ns_id || !secret_set || !sw_if_index_set)
+ {
+ errmsg ("namespace id, secret and sw_if_index must be set");
+ return -99;
+ }
+ if (vec_len (ns_id) > 64)
+ {
+ errmsg ("namespace id too long");
+ return -99;
+ }
+ M (APP_NAMESPACE_ADD_DEL, mp);
+
+ vl_api_vec_to_api_string (ns_id, &mp->namespace_id);
+ mp->secret = clib_host_to_net_u64 (secret);
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+ mp->ip4_fib_id = clib_host_to_net_u32 (ip4_fib_id);
+ mp->ip6_fib_id = clib_host_to_net_u32 (ip6_fib_id);
+ vec_free (ns_id);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_app_namespace_add_del_v4_reply_t_handler (
+ vl_api_app_namespace_add_del_v4_reply_t *mp)
+{
+}
+
+static int
+api_app_namespace_add_del_v4 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_app_namespace_add_del_v3_reply_t_handler (
+ vl_api_app_namespace_add_del_v3_reply_t *mp)
+{
+}
+
+static int
+api_app_namespace_add_del_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_sapi_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+#include <vnet/session/session.api_test.c>
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 246978e0ac3..5e650727d61 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -22,8 +22,22 @@
#define SESSION_INVALID_INDEX ((u32)~0)
#define SESSION_INVALID_HANDLE ((u64)~0)
#define SESSION_CTRL_MSG_MAX_SIZE 86
+#define SESSION_CTRL_MSG_TX_MAX_SIZE 160
#define SESSION_NODE_FRAME_SIZE 128
+typedef u8 session_type_t;
+typedef u64 session_handle_t;
+
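+/* Overlays the 64-bit handle on a (session_index, thread_index) pair so
+ * handles can be built and parsed without shifts and masks. The transparent
+ * union lets callers pass a plain session_handle_t where this is expected. */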
+typedef union session_handle_tu_
+{
+ session_handle_t handle;
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+} __attribute__ ((__transparent_union__)) session_handle_tu_t;
+
#define foreach_session_endpoint_fields \
foreach_transport_endpoint_cfg_fields \
_(u8, transport_proto) \
@@ -35,6 +49,23 @@ typedef struct _session_endpoint
#undef _
} session_endpoint_t;
+#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener")
+
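+/* Expanded twice below: once into bit positions, once into 1 << bit masks */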
+typedef enum session_endpoint_cfg_flags_bits_
+{
+#define _(sym, str) SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_bits_t;
+
+typedef enum session_endpoint_cfg_flags_
+{
+#define _(sym, str) \
+ SESSION_ENDPT_CFG_F_##sym = 1 << SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_t;
+
typedef struct _session_endpoint_cfg
{
#define _(type, name) type name;
@@ -45,7 +76,7 @@ typedef struct _session_endpoint_cfg
u32 ns_index;
u8 original_tp;
u64 parent_handle;
- u8 flags;
+ session_endpoint_cfg_flags_t flags;
transport_endpt_ext_cfg_t *ext_cfg;
} session_endpoint_cfg_t;
@@ -107,9 +138,6 @@ session_endpoint_is_zero (session_endpoint_t * sep)
return ip_is_zero (&sep->ip, sep->is_ip4);
}
-typedef u8 session_type_t;
-typedef u64 session_handle_t;
-
typedef enum
{
SESSION_CLEANUP_TRANSPORT,
@@ -126,19 +154,19 @@ typedef enum session_ft_action_
/*
* Session states
*/
-#define foreach_session_state \
- _(CREATED, "created") \
- _(LISTENING, "listening") \
- _(CONNECTING, "connecting") \
- _(ACCEPTING, "accepting") \
- _(READY, "ready") \
- _(OPENED, "opened") \
- _(TRANSPORT_CLOSING, "transport-closing") \
- _(CLOSING, "closing") \
- _(APP_CLOSED, "app-closed") \
- _(TRANSPORT_CLOSED, "transport-closed") \
- _(CLOSED, "closed") \
- _(TRANSPORT_DELETED, "transport-deleted") \
+#define foreach_session_state \
+ _ (CREATED, "created") \
+ _ (LISTENING, "listening") \
+ _ (CONNECTING, "connecting") \
+ _ (ACCEPTING, "accepting") \
+ _ (READY, "ready") \
+ _ (OPENED, "opened") \
+ _ (TRANSPORT_CLOSING, "transport-closing") \
+ _ (CLOSING, "closing") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (TRANSPORT_CLOSED, "transport-closed") \
+ _ (CLOSED, "closed") \
+ _ (TRANSPORT_DELETED, "transport-deleted")
typedef enum
{
@@ -146,7 +174,7 @@ typedef enum
foreach_session_state
#undef _
SESSION_N_STATES,
-} session_state_t;
+} __clib_packed session_state_t;
#define foreach_session_flag \
_ (RX_EVT, "rx-event") \
@@ -155,7 +183,9 @@ typedef enum
_ (IS_MIGRATING, "migrating") \
_ (UNIDIRECTIONAL, "unidirectional") \
_ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (IS_CLESS, "connectionless")
typedef enum session_flags_bits_
{
@@ -178,38 +208,42 @@ typedef struct session_
svm_fifo_t *rx_fifo;
svm_fifo_t *tx_fifo;
+ union
+ {
+ session_handle_t handle;
+ struct
+ {
+ /** Index in thread pool where session was allocated */
+ u32 session_index;
+
+ /** Index of the thread that allocated the session */
+ u32 thread_index;
+ };
+ };
+
/** Type built from transport and network protocol types */
session_type_t session_type;
/** State in session layer state machine. See @ref session_state_t */
- volatile u8 session_state;
-
- /** Index in thread pool where session was allocated */
- u32 session_index;
+ volatile session_state_t session_state;
/** Index of the app worker that owns the session */
u32 app_wrk_index;
- /** Index of the thread that allocated the session */
- u8 thread_index;
-
/** Session flags. See @ref session_flags_t */
- u32 flags;
+ session_flags_t flags;
/** Index of the transport connection associated to the session */
u32 connection_index;
- /** Index of application that owns the listener. Set only if a listener */
- u32 app_index;
+ /** App listener index in app's listener pool if a listener */
+ u32 al_index;
union
{
/** Parent listener session index if the result of an accept */
session_handle_t listener_handle;
- /** App listener index in app's listener pool if a listener */
- u32 al_index;
-
/** Index in app worker's half-open table if a half-open */
u32 ho_index;
};
@@ -282,45 +316,35 @@ session_tx_is_dgram (session_t * s)
always_inline session_handle_t
session_handle (session_t * s)
{
- return ((u64) s->thread_index << 32) | (u64) s->session_index;
+ return s->handle;
}
always_inline u32
-session_index_from_handle (session_handle_t handle)
+session_index_from_handle (session_handle_tu_t handle)
{
- return handle & 0xFFFFFFFF;
+ return handle.session_index;
}
always_inline u32
-session_thread_from_handle (session_handle_t handle)
+session_thread_from_handle (session_handle_tu_t handle)
{
- return handle >> 32;
+ return handle.thread_index;
}
always_inline void
-session_parse_handle (session_handle_t handle, u32 * index,
- u32 * thread_index)
+session_parse_handle (session_handle_tu_t handle, u32 *index,
+ u32 *thread_index)
{
- *index = session_index_from_handle (handle);
- *thread_index = session_thread_from_handle (handle);
+ *index = handle.session_index;
+ *thread_index = handle.thread_index;
}
static inline session_handle_t
session_make_handle (u32 session_index, u32 data)
{
- return (((u64) data << 32) | (u64) session_index);
-}
-
-always_inline u32
-session_handle_index (session_handle_t ho_handle)
-{
- return (ho_handle & 0xffffffff);
-}
-
-always_inline u32
-session_handle_data (session_handle_t ho_handle)
-{
- return (ho_handle >> 32);
+ return ((session_handle_tu_t){ .session_index = session_index,
+ .thread_index = data })
+ .handle;
}
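+/* Usage sketch: for a pool-allocated session s,
+ * session_make_handle (s->session_index, s->thread_index) == s->handle */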
typedef enum
@@ -329,7 +353,7 @@ typedef enum
SESSION_IO_EVT_TX,
SESSION_IO_EVT_TX_FLUSH,
SESSION_IO_EVT_BUILTIN_RX,
- SESSION_IO_EVT_BUILTIN_TX,
+ SESSION_IO_EVT_TX_MAIN,
SESSION_CTRL_EVT_RPC,
SESSION_CTRL_EVT_HALF_CLOSE,
SESSION_CTRL_EVT_CLOSE,
@@ -360,6 +384,8 @@ typedef enum
SESSION_CTRL_EVT_APP_WRK_RPC,
SESSION_CTRL_EVT_TRANSPORT_ATTR,
SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY,
+ SESSION_CTRL_EVT_TRANSPORT_CLOSED,
+ SESSION_CTRL_EVT_HALF_CLEANUP,
} session_evt_type_t;
#define foreach_session_ctrl_evt \
@@ -394,7 +420,6 @@ typedef enum
#define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX
#define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE
#define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX
-#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX
typedef enum
{
@@ -419,6 +444,7 @@ typedef struct
session_handle_t session_handle;
session_rpc_args_t rpc_args;
u32 ctrl_data_index;
+ u64 as_u64[2];
struct
{
u8 data[0];
@@ -443,12 +469,12 @@ typedef struct session_dgram_header_
u16 rmt_port;
u16 lcl_port;
u8 is_ip4;
+ u16 gso_size;
} __clib_packed session_dgram_hdr_t;
#define SESSION_CONN_ID_LEN 37
-#define SESSION_CONN_HDR_LEN 45
-
-STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
+#define SESSION_CONN_HDR_LEN 47
+STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10),
"session conn id wrong length");
#define foreach_session_error \
@@ -466,9 +492,12 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOLISTEN, "not listening") \
_ (NOSESSION, "session does not exist") \
_ (NOAPP, "app not attached") \
+ _ (APP_ATTACHED, "app already attached") \
_ (PORTINUSE, "lcl port in use") \
_ (IPINUSE, "ip in use") \
_ (ALREADY_LISTENING, "ip port pair already listened on") \
+ _ (ADDR_NOT_IN_USE, "address not in use") \
+ _ (INVALID, "invalid value") \
_ (INVALID_RMT_IP, "invalid remote ip") \
_ (INVALID_APPWRK, "invalid app worker") \
_ (INVALID_NS, "invalid namespace") \
@@ -486,7 +515,10 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOEXTCFG, "no extended transport config") \
_ (NOCRYPTOENG, "no crypto engine") \
_ (NOCRYPTOCKP, "cert key pair not found ") \
- _ (LOCAL_CONNECT, "could not connect with local scope")
+ _ (LOCAL_CONNECT, "could not connect with local scope") \
+ _ (WRONG_NS_SECRET, "wrong ns secret") \
+ _ (SYSCALL, "system call error") \
+ _ (TRANSPORT_NO_REG, "transport was not registered")
typedef enum session_error_p_
{
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index 526f1a2da15..1c2a9261d3c 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -17,36 +17,31 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib.h>
-typedef struct local_endpoint_
-{
- transport_endpoint_t ep;
- int refcnt;
-} local_endpoint_t;
-
/**
* Per-type vector of transport protocol virtual function tables
*/
transport_proto_vft_t *tp_vfts;
-/*
- * Port allocator seed
- */
-static u32 port_allocator_seed;
-
-/*
- * Local endpoints table
- */
-static transport_endpoint_table_t local_endpoints_table;
+typedef struct local_endpoint_
+{
+ transport_endpoint_t ep;
+ transport_proto_t proto;
+ int refcnt;
+} local_endpoint_t;
-/*
- * Pool of local endpoints
- */
-static local_endpoint_t *local_endpoints;
+typedef struct transport_main_
+{
+ transport_endpoint_table_t local_endpoints_table;
+ local_endpoint_t *local_endpoints;
+ u32 *lcl_endpts_freelist;
+ u32 port_allocator_seed;
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+ u8 lcl_endpts_cleanup_pending;
+ clib_spinlock_t local_endpoints_lock;
+} transport_main_t;
-/*
- * Local endpoints pool lock
- */
-static clib_spinlock_t local_endpoints_lock;
+static transport_main_t tp_main;
u8 *
format_transport_proto (u8 * s, va_list * args)
@@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args)
return s;
}
+const char *transport_flags_str[] = {
+#define _(sym, str) str,
+ foreach_transport_connection_flag
+#undef _
+};
+
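+/* Render the set flags as a comma-separated list, e.g. "tx_paced, descheduled" */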
+u8 *
+format_transport_flags (u8 *s, va_list *args)
+{
+ transport_connection_flags_t flags;
+ int i, last = -1;
+
+ flags = va_arg (*args, transport_connection_flags_t);
+
+ for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++)
+ if (flags & (1 << i))
+ last = i;
+
+ for (i = 0; i < last; i++)
+ {
+ if (flags & (1 << i))
+ s = format (s, "%s, ", transport_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", transport_flags_str[last]);
+
+ return s;
+}
+
u8 *
format_transport_connection (u8 * s, va_list * args)
{
@@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args)
if (transport_connection_is_tx_paced (tc))
s = format (s, "%Upacer: %U\n", format_white_space, indent,
format_transport_pacer, &tc->pacer, tc->thread_index);
- s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent,
- tc->flags);
+ s = format (s, "%Utransport: flags: %U\n", format_white_space, indent,
+ format_transport_flags, tc->flags);
}
return s;
}
@@ -124,14 +148,13 @@ u8 *
format_transport_half_open_connection (u8 * s, va_list * args)
{
u32 transport_proto = va_arg (*args, u32);
- u32 ho_index = va_arg (*args, u32);
transport_proto_vft_t *tp_vft;
tp_vft = transport_protocol_get_vft (transport_proto);
if (!tp_vft)
return s;
- s = format (s, "%U", tp_vft->format_half_open, ho_index);
+ s = (tp_vft->format_half_open) (s, args);
return s;
}
@@ -314,6 +337,8 @@ transport_cleanup_half_open (transport_proto_t tp, u32 conn_index)
int
transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].connect))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].connect (tep);
}
@@ -341,8 +366,10 @@ transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index)
u32
transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep)
+ transport_endpoint_cfg_t *tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].start_listen))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].start_listen (session_index, tep);
}
@@ -420,67 +447,148 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index,
#define PORT_MASK ((1 << 16)- 1)
void
-transport_endpoint_del (u32 tepi)
+transport_endpoint_free (u32 tepi)
{
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- pool_put_index (local_endpoints, tepi);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+ transport_main_t *tm = &tp_main;
+ pool_put_index (tm->local_endpoints, tepi);
}
always_inline local_endpoint_t *
-transport_endpoint_new (void)
+transport_endpoint_alloc (void)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- pool_get_zero (local_endpoints, lep);
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ pool_get_aligned_safe (tm->local_endpoints, lep, 0);
return lep;
}
+static void
+transport_cleanup_freelist (void)
+{
+ transport_main_t *tm = &tp_main;
+ local_endpoint_t *lep;
+ u32 *lep_indexp;
+
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_foreach (lep_indexp, tm->lcl_endpts_freelist)
+ {
+ lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp);
+
+ /* Port re-shared after attempt to cleanup */
+ if (lep->refcnt > 0)
+ continue;
+
+ transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto,
+ &lep->ep);
+ transport_endpoint_free (*lep_indexp);
+ }
+
+ vec_reset_length (tm->lcl_endpts_freelist);
+
+ tm->lcl_endpts_cleanup_pending = 0;
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+}
+
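+/* Endpoint frees are batched: indices accumulate on a freelist and an RPC to
+ * the control thread flushes them once more than 32 are pending. */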
void
-transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port)
+transport_program_endpoint_cleanup (u32 lepi)
+{
+ transport_main_t *tm = &tp_main;
+ u8 flush_fl = 0;
+
+ /* All workers can free connections. Synchronize access to freelist */
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_add1 (tm->lcl_endpts_freelist, lepi);
+
+ /* Avoid accumulating lots of endpoints for cleanup */
+ if (!tm->lcl_endpts_cleanup_pending &&
+ vec_len (tm->lcl_endpts_freelist) > 32)
+ {
+ tm->lcl_endpts_cleanup_pending = 1;
+ flush_fl = 1;
+ }
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+
+ if (flush_fl)
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ transport_cleanup_freelist, 0);
+}
+
+int
+transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- /* Cleanup local endpoint if this was an active connect */
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
- if (lepi != ENDPOINT_INVALID_INDEX)
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
+ if (lepi == ENDPOINT_INVALID_INDEX)
+ return -1;
+
+ /* First worker may be cleaning up ports so avoid touching free bitmap */
+ lep = &tm->local_endpoints[lepi];
+ ASSERT (lep->refcnt >= 1);
+
+ /* Local endpoint no longer in use, program cleanup */
+ if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
{
- lep = pool_elt_at_index (local_endpoints, lepi);
- if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
- {
- transport_endpoint_table_del (&local_endpoints_table, proto,
- &lep->ep);
- transport_endpoint_del (lepi);
- }
+ transport_program_endpoint_cleanup (lepi);
+ return 0;
}
+
+ /* Not an error, just an indication that the endpoint was not cleaned up */
+ return -1;
}
-static void
-transport_endpoint_mark_used (u8 proto, ip46_address_t * ip, u16 port)
+static int
+transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- lep = transport_endpoint_new ();
+ u32 tei;
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ tei =
+ transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port);
+ if (tei != ENDPOINT_INVALID_INDEX)
+ return SESSION_E_PORTINUSE;
+
+ /* Pool reallocs with worker barrier */
+ lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
lep->ep.port = port;
+ lep->proto = proto;
lep->refcnt = 1;
- transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep,
- lep - local_endpoints);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+
+ transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep,
+ lep - tm->local_endpoints);
+
+ return 0;
}
void
transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
+ /* Active opens should call this only from control threads, which are also
+ * the ones that allocate and free ports, so the pool has only one writer
+ * and potentially many readers. Listeners are allocated with the barrier */
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
if (lepi != ENDPOINT_INVALID_INDEX)
{
- lep = pool_elt_at_index (local_endpoints, lepi);
+ lep = pool_elt_at_index (tm->local_endpoints, lepi);
clib_atomic_add_fetch (&lep->refcnt, 1);
}
}
@@ -488,18 +596,22 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
/**
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
+ *
+ * @return port in net order or -1 if port cannot be allocated
*/
int
-transport_alloc_local_port (u8 proto, ip46_address_t * ip)
+transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
+ transport_endpoint_cfg_t *rmt)
{
- u16 min = 1024, max = 65535; /* XXX configurable ? */
+ transport_main_t *tm = &tp_main;
+ u16 min = tm->port_allocator_min_src_port;
+ u16 max = tm->port_allocator_max_src_port;
int tries, limit;
- u32 tei;
limit = max - min;
- /* Only support active opens from thread 0 */
- ASSERT (vlib_get_thread_index () == 0);
+ /* Only support active opens from one of the ctrl threads */
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
/* Search for first free slot */
for (tries = 0; tries < limit; tries++)
@@ -509,19 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip)
/* Find a port in the specified range */
while (1)
{
- port = random_u32 (&port_allocator_seed) & PORT_MASK;
+ port = random_u32 (&tm->port_allocator_seed) & PORT_MASK;
if (PREDICT_TRUE (port >= min && port < max))
- break;
+ {
+ port = clib_host_to_net_u16 (port);
+ break;
+ }
}
- /* Look it up. If not found, we're done */
- tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip,
- port);
- if (tei == ENDPOINT_INVALID_INDEX)
- {
- transport_endpoint_mark_used (proto, ip, port);
- return port;
- }
+ if (!transport_endpoint_mark_used (proto, lcl_addr, port))
+ return port;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
+ continue;
+
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, port);
+
+ return port;
}
return -1;
}
@@ -549,14 +668,14 @@ transport_get_interface_ip (u32 sw_if_index, u8 is_ip4, ip46_address_t * addr)
}
static session_error_t
-transport_find_local_ip_for_remote (u32 sw_if_index,
- transport_endpoint_t * rmt,
- ip46_address_t * lcl_addr)
+transport_find_local_ip_for_remote (u32 *sw_if_index,
+ transport_endpoint_t *rmt,
+ ip46_address_t *lcl_addr)
{
fib_node_index_t fei;
fib_prefix_t prefix;
- if (sw_if_index == ENDPOINT_INVALID_INDEX)
+ if (*sw_if_index == ENDPOINT_INVALID_INDEX)
{
/* Find a FIB path to the destination */
clib_memcpy_fast (&prefix.fp_addr, &rmt->ip, sizeof (rmt->ip));
@@ -570,13 +689,13 @@ transport_find_local_ip_for_remote (u32 sw_if_index,
if (fei == FIB_NODE_INDEX_INVALID)
return SESSION_E_NOROUTE;
- sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index == ENDPOINT_INVALID_INDEX)
+ *sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (*sw_if_index == ENDPOINT_INVALID_INDEX)
return SESSION_E_NOINTF;
}
clib_memset (lcl_addr, 0, sizeof (*lcl_addr));
- return transport_get_interface_ip (sw_if_index, rmt->is_ip4, lcl_addr);
+ return transport_get_interface_ip (*sw_if_index, rmt->is_ip4, lcl_addr);
}
int
@@ -584,16 +703,16 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
ip46_address_t * lcl_addr, u16 * lcl_port)
{
transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg;
+ transport_main_t *tm = &tp_main;
session_error_t error;
int port;
- u32 tei;
/*
* Find the local address
*/
if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4))
{
- error = transport_find_local_ip_for_remote (rmt_cfg->peer.sw_if_index,
+ error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index,
rmt, lcl_addr);
if (error)
return error;
@@ -605,26 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
sizeof (rmt_cfg->peer.ip));
}
+ /* Cleanup freelist if need be */
+ if (vec_len (tm->lcl_endpts_freelist))
+ transport_cleanup_freelist ();
+
/*
* Allocate source port
*/
if (rmt_cfg->peer.port == 0)
{
- port = transport_alloc_local_port (proto, lcl_addr);
+ port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
}
else
{
- port = clib_net_to_host_u16 (rmt_cfg->peer.port);
- *lcl_port = port;
- tei = transport_endpoint_lookup (&local_endpoints_table, proto,
- lcl_addr, port);
- if (tei != ENDPOINT_INVALID_INDEX)
+ *lcl_port = rmt_cfg->peer.port;
+
+ if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port))
+ return 0;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
return SESSION_E_PORTINUSE;
- transport_endpoint_mark_used (proto, lcl_addr, port);
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port);
+
+ return 0;
}
return 0;
@@ -660,15 +790,15 @@ static inline u32
spacer_max_burst (spacer_t * pacer, clib_us_time_t time_now)
{
u64 n_periods = (time_now - pacer->last_update);
- u64 inc;
+ i64 inc;
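+ /* signed so the bucket arithmetic below stays signed; the bucket can go
+ * negative after a large send */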
if ((inc = (f32) n_periods * pacer->tokens_per_period) > 10)
{
pacer->last_update = time_now;
- pacer->bucket = clib_min (pacer->bucket + inc, pacer->max_burst);
+ pacer->bucket = clib_min (pacer->bucket + inc, (i64) pacer->max_burst);
}
- return pacer->bucket > 0 ? pacer->max_burst : 0;
+ return pacer->bucket >= 0 ? pacer->max_burst : 0;
}
static inline void
@@ -790,7 +920,7 @@ void
transport_connection_reschedule (transport_connection_t * tc)
{
tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
- transport_connection_tx_pacer_reset_bucket (tc, TRANSPORT_PACER_MIN_BURST);
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
if (transport_max_tx_dequeue (tc))
sesssion_reschedule_tx (tc);
else
@@ -830,6 +960,9 @@ transport_enable_disable (vlib_main_t * vm, u8 is_en)
{
if (vft->enable)
(vft->enable) (vm, is_en);
+
+ if (vft->update_time)
+ session_register_update_time_fn (vft->update_time, is_en);
}
}
@@ -838,6 +971,7 @@ transport_init (void)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
session_main_t *smm = vnet_get_session_main ();
+ transport_main_t *tm = &tp_main;
u32 num_threads;
if (smm->local_endpoints_table_buckets == 0)
@@ -846,15 +980,18 @@ transport_init (void)
smm->local_endpoints_table_memory = 512 << 20;
/* Initialize [port-allocator] random number seed */
- port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_min_src_port = smm->port_allocator_min_src_port;
+ tm->port_allocator_max_src_port = smm->port_allocator_max_src_port;
- clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table",
+ clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoints table",
smm->local_endpoints_table_buckets,
smm->local_endpoints_table_memory);
+ clib_spinlock_init (&tm->local_endpoints_lock);
+
num_threads = 1 /* main thread */ + vtm->n_threads;
if (num_threads > 1)
{
- clib_spinlock_init (&local_endpoints_lock);
/* Main not polled if there are workers */
smm->transport_cl_thread = 1;
}
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 447552c539e..e6ba1ecbc5f 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -57,6 +57,7 @@ typedef struct transport_send_params_
struct
{
u32 max_burst_size;
+ u32 bytes_dequeued;
};
};
transport_snd_flags_t flags;
@@ -65,13 +66,12 @@ typedef struct transport_send_params_
/*
* Transport protocol virtual function table
*/
-/* *INDENT-OFF* */
typedef struct _transport_proto_vft
{
/*
* Setup
*/
- u32 (*start_listen) (u32 session_index, transport_endpoint_t * lcl);
+ u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl);
u32 (*stop_listen) (u32 conn_index);
int (*connect) (transport_endpoint_cfg_t * rmt);
void (*half_close) (u32 conn_index, u32 thread_index);
@@ -85,7 +85,8 @@ typedef struct _transport_proto_vft
* Transmission
*/
- u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
+ u32 (*push_header) (transport_connection_t *tconn, vlib_buffer_t **b,
+ u32 n_bufs);
int (*send_params) (transport_connection_t * tconn,
transport_send_params_t *sp);
void (*update_time) (f64 time_now, u8 thread_index);
@@ -123,16 +124,13 @@ typedef struct _transport_proto_vft
*/
transport_options_t transport_options;
} transport_proto_vft_t;
-/* *INDENT-ON* */
extern transport_proto_vft_t *tp_vfts;
-#define transport_proto_foreach(VAR, BODY) \
-do { \
- for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \
- if (tp_vfts[VAR].push_header != 0) \
- do { BODY; } while (0); \
-} while (0)
+#define transport_proto_foreach(VAR, VAR_ALLOW_BM) \
+ for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \
+ if (tp_vfts[VAR].push_header != 0) \
+ if (VAR_ALLOW_BM & (1 << VAR))
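+/* Usage sketch: transport_proto_foreach (tp, allow_bm) { ... } visits every
+ * registered proto with a push_header handler whose bit is set in allow_bm */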
int transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep);
void transport_half_close (transport_proto_t tp, u32 conn_index,
@@ -140,7 +138,7 @@ void transport_half_close (transport_proto_t tp, u32 conn_index,
void transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index);
void transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index);
u32 transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep);
+ transport_endpoint_cfg_t *tep);
u32 transport_stop_listen (transport_proto_t tp, u32 conn_index);
void transport_cleanup (transport_proto_t tp, u32 conn_index,
u8 thread_index);
@@ -246,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft,
transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp);
void transport_update_time (clib_time_type_t time_now, u8 thread_index);
-int transport_alloc_local_port (u8 proto, ip46_address_t * ip);
-int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt,
- ip46_address_t * lcl_addr,
- u16 * lcl_port);
+int transport_alloc_local_port (u8 proto, ip46_address_t *ip,
+ transport_endpoint_cfg_t *rmt);
+int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt,
+ ip46_address_t *lcl_addr, u16 *lcl_port);
void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip,
u16 port);
-void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port);
+int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip,
+ u16 port);
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
@@ -329,6 +328,19 @@ transport_connection_is_tx_paced (transport_connection_t * tc)
return (tc->flags & TRANSPORT_CONNECTION_F_IS_TX_PACED);
}
+/**
+ * Clear descheduled flag and update pacer if needed
+ *
+ * To add session to scheduler use @ref transport_connection_reschedule
+ */
+always_inline void
+transport_connection_clear_descheduled (transport_connection_t *tc)
+{
+ tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
+ if (transport_connection_is_tx_paced (tc))
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
+}
+
u8 *format_transport_pacer (u8 * s, va_list * args);
/**
diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h
index 9ea1f2102b4..b3469fa9fdb 100644
--- a/src/vnet/session/transport_types.h
+++ b/src/vnet/session/transport_types.h
@@ -21,10 +21,8 @@
#include <vnet/tcp/tcp_debug.h>
#include <vppinfra/bihash_24_8.h>
-
#define TRANSPORT_MAX_HDRS_LEN 140 /* Max number of bytes for headers */
-
typedef enum transport_dequeue_type_
{
TRANSPORT_TX_PEEK, /**< reliable transport protos */
@@ -42,24 +40,35 @@ typedef enum transport_service_type_
TRANSPORT_N_SERVICES
} transport_service_type_t;
+/*
+ * IS_TX_PACED : Connection sending is paced
+ * NO_LOOKUP: Don't register connection in lookup. Does not apply to local
+ * apps and transports using the network layer (udp/tcp)
+ * DESCHED: Connection descheduled by the session layer
+ * CLESS: Connection is "connectionless". Important implications are that
+ * connections are not pinned to workers and listeners have fifos
+ * associated with them
+ */
+#define foreach_transport_connection_flag \
+ _ (IS_TX_PACED, "tx_paced") \
+ _ (NO_LOOKUP, "no_lookup") \
+ _ (DESCHED, "descheduled") \
+ _ (CLESS, "connectionless")
+
+typedef enum transport_connection_flags_bits_
+{
+#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
+ TRANSPORT_CONNECTION_N_FLAGS
+} transport_connection_flags_bits_t;
+
typedef enum transport_connection_flags_
{
- TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0,
- /**
- * Don't register connection in lookup. Does not apply to local apps
- * and transports using the network layer (udp/tcp)
- */
- TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1,
- /**
- * Connection descheduled by the session layer.
- */
- TRANSPORT_CONNECTION_F_DESCHED = 1 << 2,
- /**
- * Connection is "connection less". Some important implications of that
- * are that connections are not pinned to workers and listeners will
- * have fifos associated to them
- */
- TRANSPORT_CONNECTION_F_CLESS = 1 << 3,
+#define _(sym, str) \
+ TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
} transport_connection_flags_t;
typedef struct _spacer
@@ -106,6 +115,7 @@ typedef struct _transport_connection
u32 c_index; /**< Connection index in transport pool */
u32 thread_index; /**< Worker-thread index */
u8 flags; /**< Transport specific flags */
+ u8 dscp; /**< Differentiated Services Code Point */
/*fib_node_index_t rmt_fei;
dpo_id_t rmt_dpo; */
@@ -114,7 +124,7 @@ typedef struct _transport_connection
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
- u32 cc_stat_tstamp; /**< CC stats timestamp */
+ f64 cc_stat_tstamp; /**< CC stats timestamp */
#endif
/**
@@ -146,6 +156,7 @@ typedef struct _transport_connection
#define c_stats connection.stats
#define c_pacer connection.pacer
#define c_flags connection.flags
+#define c_dscp connection.dscp
#define s_ho_handle pacer.bytes_per_sec
} transport_connection_t;
@@ -164,7 +175,8 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128,
_ (TLS, "tls", "J") \
_ (QUIC, "quic", "Q") \
_ (DTLS, "dtls", "D") \
- _ (SRTP, "srtp", "R")
+ _ (SRTP, "srtp", "R") \
+ _ (HTTP, "http", "H")
typedef enum _transport_proto
{
@@ -175,6 +187,7 @@ typedef enum _transport_proto
u8 *format_transport_proto (u8 * s, va_list * args);
u8 *format_transport_proto_short (u8 * s, va_list * args);
+u8 *format_transport_flags (u8 *s, va_list *args);
u8 *format_transport_connection (u8 * s, va_list * args);
u8 *format_transport_listen_connection (u8 * s, va_list * args);
u8 *format_transport_half_open_connection (u8 * s, va_list * args);
@@ -209,6 +222,7 @@ typedef enum transport_endpt_cfg_flags_
_ (u32, next_node_index) \
_ (u32, next_node_opaque) \
_ (u16, mss) \
+ _ (u8, dscp) \
_ (u8, transport_flags) \
/* clang-format on */