diff options
Diffstat (limited to 'src/vnet/session')
33 files changed, 5125 insertions, 2220 deletions
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 7fe81885725..c66548507e5 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -31,10 +31,12 @@ static app_main_t app_main; static app_listener_t * app_listener_alloc (application_t * app) { + app_main_t *am = &app_main; app_listener_t *app_listener; - pool_get (app->listeners, app_listener); + + pool_get (am->listeners, app_listener); clib_memset (app_listener, 0, sizeof (*app_listener)); - app_listener->al_index = app_listener - app->listeners; + app_listener->al_index = app_listener - am->listeners; app_listener->app_index = app->app_index; app_listener->session_index = SESSION_INVALID_INDEX; app_listener->local_index = SESSION_INVALID_INDEX; @@ -43,18 +45,23 @@ app_listener_alloc (application_t * app) } app_listener_t * -app_listener_get (application_t * app, u32 app_listener_index) +app_listener_get (u32 app_listener_index) { - return pool_elt_at_index (app->listeners, app_listener_index); + app_main_t *am = &app_main; + + return pool_elt_at_index (am->listeners, app_listener_index); } static void app_listener_free (application_t * app, app_listener_t * app_listener) { + app_main_t *am = &app_main; + clib_bitmap_free (app_listener->workers); + vec_free (app_listener->cl_listeners); if (CLIB_DEBUG) clib_memset (app_listener, 0xfa, sizeof (*app_listener)); - pool_put (app->listeners, app_listener); + pool_put (am->listeners, app_listener); } session_handle_t @@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al) return al->ls_handle; } -app_listener_t * -app_listener_get_w_session (session_t * ls) -{ - application_t *app; - - app = application_get_if_valid (ls->app_index); - if (!app) - return 0; - return app_listener_get (app, ls->al_index); -} - session_handle_t app_listen_session_handle (session_t * ls) { app_listener_t *al; - al = app_listener_get_w_session (ls); - if (!al) + /* TODO(fcoras): quic session handles */ + if (ls->al_index == 
SESSION_INVALID_INDEX) return listen_session_get_handle (ls); + al = app_listener_get (ls->al_index); return al->ls_handle; } @@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle) ls = session_get_from_handle_if_valid (handle); if (!ls) return 0; - return app_listener_get_w_session (ls); + return app_listener_get (ls->al_index); } app_listener_t * @@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session (ls); + return app_listener_get (ls->al_index); } } @@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session ((session_t *) ls); + return app_listener_get (ls->al_index); } /* @@ -144,7 +141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext) if (handle != SESSION_INVALID_HANDLE) { ls = listen_session_get_from_handle (handle); - return app_listener_get_w_session ((session_t *) ls); + return app_listener_get (ls->al_index); } } } @@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app, local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE, sep->is_ip4); ls = listen_session_alloc (0, local_st); - ls->app_index = app->app_index; ls->app_wrk_index = sep->app_wrk_index; lh = session_handle (ls); @@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app, { ls = session_get_from_handle (lh); session_free (ls); + app_listener_free (app, app_listener); return rv; } ls = session_get_from_handle (lh); - app_listener = app_listener_get (app, al_index); + app_listener = app_listener_get (al_index); app_listener->local_index = ls->session_index; app_listener->ls_handle = lh; ls->al_index = al_index; @@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app, * build it's own 
specific listening connection. */ ls = listen_session_alloc (0, st); - ls->app_index = app->app_index; ls->app_wrk_index = sep->app_wrk_index; /* Listen pool can be reallocated if the transport is @@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app, { ls = listen_session_get_from_handle (lh); session_free (ls); + app_listener_free (app, app_listener); return rv; } ls = listen_session_get_from_handle (lh); - app_listener = app_listener_get (app, al_index); + app_listener = app_listener_get (al_index); app_listener->session_index = ls->session_index; app_listener->ls_handle = lh; ls->al_index = al_index; @@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al) } static app_worker_t * -app_listener_select_worker (application_t * app, app_listener_t * al) +app_listener_select_worker (app_listener_t *al) { + application_t *app; u32 wrk_index; app = application_get (al->app_index); @@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al) return listen_session_get (al->local_index); } +session_t * +app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index) +{ + u32 si = vec_elt (al->cl_listeners, wrk_map_index); + return session_get (si, 0 /* listener thread */); +} + static app_worker_map_t * app_worker_map_alloc (application_t * app) { @@ -642,7 +647,7 @@ app_rx_mqs_alloc (application_t *app) cfg->ring_cfgs = rc; eqs->ssvm.ssvm_size = svm_msg_q_size_to_alloc (cfg) * n_mqs + (1 << 20); - eqs->ssvm.name = format (0, "%s-rx-mqs-seg%c", app->name, 0); + eqs->ssvm.name = format (0, "%v-rx-mqs-seg%c", app->name, 0); if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD)) { @@ -684,7 +689,7 @@ application_get_rx_mqs_segment (application_t *app) { if (application_use_private_rx_mqs ()) return &app->rx_mqs_segment; - return session_main_get_evt_q_segment (); + return session_main_get_wrk_mqs_segment (); } void @@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index) return pool_elt_at_index (app_main.app_pool, 
app_index); } +static int +_null_app_tx_callback (session_t *s) +{ + return 0; +} + static void application_verify_cb_fns (session_cb_vft_t * cb_fns) { @@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns) clib_warning ("No session disconnect callback function provided"); if (cb_fns->session_reset_callback == 0) clib_warning ("No session reset callback function provided"); + if (!cb_fns->builtin_app_tx_callback) + cb_fns->builtin_app_tx_callback = _null_app_tx_callback; } /** @@ -747,14 +760,14 @@ application_verify_cfg (ssvm_segment_type_t st) u8 is_valid; if (st == SSVM_SEGMENT_MEMFD) { - is_valid = (session_main_get_evt_q_segment () != 0); + is_valid = (session_main_get_wrk_mqs_segment () != 0); if (!is_valid) clib_warning ("memfd seg: vpp's event qs IN binary api svm region"); return is_valid; } else if (st == SSVM_SEGMENT_SHM) { - is_valid = (session_main_get_evt_q_segment () == 0); + is_valid = (session_main_get_wrk_mqs_segment () == 0); if (!is_valid) clib_warning ("shm seg: vpp's event qs NOT IN binary api svm region"); return is_valid; @@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st) return 1; } -static int -application_alloc_and_init (app_init_args_t * a) +static session_error_t +application_alloc_and_init (app_init_args_t *a) { ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD; segment_manager_props_t *props; @@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a) { clib_warning ("mq eventfds can only be used if socket transport is " "used for binary api"); - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; } if (!application_verify_cfg (seg_type)) - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] && opts[APP_OPTIONS_PREALLOC_FIFO_HDRS]) - return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + return SESSION_E_NOSUPPORT; /* Check that the obvious things are properly set up */ application_verify_cb_fns (a->session_cb_vft); 
@@ -819,6 +832,8 @@ application_alloc_and_init (app_init_args_t * a) props->add_segment_size = opts[APP_OPTIONS_ADD_SEGMENT_SIZE]; props->add_segment = 1; } + if (opts[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_USE_HUGE_PAGE) + props->huge_page = 1; if (opts[APP_OPTIONS_RX_FIFO_SIZE]) props->rx_fifo_size = opts[APP_OPTIONS_RX_FIFO_SIZE]; if (opts[APP_OPTIONS_TX_FIFO_SIZE]) @@ -872,12 +887,10 @@ application_free (application_t * app) * Free workers */ - /* *INDENT-OFF* */ pool_flush (wrk_map, app->worker_maps, ({ app_wrk = app_worker_get (wrk_map->wrk_index); app_worker_free (app_wrk); })); - /* *INDENT-ON* */ pool_free (app->worker_maps); /* @@ -920,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index) APP_DBG ("Detaching for app %v index %u api client index %u", app->name, app->app_index, api_client_index); - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); if (app_wrk->api_client_index == api_client_index) vec_add1 (wrks, app_wrk->wrk_index); } - /* *INDENT-ON* */ if (!vec_len (wrks)) { @@ -947,6 +958,31 @@ application_detach_process (application_t * app, u32 api_client_index) vec_free (wrks); } +void +application_namespace_cleanup (app_namespace_t *app_ns) +{ + u32 *app_indices = 0, *app_index; + application_t *app; + u32 ns_index; + + ns_index = app_namespace_index (app_ns); + pool_foreach (app, app_main.app_pool) + if (app->ns_index == ns_index) + vec_add1 (app_indices, app->ns_index); + + vec_foreach (app_index, app_indices) + { + app = application_get (*app_index); + + if (application_is_proxy (app)) + application_remove_proxy (app); + app->flags &= ~APP_OPTIONS_FLAGS_IS_PROXY; + + application_free (app); + } + vec_free (app_indices); +} + app_worker_t * application_get_worker (application_t * app, u32 wrk_map_index) { @@ -972,12 +1008,55 @@ application_n_workers (application_t * app) app_worker_t * application_listener_select_worker (session_t * ls) { - application_t 
*app; app_listener_t *al; - app = application_get (ls->app_index); - al = app_listener_get (app, ls->al_index); - return app_listener_select_worker (app, al); + al = app_listener_get (ls->al_index); + return app_listener_select_worker (al); +} + +always_inline u32 +app_listener_cl_flow_hash (session_dgram_hdr_t *hdr) +{ + u32 hash = 0; + + if (hdr->is_ip4) + { + hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32); + hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32); + hash = clib_crc32c_u16 (hash, hdr->rmt_port); + hash = clib_crc32c_u16 (hash, hdr->lcl_port); + } + else + { + hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]); + hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]); + hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]); + hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]); + hash = clib_crc32c_u16 (hash, hdr->rmt_port); + hash = clib_crc32c_u16 (hash, hdr->lcl_port); + } + + return hash; +} + +session_t * +app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr) +{ + u32 wrk_map_index = 0; + app_listener_t *al; + + al = app_listener_get (ls->al_index); + /* Crude test to check if only worker 0 is set */ + if (al->workers[0] != 1) + { + u32 hash = app_listener_cl_flow_hash (hdr); + hash %= vec_len (al->workers) * sizeof (uword); + wrk_map_index = clib_bitmap_next_set (al->workers, hash); + if (wrk_map_index == ~0) + wrk_map_index = clib_bitmap_first_set (al->workers); + } + + return app_listener_get_wrk_cl_session (al, wrk_map_index); } int @@ -1019,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk) return 0; } -int -vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) +session_error_t +vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a) { fifo_segment_t *fs; app_worker_map_t *wrk_map; @@ -1031,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) app = application_get (a->app_index); if (!app) - return 
VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (a->is_add) { @@ -1054,13 +1133,15 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) { wrk_map = app_worker_map_get (app, a->wrk_map_index); if (!wrk_map) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; app_wrk = app_worker_get (wrk_map->wrk_index); if (!app_wrk) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; application_api_table_del (app_wrk->api_client_index); + if (appns_sapi_enabled ()) + sapi_socket_close_w_handle (app_wrk->api_client_index); app_worker_free (app_wrk); app_worker_map_free (app, wrk_map); if (application_n_workers (app) == 0) @@ -1069,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a) return 0; } -static int -app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) +static session_error_t +app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index) { app_namespace_t *app_ns; if (vec_len (namespace_id) == 0) @@ -1082,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index) *app_ns_index = app_namespace_index_from_id (namespace_id); if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; app_ns = app_namespace_get (*app_ns_index); if (!app_ns) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; if (app_ns->ns_secret != secret) - return VNET_API_ERROR_APP_WRONG_NS_SECRET; + return SESSION_E_WRONG_NS_SECRET; return 0; } @@ -1111,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index) * to external app and a segment manager for shared memory fifo based * communication with the external app. 
*/ -int -vnet_application_attach (vnet_app_attach_args_t * a) +session_error_t +vnet_application_attach (vnet_app_attach_args_t *a) { fifo_segment_t *fs; application_t *app = 0; @@ -1121,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a) u32 app_ns_index = 0; u8 *app_name = 0; u64 secret; - int rv; + session_error_t rv; if (a->api_client_index != APP_INVALID_INDEX) app = application_lookup (a->api_client_index); else if (a->name) app = application_lookup_name (a->name); else - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (app) - return VNET_API_ERROR_APP_ALREADY_ATTACHED; + return SESSION_E_APP_ATTACHED; /* Socket api sets the name and validates namespace prior to attach */ if (!a->use_sock_api) @@ -1185,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a) /** * Detach application from vpp */ -int -vnet_application_detach (vnet_app_detach_args_t * a) +session_error_t +vnet_application_detach (vnet_app_detach_args_t *a) { application_t *app; @@ -1194,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a) if (!app) { clib_warning ("app not attached"); - return VNET_API_ERROR_APPLICATION_NOT_ATTACHED; + return SESSION_E_NOAPP; } app_interface_check_thread_and_barrier (vnet_application_detach, a); @@ -1202,11 +1283,15 @@ vnet_application_detach (vnet_app_detach_args_t * a) return 0; } - static u8 -session_endpoint_in_ns (session_endpoint_t * sep) +session_endpoint_in_ns (session_endpoint_cfg_t *sep) { - u8 is_lep = session_endpoint_is_local (sep); + u8 is_lep; + + if (sep->flags & SESSION_ENDPT_CFG_F_PROXY_LISTEN) + return 1; + + is_lep = session_endpoint_is_local ((session_endpoint_t *) sep); if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX && !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4)) { @@ -1215,6 +1300,7 @@ session_endpoint_in_ns (session_endpoint_t * sep) sep->is_ip4); return 0; } + return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4)); } @@ -1263,8 
+1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep, } } -int -vnet_listen (vnet_listen_args_t * a) +session_error_t +vnet_listen (vnet_listen_args_t *a) { app_listener_t *app_listener; app_worker_t *app_wrk; @@ -1284,7 +1370,7 @@ vnet_listen (vnet_listen_args_t * a) a->sep_ext.app_wrk_index = app_wrk->wrk_index; session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ ); - if (!session_endpoint_in_ns (&a->sep)) + if (!session_endpoint_in_ns (&a->sep_ext)) return SESSION_E_INVALID_NS; /* @@ -1317,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a) return 0; } -int -vnet_connect (vnet_connect_args_t * a) +session_error_t +vnet_connect (vnet_connect_args_t *a) { app_worker_t *client_wrk; application_t *client; - ASSERT (vlib_thread_is_main_w_barrier ()); + ASSERT (session_vlib_thread_is_cl_thread ()); if (session_endpoint_is_zero (&a->sep)) return SESSION_E_INVALID_RMT_IP; @@ -1341,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a) */ if (application_has_local_scope (client)) { - int rv; + session_error_t rv; a->sep_ext.original_tp = a->sep_ext.transport_proto; a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE; @@ -1356,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a) return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh); } -int -vnet_unlisten (vnet_unlisten_args_t * a) +session_error_t +vnet_unlisten (vnet_unlisten_args_t *a) { app_worker_t *app_wrk; app_listener_t *al; @@ -1387,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a) return app_worker_stop_listen (app_wrk, al); } -int +session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a) { app_worker_t *app_wrk; @@ -1408,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a) return 0; } -int -vnet_disconnect_session (vnet_disconnect_args_t * a) +session_error_t +vnet_disconnect_session (vnet_disconnect_args_t *a) { app_worker_t *app_wrk; session_t *s; @@ -1449,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk) if 
(!app) return SESSION_E_NOAPP; - app_listener = app_listener_get (app, s->al_index); + app_listener = app_listener_get (s->al_index); /* Only remove from lb for now */ app_listener->workers = clib_bitmap_set (app_listener->workers, @@ -1493,6 +1579,12 @@ application_has_global_scope (application_t * app) return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE; } +int +application_original_dst_is_enabled (application_t *app) +{ + return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST; +} + static clib_error_t * application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto, u8 transport_proto, u8 is_start) @@ -1609,12 +1701,8 @@ application_setup_proxy (application_t * app) ASSERT (application_is_proxy (app)); - /* *INDENT-OFF* */ - transport_proto_foreach (tp, ({ - if (transports & (1 << tp)) - application_start_stop_proxy (app, tp, 1); - })); - /* *INDENT-ON* */ + transport_proto_foreach (tp, transports) + application_start_stop_proxy (app, tp, 1); } void @@ -1625,12 +1713,8 @@ application_remove_proxy (application_t * app) ASSERT (application_is_proxy (app)); - /* *INDENT-OFF* */ - transport_proto_foreach (tp, ({ - if (transports & (1 << tp)) - application_start_stop_proxy (app, tp, 0); - })); - /* *INDENT-ON* */ + transport_proto_foreach (tp, transports) + application_start_stop_proxy (app, tp, 0); } segment_manager_props_t * @@ -1657,12 +1741,11 @@ application_format_listeners (application_t * app, int verbose) if (!app) { - vlib_cli_output (vm, "%U", format_app_worker_listener, 0 /* header */ , + vlib_cli_output (vm, "%U", format_app_worker_listener, NULL /* header */, 0, 0, verbose); return; } - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); if (hash_elts (app_wrk->listeners_table) == 0) @@ -1672,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose) handle, sm_index, verbose); })); } - /* *INDENT-ON* */ } static void @@ -1687,12 +1769,10 @@ 
application_format_connects (application_t * app, int verbose) return; } - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); app_worker_format_connects (app_wrk, verbose); } - /* *INDENT-ON* */ } u8 * @@ -1793,12 +1873,10 @@ format_application (u8 * s, va_list * args) format_memory_size, props->rx_fifo_size, format_memory_size, props->tx_fifo_size); - /* *INDENT-OFF* */ pool_foreach (wrk_map, app->worker_maps) { app_wrk = app_worker_get (wrk_map->wrk_index); s = format (s, "%U", format_app_worker, app_wrk); } - /* *INDENT-ON* */ return s; } @@ -1816,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose) application_format_listeners (0, verbose); - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { application_format_listeners (app, verbose); } - /* *INDENT-ON* */ } void @@ -1836,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose) application_format_connects (0, verbose); - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { application_format_connects (app, verbose); } - /* *INDENT-ON* */ } static clib_error_t * @@ -1850,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input, app_cert_key_pair_t *ckpair; session_cli_return_if_not_enabled (); - /* *INDENT-OFF* */ pool_foreach (ckpair, app_main.cert_key_pair_store) { vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair); } - /* *INDENT-ON* */ return 0; } @@ -1865,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app) app_worker_t *wrk; int i; - /* *INDENT-OFF* */ pool_foreach (map, app->worker_maps) { wrk = app_worker_get (map->wrk_index); vlib_cli_output (vm, "[A%d][%d]%U", app->app_index, map->wrk_index, format_svm_msg_q, wrk->event_queue); } - /* *INDENT-ON* */ for (i = 0; i < vec_len (app->rx_mqs); i++) vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, format_svm_msg_q, @@ -1893,11 +1963,9 @@ appliction_format_all_app_mq 
(vlib_main_t * vm) session_main_get_vpp_event_queue (i)); } - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { appliction_format_app_mq (vm, app); } - /* *INDENT-ON* */ return 0; } @@ -1905,10 +1973,11 @@ static clib_error_t * show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - int do_server = 0, do_client = 0, do_mq = 0; + int do_server = 0, do_client = 0, do_mq = 0, do_transports = 0; application_t *app; u32 app_index = ~0; int verbose = 0; + u8 is_ta; session_cli_return_if_not_enabled (); @@ -1918,6 +1987,8 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, do_server = 1; else if (unformat (input, "client")) do_client = 1; + else if (unformat (input, "transports")) + do_transports = 1; else if (unformat (input, "mq")) do_mq = 1; else if (unformat (input, "%u", &app_index)) @@ -1971,11 +2042,11 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input, if (!do_server && !do_client) { vlib_cli_output (vm, "%U", format_application, 0, 0); - /* *INDENT-OFF* */ pool_foreach (app, app_main.app_pool) { - vlib_cli_output (vm, "%U", format_application, app, 0); + is_ta = app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP; + if ((!do_transports && !is_ta) || (do_transports && is_ta)) + vlib_cli_output (vm, "%U", format_application, app, 0); } - /* *INDENT-ON* */ } return 0; @@ -2045,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index) u32 *app_index; if (!(ckpair = app_cert_key_pair_get_if_valid (index))) - return (VNET_API_ERROR_INVALID_VALUE); + return SESSION_E_INVALID; vec_foreach (app_index, ckpair->app_interests) { @@ -2078,23 +2149,20 @@ application_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (application_init); -VLIB_CLI_COMMAND (show_app_command, static) = -{ +VLIB_CLI_COMMAND (show_app_command, static) = { .path = "show app", - .short_help = "show app [app_id] [server|client] [mq] [verbose]", + .short_help = "show app [index] [server|client] [mq] [verbose] " 
+ "[transports]", .function = show_app_command_fn, }; -VLIB_CLI_COMMAND (show_certificate_command, static) = -{ +VLIB_CLI_COMMAND (show_certificate_command, static) = { .path = "show app certificate", .short_help = "list app certs and keys present in store", .function = show_certificate_command_fn, }; -/* *INDENT-ON* */ crypto_engine_type_t app_crypto_engine_type_add (void) diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 5ddf1d21fe3..c68a911230f 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -29,6 +29,16 @@ #define APP_DBG(_fmt, _args...) #endif +typedef struct app_wrk_postponed_msg_ +{ + u32 len; + u8 event_type; + u8 ring; + u8 is_sapi; + int fd; + u8 data[SESSION_CTRL_MSG_TX_MAX_SIZE]; +} app_wrk_postponed_msg_t; + typedef struct app_worker_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -59,11 +69,20 @@ typedef struct app_worker_ /** API index for the worker. Needed for multi-process apps */ u32 api_client_index; + /** Set if mq is congested */ + u8 mq_congested; + u8 app_is_builtin; /** Pool of half-open session handles. 
Tracked in case worker detaches */ session_handle_t *half_open_table; + /* Per vpp worker fifos of events for app worker */ + session_event_t **wrk_evts; + + /* Vector of vpp workers mq congestion flags */ + u8 *wrk_mq_congested; + /** Protects detached seg managers */ clib_spinlock_t detached_seg_managers_lock; @@ -87,6 +106,8 @@ typedef struct app_listener_ session_handle_t ls_handle; /**< session handle of the local or global listening session that also identifies the app listener */ + u32 *cl_listeners; /**< vector that maps app workers to their + cl sessions with fifos */ } app_listener_t; typedef enum app_rx_mq_flags_ @@ -130,9 +151,6 @@ typedef struct application_ u16 proxied_transports; - /** Pool of listeners for the app */ - app_listener_t *listeners; - /** Preferred tls engine */ u8 tls_engine; @@ -179,6 +197,9 @@ typedef struct app_main_ */ application_t *app_pool; + /** Pool of app listeners */ + app_listener_t *listeners; + /** * Hash table of apps by api client index */ @@ -227,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args #define APP_NS_INVALID_INDEX ((u32)~0) #define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0) -app_listener_t *app_listener_get (application_t * app, u32 al_index); +app_listener_t *app_listener_get (u32 al_index); int app_listener_alloc_and_init (application_t * app, session_endpoint_cfg_t * sep, app_listener_t ** listener); @@ -235,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener); session_handle_t app_listener_handle (app_listener_t * app_listener); app_listener_t *app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep); +session_t *app_listener_select_wrk_cl_session (session_t *ls, + session_dgram_hdr_t *hdr); /** * Get app listener handle for listening session @@ -258,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls); * @return pointer to app listener or 0 */ app_listener_t *app_listener_get_w_handle (session_handle_t handle); -app_listener_t 
*app_listener_get_w_session (session_t * ls); session_t *app_listener_get_session (app_listener_t * al); session_t *app_listener_get_local_session (app_listener_t * al); +session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index); application_t *application_get (u32 index); application_t *application_get_if_valid (u32 index); @@ -280,6 +303,8 @@ u8 application_has_local_scope (application_t * app); u8 application_has_global_scope (application_t * app); void application_setup_proxy (application_t * app); void application_remove_proxy (application_t * app); +void application_namespace_cleanup (app_namespace_t *app_ns); +int application_original_dst_is_enabled (application_t *app); segment_manager_props_t *application_get_segment_manager_properties (u32 app_index); @@ -296,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en); * App worker */ +always_inline u8 +app_worker_mq_is_congested (app_worker_t *app_wrk) +{ + return app_wrk->mq_congested > 0; +} + app_worker_t *app_worker_alloc (application_t * app); int application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk); @@ -306,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s); void app_worker_free (app_worker_t * app_wrk); int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep, session_handle_t *rsh); -int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr); +session_error_t app_worker_start_listen (app_worker_t *app_wrk, + app_listener_t *lstnr); int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al); int app_worker_init_accepted (session_t * s); +int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh, + u32 opaque, session_error_t err); +int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh, + u32 opaque, session_error_t err); int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s); int app_worker_init_connected (app_worker_t 
* app_wrk, session_t * s); int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s, @@ -321,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk, int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s); int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s, session_cleanup_ntf_t ntf); +int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s, + session_cleanup_ntf_t ntf, + void (*cleanup_cb) (session_t *s)); int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s, session_handle_t new_sh); -int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s); -int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s); +int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s); int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 len); +void app_worker_add_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t evt_type); +void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, + session_event_t *evt); +int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index); +void app_worker_del_all_events (app_worker_t *app_wrk); segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *, session_t *); segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *); @@ -338,9 +382,14 @@ int app_worker_del_segment_notify (app_worker_t * app_wrk, u32 app_worker_n_listeners (app_worker_t * app); session_t *app_worker_first_listener (app_worker_t * app, u8 fib_proto, u8 transport_proto); -int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt); -int app_worker_lock_and_send_event (app_worker_t * app, session_t * s, - u8 evt_type); +void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg, + u32 msg_len, int fd); +void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, + u32 msg_len); +u8 
app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index); +void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index); +void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, + u32 thread_index); session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto, u8 transport_proto); void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index); @@ -349,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args); u8 *format_crypto_engine (u8 * s, va_list * args); u8 *format_crypto_context (u8 * s, va_list * args); void app_worker_format_connects (app_worker_t * app_wrk, int verbose); -int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a); +session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a); uword unformat_application_proto (unformat_input_t * input, va_list * args); @@ -357,17 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index); app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index); app_cert_key_pair_t *app_cert_key_pair_get_default (); -/* Needed while we support both bapi and mq ctrl messages */ -int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, - session_handle_t handle, int rv); -int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, - session_t * s, session_error_t err); -void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh, - u32 context, int rv); +void sapi_socket_close_w_handle (u32 api_handle); crypto_engine_type_t app_crypto_engine_type_add (void); u8 app_crypto_engine_n_types (void); +static inline u8 +app_worker_application_is_builtin (app_worker_t *app_wrk) +{ + return app_wrk->app_is_builtin; +} + #endif /* SRC_VNET_SESSION_APPLICATION_H_ */ /* diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 74f456a1eab..a62f914d43a 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -73,8 
+73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args) static u8 *cache_uri; static session_endpoint_cfg_t *cache_sep; -int -parse_uri (char *uri, session_endpoint_cfg_t * sep) +session_error_t +parse_uri (char *uri, session_endpoint_cfg_t *sep) { unformat_input_t _input, *input = &_input; @@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep) if (!unformat (input, "%U", unformat_vnet_uri, sep)) { unformat_free (input); - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; } unformat_free (input); @@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep) return 0; } -int -vnet_bind_uri (vnet_listen_args_t * a) +session_error_t +vnet_bind_uri (vnet_listen_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; int rv; @@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a) return vnet_listen (a); } -int -vnet_unbind_uri (vnet_unlisten_args_t * a) +session_error_t +vnet_unbind_uri (vnet_unlisten_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; application_t *app; session_t *listener; u32 table_index; - int rv; + session_error_t rv; if ((rv = parse_uri (a->uri, &sep))) return rv; app = application_get (a->app_index); if (!app) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4)); listener = session_lookup_listener (table_index, (session_endpoint_t *) & sep); if (!listener) - return VNET_API_ERROR_ADDRESS_NOT_IN_USE; + return SESSION_E_ADDR_NOT_IN_USE; a->handle = listen_session_get_handle (listener); return vnet_unlisten (a); } -int -vnet_connect_uri (vnet_connect_args_t * a) +session_error_t +vnet_connect_uri (vnet_connect_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; - int rv; + session_error_t rv; if ((rv = parse_uri (a->uri, &sep))) return rv; diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 
b10dd6c150d..f175e4a58c6 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -62,6 +62,13 @@ typedef struct session_cb_vft_ /** Notify app that session pool migration happened */ void (*session_migrate_callback) (session_t * s, session_handle_t new_sh); + /** Notify app (external only) that listen was processed */ + int (*session_listened_callback) (u32 app_wrk_index, u32 api_context, + session_handle_t handle, int rv); + /** Notify app (external only) that unlisten was processed */ + void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh, + u32 context, int rv); + /** Direct RX callback for built-in application */ int (*builtin_app_rx_callback) (session_t * session); @@ -74,6 +81,8 @@ typedef struct session_cb_vft_ /** Delegate fifo-tuning-logic to application */ int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 bytes); + /** Custom fifo allocation for proxy */ + int (*proxy_alloc_session_fifos) (session_t *s); } session_cb_vft_t; @@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t /* * Results */ - u64 handle; + session_handle_t handle; } vnet_listen_args_t; typedef struct _vnet_unlisten_args_t @@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t union { char *uri; - u64 handle; /**< Session handle */ + session_handle_t handle; /**< Session handle */ }; u32 app_index; /**< Owning application index */ u32 wrk_map_index; /**< App's local pool worker index */ @@ -232,7 +241,9 @@ typedef enum _ (USE_GLOBAL_SCOPE, "App can use global session scope") \ _ (USE_LOCAL_SCOPE, "App can use local session scope") \ _ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \ - _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") + _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \ + _ (USE_HUGE_PAGE, "Use huge page for FIFO") \ + _ (GET_ORIGINAL_DST, "Get original dst enabled") typedef enum _app_options { @@ -269,24 +280,26 @@ typedef enum session_fd_flag_ 
#undef _ } session_fd_flag_t; -int parse_uri (char *uri, session_endpoint_cfg_t * sep); -int vnet_bind_uri (vnet_listen_args_t *); -int vnet_unbind_uri (vnet_unlisten_args_t * a); -int vnet_connect_uri (vnet_connect_args_t * a); +session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep); +session_error_t vnet_bind_uri (vnet_listen_args_t *); +session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a); +session_error_t vnet_connect_uri (vnet_connect_args_t *a); -int vnet_application_attach (vnet_app_attach_args_t * a); -int vnet_application_detach (vnet_app_detach_args_t * a); -int vnet_listen (vnet_listen_args_t * a); -int vnet_connect (vnet_connect_args_t * a); -int vnet_unlisten (vnet_unlisten_args_t * a); -int vnet_shutdown_session (vnet_shutdown_args_t *a); -int vnet_disconnect_session (vnet_disconnect_args_t * a); +session_error_t vnet_application_attach (vnet_app_attach_args_t *a); +session_error_t vnet_application_detach (vnet_app_detach_args_t *a); +session_error_t vnet_listen (vnet_listen_args_t *a); +session_error_t vnet_connect (vnet_connect_args_t *a); +session_error_t vnet_unlisten (vnet_unlisten_args_t *a); +session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a); +session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a); int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a); int vnet_app_del_cert_key_pair (u32 index); /** Ask for app cb on pair deletion */ int vnet_app_add_cert_key_interest (u32 index, u32 app_index); +uword unformat_vnet_uri (unformat_input_t *input, va_list *args); + typedef struct app_session_transport_ { ip46_address_t rmt_ip; /**< remote ip */ @@ -296,15 +309,15 @@ typedef struct app_session_transport_ u8 is_ip4; /**< set if uses ip4 networking */ } app_session_transport_t; -#define foreach_app_session_field \ - _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \ - _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \ - _(session_type_t, session_type) /**< session type */ \ - _(volatile u8, session_state) 
/**< session state */ \ - _(u32, session_index) /**< index in owning pool */ \ - _(app_session_transport_t, transport) /**< transport info */ \ - _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \ - _(u8, is_dgram) /**< flag for dgram mode */ \ +#define foreach_app_session_field \ + _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \ + _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \ + _ (session_type_t, session_type) /**< session type */ \ + _ (volatile u8, session_state) /**< session state */ \ + _ (u32, session_index) /**< index in owning pool */ \ + _ (app_session_transport_t, transport) /**< transport info */ \ + _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \ + _ (u8, is_dgram) /**< flag for dgram mode */ typedef struct { @@ -343,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE, typedef struct session_bound_msg_ { u32 context; - u64 handle; + session_handle_t handle; i32 retval; u8 lcl_is_ip4; u8 lcl_ip[16]; @@ -366,15 +379,15 @@ typedef struct session_unlisten_msg_ typedef struct session_unlisten_reply_msg_ { u32 context; - u64 handle; + session_handle_t handle; i32 retval; } __clib_packed session_unlisten_reply_msg_t; typedef struct session_accepted_msg_ { u32 context; - u64 listener_handle; - u64 handle; + session_handle_t listener_handle; + session_handle_t handle; uword server_rx_fifo; uword server_tx_fifo; u64 segment_handle; @@ -383,13 +396,15 @@ typedef struct session_accepted_msg_ transport_endpoint_t lcl; transport_endpoint_t rmt; u8 flags; + u32 original_dst_ip4; + u16 original_dst_port; } __clib_packed session_accepted_msg_t; typedef struct session_accepted_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_accepted_reply_msg_t; typedef struct session_connect_msg_ @@ -408,6 +423,7 @@ typedef struct session_connect_msg_ u32 ckpair_index; u8 crypto_engine; u8 flags; + u8 dscp; uword ext_config; } __clib_packed session_connect_msg_t; @@ -428,7 +444,7 @@ typedef 
struct session_connected_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; uword server_rx_fifo; uword server_tx_fifo; u64 segment_handle; @@ -458,33 +474,33 @@ typedef struct session_disconnected_msg_ { u32 client_index; u32 context; - u64 handle; + session_handle_t handle; } __clib_packed session_disconnected_msg_t; typedef struct session_disconnected_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_disconnected_reply_msg_t; typedef struct session_reset_msg_ { u32 client_index; u32 context; - u64 handle; + session_handle_t handle; } __clib_packed session_reset_msg_t; typedef struct session_reset_reply_msg_ { u32 context; i32 retval; - u64 handle; + session_handle_t handle; } __clib_packed session_reset_reply_msg_t; typedef struct session_req_worker_update_msg_ { - u64 session_handle; + session_handle_t session_handle; } __clib_packed session_req_worker_update_msg_t; /* NOTE: using u16 for wrk indices because message needs to fit in 18B */ @@ -493,12 +509,12 @@ typedef struct session_worker_update_msg_ u32 client_index; u16 wrk_index; u16 req_wrk_index; - u64 handle; + session_handle_t handle; } __clib_packed session_worker_update_msg_t; typedef struct session_worker_update_reply_msg_ { - u64 handle; + session_handle_t handle; uword rx_fifo; uword tx_fifo; u64 segment_handle; @@ -612,8 +628,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type, { if (svm_msg_q_try_lock (mq)) return -1; - if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING) - || svm_msg_q_is_full (mq))) + if (PREDICT_FALSE ( + svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) { svm_msg_q_unlock (mq); return -2; @@ -628,9 +644,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type, else { svm_msg_q_lock (mq); - while (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING) - || svm_msg_q_is_full (mq)) - svm_msg_q_wait_prod (mq); + while 
(svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)) + svm_msg_q_or_ring_wait_prod (mq, SESSION_MQ_IO_EVT_RING); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_index = session_index; @@ -640,14 +655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type, } } +#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \ + noblock) \ + app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \ + noblock) + always_inline int -app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, - svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type, - u8 do_evt, u8 noblock) +app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at, + svm_msg_q_t *vpp_evt_q, u8 *data, u32 len, + u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock) { session_dgram_hdr_t hdr; int rv; - if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len)) return 0; @@ -658,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at, hdr.rmt_port = at->rmt_port; clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t)); hdr.lcl_port = at->lcl_port; - - /* *INDENT-OFF* */ + hdr.gso_size = gso_size; svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }}; - /* *INDENT-ON* */ rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ ); if (PREDICT_FALSE (rv < 0)) @@ -786,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len) return app_recv_stream (s, data, len); } -/* *INDENT-OFF* */ static char *session_error_str[] = { #define _(sym, str) str, foreach_session_error #undef _ }; -/* *INDENT-ON* */ static inline u8 * format_session_error (u8 * s, va_list * args) @@ -817,6 +832,8 @@ typedef enum app_sapi_msg_type APP_SAPI_MSG_TYPE_ADD_DEL_WORKER, APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY, APP_SAPI_MSG_TYPE_SEND_FDS, + APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY, + 
APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY, } __clib_packed app_sapi_msg_type_e; typedef struct app_sapi_attach_msg_ @@ -861,6 +878,22 @@ typedef struct app_sapi_worker_add_del_reply_msg_ u8 is_add; } __clib_packed app_sapi_worker_add_del_reply_msg_t; +typedef struct app_sapi_cert_key_add_del_msg_ +{ + u32 context; + u32 index; + u16 cert_len; + u16 certkey_len; + u8 is_add; +} __clib_packed app_sapi_cert_key_add_del_msg_t; + +typedef struct app_sapi_cert_key_add_del_reply_msg_ +{ + u32 context; + i32 retval; + u32 index; +} __clib_packed app_sapi_cert_key_add_del_reply_msg_t; + typedef struct app_sapi_msg_ { app_sapi_msg_type_e type; @@ -870,6 +903,8 @@ typedef struct app_sapi_msg_ app_sapi_attach_reply_msg_t attach_reply; app_sapi_worker_add_del_msg_t worker_add_del; app_sapi_worker_add_del_reply_msg_t worker_add_del_reply; + app_sapi_cert_key_add_del_msg_t cert_key_add_del; + app_sapi_cert_key_add_del_reply_msg_t cert_key_add_del_reply; }; } __clib_packed app_sapi_msg_t; diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 3c62dade0f5..3cb743d10e0 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -41,9 +41,25 @@ typedef struct ct_segments_ ct_segment_t *segments; } ct_segments_ctx_t; +typedef struct ct_cleanup_req_ +{ + u32 ct_index; +} ct_cleanup_req_t; + +typedef struct ct_worker_ +{ + ct_connection_t *connections; /**< Per-worker connection pools */ + u32 *pending_connects; /**< Fifo of pending ho indices */ + ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */ + u8 have_connects; /**< Set if connect rpc pending */ + u8 have_cleanups; /**< Set if cleanup rpc pending */ + clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */ + u32 *new_connects; /**< Burst of connects to be done */ +} ct_worker_t; + typedef struct ct_main_ { - ct_connection_t **connections; /**< Per-worker connection pools */ + ct_worker_t *wrk; /**< Per-worker state */ u32 
n_workers; /**< Number of vpp workers */ u32 n_sessions; /**< Cumulative sessions counter */ u32 *ho_reusable; /**< Vector of reusable ho indices */ @@ -51,17 +67,28 @@ typedef struct ct_main_ clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */ uword *app_segs_ctxs_table; /**< App handle to segment pool map */ ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */ + u32 **fwrk_pending_connects; /**< First wrk pending half-opens */ + u32 fwrk_thread; /**< First worker thread */ + u8 fwrk_have_flush; /**< Flag for connect flush rpc */ } ct_main_t; static ct_main_t ct_main; +static inline ct_worker_t * +ct_worker_get (u32 thread_index) +{ + return &ct_main.wrk[thread_index]; +} + static ct_connection_t * ct_connection_alloc (u32 thread_index) { + ct_worker_t *wrk = ct_worker_get (thread_index); ct_connection_t *ct; - pool_get_zero (ct_main.connections[thread_index], ct); - ct->c_c_index = ct - ct_main.connections[thread_index]; + pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES); + clib_memset (ct, 0, sizeof (*ct)); + ct->c_c_index = ct - wrk->connections; ct->c_thread_index = thread_index; ct->client_wrk = ~0; ct->server_wrk = ~0; @@ -73,22 +100,25 @@ ct_connection_alloc (u32 thread_index) static ct_connection_t * ct_connection_get (u32 ct_index, u32 thread_index) { - if (pool_is_free_index (ct_main.connections[thread_index], ct_index)) + ct_worker_t *wrk = ct_worker_get (thread_index); + + if (pool_is_free_index (wrk->connections, ct_index)) return 0; - return pool_elt_at_index (ct_main.connections[thread_index], ct_index); + return pool_elt_at_index (wrk->connections, ct_index); } static void ct_connection_free (ct_connection_t * ct) { + ct_worker_t *wrk = ct_worker_get (ct->c_thread_index); + if (CLIB_DEBUG) { - u32 thread_index = ct->c_thread_index; - memset (ct, 0xfc, sizeof (*ct)); - pool_put (ct_main.connections[thread_index], ct); + clib_memset (ct, 0xfc, sizeof (*ct)); + pool_put (wrk->connections, ct); return; } - 
pool_put (ct_main.connections[ct->c_thread_index], ct); + pool_put (wrk->connections, ct); } static ct_connection_t * @@ -99,11 +129,18 @@ ct_half_open_alloc (void) clib_spinlock_lock (&cm->ho_reuseable_lock); vec_foreach (hip, cm->ho_reusable) - pool_put_index (cm->connections[0], *hip); + pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip); vec_reset_length (cm->ho_reusable); clib_spinlock_unlock (&cm->ho_reuseable_lock); - return ct_connection_alloc (0); + return ct_connection_alloc (cm->fwrk_thread); +} + +static ct_connection_t * +ct_half_open_get (u32 ho_index) +{ + ct_main_t *cm = &ct_main; + return ct_connection_get (ho_index, cm->fwrk_thread); } void @@ -137,6 +174,33 @@ ct_session_endpoint (session_t * ll, session_endpoint_t * sep) } static void +ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client) +{ + ct_connection_t *peer_ct; + + peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index); + + if (is_client) + { + ct->client_wrk = APP_INVALID_INDEX; + if (peer_ct) + ct->client_wrk = APP_INVALID_INDEX; + } + else + { + ct->server_wrk = APP_INVALID_INDEX; + if (peer_ct) + ct->server_wrk = APP_INVALID_INDEX; + } +} + +static inline u64 +ct_client_seg_handle (u64 server_sh, u32 client_wrk_index) +{ + return (((u64) client_wrk_index << 56) | server_sh); +} + +static void ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo, svm_fifo_t *tx_fifo) { @@ -146,8 +210,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo, app_worker_t *app_wrk; ct_segment_t *ct_seg; fifo_segment_t *fs; - u8 del_segment = 0; u32 seg_index; + session_t *s; int cnt; /* @@ -202,77 +266,82 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo, if (ct->flags & CT_CONN_F_CLIENT) { cnt = ct_seg->client_n_sessions; - if (!cnt) - ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED; + if (cnt) + goto done; + ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED; + s = session_get (ct->c_s_index, ct->c_thread_index); + if (s->app_wrk_index 
== APP_INVALID_INDEX) + ct_set_invalid_app_wrk (ct, 1 /* is_client */); } else { cnt = ct_seg->server_n_sessions; - if (!cnt) - ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED; + if (cnt) + goto done; + ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED; + s = session_get (ct->c_s_index, ct->c_thread_index); + if (s->app_wrk_index == APP_INVALID_INDEX) + ct_set_invalid_app_wrk (ct, 0 /* is_client */); } + if (!(ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) || + !(ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED)) + goto done; + /* * Remove segment context because both client and server detached */ - if (!cnt && (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) && - (ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED)) - { - pool_put_index (seg_ctx->segments, ct->ct_seg_index); + pool_put_index (seg_ctx->segments, ct->ct_seg_index); - /* - * No more segment indices left, remove the segments context - */ - if (!pool_elts (seg_ctx->segments)) - { - u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk; - table_handle = (u64) seg_ctx->sm_index << 32 | table_handle; - hash_unset (cm->app_segs_ctxs_table, table_handle); - pool_free (seg_ctx->segments); - pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index); - } - del_segment = 1; + /* + * No more segment indices left, remove the segments context + */ + if (!pool_elts (seg_ctx->segments)) + { + u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk; + table_handle = (u64) seg_ctx->sm_index << 32 | table_handle; + hash_unset (cm->app_segs_ctxs_table, table_handle); + pool_free (seg_ctx->segments); + pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index); } - clib_rwlock_writer_unlock (&cm->app_segs_lock); - /* - * Session counter went to zero, notify the app that detached + * Segment to be removed so notify both apps */ - if (cnt) - return; - if (ct->flags & CT_CONN_F_CLIENT) - { - app_wrk = app_worker_get_if_valid (ct->client_wrk); - /* Determine if client app still needs notification, i.e., if it is - * still 
attached. If client detached and this is the last ct session - * on this segment, then its connects segment manager should also be - * detached, so do not send notification */ - if (app_wrk) - { - segment_manager_t *csm; - csm = app_worker_get_connect_segment_manager (app_wrk); - if (!segment_manager_app_detached (csm)) - app_worker_del_segment_notify (app_wrk, ct->segment_handle); - } - } - else if (!segment_manager_app_detached (sm)) + app_wrk = app_worker_get_if_valid (ct->client_wrk); + /* Determine if client app still needs notification, i.e., if it is + * still attached. If client detached and this is the last ct session + * on this segment, then its connects segment manager should also be + * detached, so do not send notification */ + if (app_wrk) { - app_wrk = app_worker_get (ct->server_wrk); - app_worker_del_segment_notify (app_wrk, ct->segment_handle); + segment_manager_t *csm; + csm = app_worker_get_connect_segment_manager (app_wrk); + if (!segment_manager_app_detached (csm)) + app_worker_del_segment_notify ( + app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk)); } - if (!del_segment) - return; - + /* Notify server app and free segment */ segment_manager_lock_and_del_segment (sm, seg_index); /* Cleanup segment manager if needed. 
If server detaches there's a chance * the client's sessions will hold up segment removal */ if (segment_manager_app_detached (sm) && !segment_manager_has_fifos (sm)) segment_manager_free_safe (sm); + +done: + + clib_rwlock_writer_unlock (&cm->app_segs_lock); +} + +static void +ct_session_force_disconnect_server (ct_connection_t *sct) +{ + sct->peer_index = ~0; + session_transport_closing_notify (&sct->connection); } int @@ -294,9 +363,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err) /* Client closed while waiting for reply from server */ if (PREDICT_FALSE (!cct)) { - session_transport_closing_notify (&sct->connection); - session_transport_delete_notify (&sct->connection); - ct_connection_free (sct); + ct_session_force_disconnect_server (sct); return 0; } @@ -307,16 +374,19 @@ ct_session_connect_notify (session_t *ss, session_error_t err) goto connect_error; /* - * Alloc client session + * Alloc client session, server session assumed to be established */ + ASSERT (ss->session_state >= SESSION_STATE_READY); + cs = session_alloc (thread_index); ss = session_get (ss_index, thread_index); cs->session_type = ss->session_type; cs->listener_handle = SESSION_INVALID_HANDLE; - cs->session_state = SESSION_STATE_CONNECTING; + session_set_state (cs, SESSION_STATE_CONNECTING); cs->app_wrk_index = client_wrk->wrk_index; cs->connection_index = cct->c_c_index; + cs->opaque = opaque; cct->c_s_index = cs->session_index; /* This will allocate fifos for the session. 
They won't be used for @@ -325,23 +395,23 @@ ct_session_connect_notify (session_t *ss, session_error_t err) if ((err = app_worker_init_connected (client_wrk, cs))) { session_free (cs); - session_close (ss); + ct_session_force_disconnect_server (sct); err = SESSION_E_ALLOC; goto connect_error; } - cs->session_state = SESSION_STATE_CONNECTING; + session_set_state (cs, SESSION_STATE_CONNECTING); if (app_worker_connect_notify (client_wrk, cs, 0, opaque)) { segment_manager_dealloc_fifos (cs->rx_fifo, cs->tx_fifo); session_free (cs); - session_close (ss); + ct_session_force_disconnect_server (sct); goto cleanup_client; } cs = session_get (cct->c_s_index, cct->c_thread_index); - cs->session_state = SESSION_STATE_READY; + session_set_state (cs, SESSION_STATE_READY); return 0; @@ -373,9 +443,6 @@ ct_lookup_free_segment (ct_main_t *cm, segment_manager_t *sm, pool_foreach (ct_seg, seg_ctx->segments) { /* Client or server has detached so segment cannot be used */ - if ((ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED) || - (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED)) - continue; fs = segment_manager_get_segment (sm, ct_seg->segment_index); free_bytes = fifo_segment_available_bytes (fs); max_fifos = fifo_segment_size (fs) / seg_ctx->fifo_pair_bytes; @@ -395,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle, segment_manager_t *sm, u32 client_wrk_index) { u32 seg_ctx_index = ~0, sm_index, pair_bytes; + u64 seg_size, seg_handle, client_seg_handle; segment_manager_props_t *props; const u32 margin = 16 << 10; ct_segments_ctx_t *seg_ctx; app_worker_t *client_wrk; - u64 seg_size, seg_handle; application_t *server; ct_segment_t *ct_seg; uword *spp; @@ -461,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle, goto error; client_wrk = app_worker_get (client_wrk_index); - if (app_worker_add_segment_notify (client_wrk, seg_handle)) + /* Make sure client workers do not have overlapping segment handles. 
+ * Ideally, we should attach fs to client worker segment manager and + * create a new handle but that's not currently possible. */ + client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index); + if (app_worker_add_segment_notify (client_wrk, client_seg_handle)) { app_worker_del_segment_notify (server_wrk, seg_handle); goto error; @@ -515,6 +586,8 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct, ct->seg_ctx_index = ct_seg->seg_ctx_index; ct->ct_seg_index = ct_seg->ct_seg_index; fs_index = ct_seg->segment_index; + ct_seg->flags &= + ~(CT_SEGMENT_F_SERVER_DETACHED | CT_SEGMENT_F_CLIENT_DETACHED); __atomic_add_fetch (&ct_seg->server_n_sessions, 1, __ATOMIC_RELAXED); __atomic_add_fetch (&ct_seg->client_n_sessions, 1, __ATOMIC_RELAXED); } @@ -573,10 +646,6 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct, ls->tx_fifo->shr->master_session_index = ls->session_index; ls->rx_fifo->master_thread_index = ls->thread_index; ls->tx_fifo->master_thread_index = ls->thread_index; - ls->rx_fifo->segment_manager = sm_index; - ls->tx_fifo->segment_manager = sm_index; - ls->rx_fifo->segment_index = fs_index; - ls->tx_fifo->segment_index = fs_index; seg_handle = segment_manager_segment_handle (sm, fs); segment_manager_segment_reader_unlock (sm); @@ -587,23 +656,21 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct, } static void -ct_accept_rpc_wrk_handler (void *accept_args) +ct_accept_one (u32 thread_index, u32 ho_index) { - u32 cct_index, ho_index, thread_index, ll_index; ct_connection_t *sct, *cct, *ho; transport_connection_t *ll_ct; app_worker_t *server_wrk; + u32 cct_index, ll_index; session_t *ss, *ll; /* * Alloc client ct and initialize from ho */ - thread_index = vlib_get_thread_index (); cct = ct_connection_alloc (thread_index); cct_index = cct->c_c_index; - ho_index = pointer_to_uword (accept_args); - ho = ct_connection_get (ho_index, 0); + ho = ct_half_open_get (ho_index); /* Unlikely 
but half-open session and transport could have been freed */ if (PREDICT_FALSE (!ho)) @@ -640,7 +707,7 @@ ct_accept_rpc_wrk_handler (void *accept_args) sct->c_rmt_port = 0; sct->c_lcl_port = ll_ct->lcl_port; sct->c_is_ip4 = cct->c_is_ip4; - clib_memcpy (&sct->c_lcl_ip, &ll_ct->lcl_ip, sizeof (ll_ct->lcl_ip)); + clib_memcpy (&sct->c_lcl_ip, &cct->c_rmt_ip, sizeof (cct->c_rmt_ip)); sct->client_wrk = cct->client_wrk; sct->c_proto = TRANSPORT_PROTO_NONE; sct->client_opaque = cct->client_opaque; @@ -659,7 +726,7 @@ ct_accept_rpc_wrk_handler (void *accept_args) sct->c_is_ip4); ss->connection_index = sct->c_c_index; ss->listener_handle = listen_session_get_handle (ll); - ss->session_state = SESSION_STATE_CREATED; + session_set_state (ss, SESSION_STATE_CREATED); server_wrk = application_listener_select_worker (ll); ss->app_wrk_index = server_wrk->wrk_index; @@ -675,15 +742,17 @@ ct_accept_rpc_wrk_handler (void *accept_args) return; } + cct->server_wrk = sct->server_wrk; cct->seg_ctx_index = sct->seg_ctx_index; cct->ct_seg_index = sct->ct_seg_index; cct->client_rx_fifo = ss->tx_fifo; cct->client_tx_fifo = ss->rx_fifo; cct->client_rx_fifo->refcnt++; cct->client_tx_fifo->refcnt++; - cct->segment_handle = sct->segment_handle; + cct->segment_handle = + ct_client_seg_handle (sct->segment_handle, cct->client_wrk); - ss->session_state = SESSION_STATE_ACCEPTING; + session_set_state (ss, SESSION_STATE_ACCEPTING); if (app_worker_accept_notify (server_wrk, ss)) { ct_session_connect_notify (ss, SESSION_E_REFUSED); @@ -693,13 +762,93 @@ ct_accept_rpc_wrk_handler (void *accept_args) } } -static int -ct_connect (app_worker_t * client_wrk, session_t * ll, - session_endpoint_cfg_t * sep) +static void +ct_accept_rpc_wrk_handler (void *rpc_args) { - u32 thread_index, ho_index; + u32 thread_index, n_connects, i, n_pending; + const u32 max_connects = 32; + ct_worker_t *wrk; + u8 need_rpc = 0; + + thread_index = pointer_to_uword (rpc_args); + wrk = ct_worker_get (thread_index); + + /* Connects 
could be handled without worker barrier so grab lock */ + clib_spinlock_lock (&wrk->pending_connects_lock); + + n_pending = clib_fifo_elts (wrk->pending_connects); + n_connects = clib_min (n_pending, max_connects); + vec_validate (wrk->new_connects, n_connects); + + for (i = 0; i < n_connects; i++) + clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]); + + if (n_pending == n_connects) + wrk->have_connects = 0; + else + need_rpc = 1; + + clib_spinlock_unlock (&wrk->pending_connects_lock); + + for (i = 0; i < n_connects; i++) + ct_accept_one (thread_index, wrk->new_connects[i]); + + if (need_rpc) + session_send_rpc_evt_to_thread_force ( + thread_index, ct_accept_rpc_wrk_handler, + uword_to_pointer (thread_index, void *)); +} + +static void +ct_fwrk_flush_connects (void *rpc_args) +{ + u32 thread_index, fwrk_index, n_workers; ct_main_t *cm = &ct_main; - ct_connection_t *ho; + ct_worker_t *wrk; + u8 need_rpc; + + fwrk_index = cm->fwrk_thread; + n_workers = vec_len (cm->fwrk_pending_connects); + + for (thread_index = fwrk_index; thread_index < n_workers; thread_index++) + { + if (!vec_len (cm->fwrk_pending_connects[thread_index])) + continue; + + wrk = ct_worker_get (thread_index); + + /* Connects can be done without worker barrier, grab dst worker lock */ + if (thread_index != fwrk_index) + clib_spinlock_lock (&wrk->pending_connects_lock); + + clib_fifo_add (wrk->pending_connects, + cm->fwrk_pending_connects[thread_index], + vec_len (cm->fwrk_pending_connects[thread_index])); + if (!wrk->have_connects) + { + wrk->have_connects = 1; + need_rpc = 1; + } + + if (thread_index != fwrk_index) + clib_spinlock_unlock (&wrk->pending_connects_lock); + + vec_reset_length (cm->fwrk_pending_connects[thread_index]); + + if (need_rpc) + session_send_rpc_evt_to_thread_force ( + thread_index, ct_accept_rpc_wrk_handler, + uword_to_pointer (thread_index, void *)); + } + + cm->fwrk_have_flush = 0; +} + +static void +ct_program_connect_to_wrk (u32 ho_index) +{ + ct_main_t *cm = 
&ct_main; + u32 thread_index; /* Simple round-robin policy for spreading sessions over workers. We skip * thread index 0, i.e., offset the index by 1, when we have workers as it @@ -708,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll, cm->n_sessions += 1; thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0; + /* Pospone flushing of connect request to dst worker until after session + * layer fully initializes the half-open session. */ + vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index); + if (!cm->fwrk_have_flush) + { + session_send_rpc_evt_to_thread_force ( + cm->fwrk_thread, ct_fwrk_flush_connects, + uword_to_pointer (thread_index, void *)); + cm->fwrk_have_flush = 1; + } +} + +static int +ct_connect (app_worker_t *client_wrk, session_t *ll, + session_endpoint_cfg_t *sep) +{ + ct_connection_t *ho; + u32 ho_index; + /* * Alloc and init client half-open transport */ @@ -725,22 +893,19 @@ ct_connect (app_worker_t * client_wrk, session_t * ll, clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip)); ho->flags |= CT_CONN_F_CLIENT; ho->c_s_index = ~0; - ho->actual_tp = sep->transport_proto; + ho->actual_tp = sep->original_tp; /* - * Accept connection on thread selected above. Connected reply comes + * Program connect on a worker, connected reply comes * after server accepts the connection. 
*/ - - session_send_rpc_evt_to_thread_force (thread_index, - ct_accept_rpc_wrk_handler, - uword_to_pointer (ho_index, void *)); + ct_program_connect_to_wrk (ho_index); return ho_index; } static u32 -ct_start_listen (u32 app_listener_index, transport_endpoint_t * tep) +ct_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) { session_endpoint_cfg_t *sep; ct_connection_t *ct; @@ -772,9 +937,9 @@ ct_listener_get (u32 ct_index) } static transport_connection_t * -ct_half_open_get (u32 ct_index) +ct_session_half_open_get (u32 ct_index) { - return (transport_connection_t *) ct_connection_get (ct_index, 0); + return (transport_connection_t *) ct_half_open_get (ct_index); } static void @@ -796,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index) static void ct_cleanup_ho (u32 ho_index) { - ct_connection_free (ct_connection_get (ho_index, 0)); + ct_connection_t *ho; + + ho = ct_half_open_get (ho_index); + ct_connection_free (ho); } static int @@ -827,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep) goto global_scope; ll = listen_session_get_from_handle (lh); - al = app_listener_get_w_session (ll); + al = app_listener_get (ll->al_index); /* * Break loop if rule in local table points to connecting app. 
This @@ -856,58 +1024,189 @@ global_scope: ll = session_lookup_listener_wildcard (table_index, sep); /* Avoid connecting app to own listener */ - if (ll && ll->app_index != app->app_index) - return ct_connect (app_wrk, ll, sep_ext); + if (ll) + { + al = app_listener_get (ll->al_index); + if (al->app_index != app->app_index) + return ct_connect (app_wrk, ll, sep_ext); + } /* Failed to connect but no error */ return SESSION_E_LOCAL_CONNECT; } +static inline int +ct_close_is_reset (ct_connection_t *ct, session_t *s) +{ + if (ct->flags & CT_CONN_F_RESET) + return 1; + if (ct->flags & CT_CONN_F_CLIENT) + return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0); + else + return (svm_fifo_max_dequeue (s->rx_fifo) > 0); +} + static void -ct_session_close (u32 ct_index, u32 thread_index) +ct_session_cleanup_server_session (session_t *s) { - ct_connection_t *ct, *peer_ct; + ct_connection_t *ct; + + ct = (ct_connection_t *) session_get_transport (s); + ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo); + session_free (s); + ct_connection_free (ct); +} + +static void +ct_session_postponed_cleanup (ct_connection_t *ct) +{ + ct_connection_t *peer_ct; app_worker_t *app_wrk; session_t *s; - ct = ct_connection_get (ct_index, thread_index); s = session_get (ct->c_s_index, ct->c_thread_index); - peer_ct = ct_connection_get (ct->peer_index, thread_index); + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + + peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index); if (peer_ct) { - peer_ct->peer_index = ~0; - /* Make sure session was allocated */ - if (peer_ct->flags & CT_CONN_F_HALF_OPEN) - { - ct_session_connect_notify (s, SESSION_E_REFUSED); - } - else if (peer_ct->c_s_index != ~0) - session_transport_closing_notify (&peer_ct->connection); + if (ct_close_is_reset (ct, s)) + session_transport_reset_notify (&peer_ct->connection); else - ct_connection_free (peer_ct); + session_transport_closing_notify (&peer_ct->connection); } + session_transport_closed_notify 
(&ct->connection); + + /* It would be cleaner to call session_transport_delete_notify + * but then we can't control session cleanup lower */ + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); + if (app_wrk) + app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT); if (ct->flags & CT_CONN_F_CLIENT) { /* Normal free for client session as the fifos are allocated through * the connects segment manager in a segment that's not shared with * the server */ - session_free_w_fifos (s); ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo); + session_program_cleanup (s); + ct_connection_free (ct); } else { /* Manual session and fifo segment cleanup to avoid implicit * segment manager cleanups and notifications */ - app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) - app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION); + { + /* Remove custom cleanup notify infra when/if switching to normal + * session cleanup. Note that ct is freed in the cb function */ + app_worker_cleanup_notify_custom (app_wrk, s, + SESSION_CLEANUP_SESSION, + ct_session_cleanup_server_session); + } + else + { + ct_connection_free (ct); + } + } +} + +static void +ct_handle_cleanups (void *args) +{ + uword thread_index = pointer_to_uword (args); + const u32 max_cleanups = 100; + ct_cleanup_req_t *req; + ct_connection_t *ct; + u32 n_to_handle = 0; + ct_worker_t *wrk; + session_t *s; + + wrk = ct_worker_get (thread_index); + wrk->have_cleanups = 0; + n_to_handle = clib_fifo_elts (wrk->pending_cleanups); + n_to_handle = clib_min (n_to_handle, max_cleanups); + + while (n_to_handle) + { + clib_fifo_sub2 (wrk->pending_cleanups, req); + ct = ct_connection_get (req->ct_index, thread_index); + s = session_get (ct->c_s_index, ct->c_thread_index); + if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT)) + clib_fifo_add1 (wrk->pending_cleanups, *req); + else + ct_session_postponed_cleanup (ct); + n_to_handle -= 1; + } - ct_session_dealloc_fifos 
(ct, s->rx_fifo, s->tx_fifo); - session_free (s); + if (clib_fifo_elts (wrk->pending_cleanups)) + { + wrk->have_cleanups = 1; + session_send_rpc_evt_to_thread_force ( + thread_index, ct_handle_cleanups, + uword_to_pointer (thread_index, void *)); } +} - ct_connection_free (ct); +static void +ct_program_cleanup (ct_connection_t *ct) +{ + ct_cleanup_req_t *req; + uword thread_index; + ct_worker_t *wrk; + + thread_index = ct->c_thread_index; + wrk = ct_worker_get (ct->c_thread_index); + + clib_fifo_add2 (wrk->pending_cleanups, req); + req->ct_index = ct->c_c_index; + + if (wrk->have_cleanups) + return; + + wrk->have_cleanups = 1; + session_send_rpc_evt_to_thread_force ( + thread_index, ct_handle_cleanups, uword_to_pointer (thread_index, void *)); +} + +static void +ct_session_close (u32 ct_index, u32 thread_index) +{ + ct_connection_t *ct, *peer_ct; + session_t *s; + + ct = ct_connection_get (ct_index, thread_index); + s = session_get (ct->c_s_index, ct->c_thread_index); + peer_ct = ct_connection_get (ct->peer_index, thread_index); + if (peer_ct) + { + peer_ct->peer_index = ~0; + /* Make sure session was allocated */ + if (peer_ct->flags & CT_CONN_F_HALF_OPEN) + { + ct_session_connect_notify (s, SESSION_E_REFUSED); + ct->peer_index = ~0; + } + else if (peer_ct->c_s_index == ~0) + { + /* should not happen */ + clib_warning ("ct peer without session"); + ct_connection_free (peer_ct); + } + } + + /* Do not send closed notify to make sure pending tx events are + * still delivered and program cleanup */ + ct_program_cleanup (ct); +} + +static void +ct_session_reset (u32 ct_index, u32 thread_index) +{ + ct_connection_t *ct; + ct = ct_connection_get (ct_index, thread_index); + ct->flags |= CT_CONN_F_RESET; + ct_session_close (ct_index, thread_index); } static transport_connection_t * @@ -966,12 +1265,17 @@ static int ct_app_rx_evt (transport_connection_t * tc) { ct_connection_t *ct = (ct_connection_t *) tc, *peer_ct; - session_t *ps; + session_t *ps, *s; + s = session_get 
(ct->c_s_index, ct->c_thread_index); + if (session_has_transport (s) || s->session_state < SESSION_STATE_READY) + return -1; peer_ct = ct_connection_get (ct->peer_index, tc->thread_index); - if (!peer_ct) + if (!peer_ct || (peer_ct->flags & CT_CONN_F_HALF_OPEN)) return -1; ps = session_get (peer_ct->c_s_index, peer_ct->c_thread_index); + if (ps->session_state >= SESSION_STATE_TRANSPORT_CLOSING) + return -1; return session_dequeue_notify (ps); } @@ -993,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args) { u32 ho_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); - ct_connection_t *ct = ct_connection_get (ho_index, 0); + ct_connection_t *ct = ct_half_open_get (ho_index); s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct); if (verbose) s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN"); @@ -1042,27 +1346,33 @@ format_ct_session (u8 * s, va_list * args) clib_error_t * ct_enable_disable (vlib_main_t * vm, u8 is_en) { + vlib_thread_main_t *vtm = &vlib_thread_main; ct_main_t *cm = &ct_main; + ct_worker_t *wrk; cm->n_workers = vlib_num_workers (); - vec_validate (cm->connections, cm->n_workers); + cm->fwrk_thread = transport_cl_thread (); + vec_validate (cm->wrk, vtm->n_vlib_mains); + vec_foreach (wrk, cm->wrk) + clib_spinlock_init (&wrk->pending_connects_lock); clib_spinlock_init (&cm->ho_reuseable_lock); clib_rwlock_init (&cm->app_segs_lock); + vec_validate (cm->fwrk_pending_connects, cm->n_workers); return 0; } -/* *INDENT-OFF* */ static const transport_proto_vft_t cut_thru_proto = { .enable = ct_enable_disable, .start_listen = ct_start_listen, .stop_listen = ct_stop_listen, .get_connection = ct_session_get, .get_listener = ct_listener_get, - .get_half_open = ct_half_open_get, + .get_half_open = ct_session_half_open_get, .cleanup = ct_session_cleanup, .cleanup_ho = ct_cleanup_ho, .connect = ct_session_connect, .close = ct_session_close, + .reset = ct_session_reset, .custom_tx = ct_custom_tx, .app_rx_evt = ct_app_rx_evt, 
.format_listener = format_ct_listener, @@ -1075,7 +1385,14 @@ static const transport_proto_vft_t cut_thru_proto = { .service_type = TRANSPORT_SERVICE_VC, }, }; -/* *INDENT-ON* */ + +static inline int +ct_session_can_tx (session_t *s) +{ + return (s->session_state == SESSION_STATE_READY || + s->session_state == SESSION_STATE_CLOSING || + s->session_state == SESSION_STATE_APP_CLOSED); +} int ct_session_tx (session_t * s) @@ -1083,6 +1400,8 @@ ct_session_tx (session_t * s) ct_connection_t *ct, *peer_ct; session_t *peer_s; + if (!ct_session_can_tx (s)) + return 0; ct = (ct_connection_t *) session_get_transport (s); peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index); if (!peer_ct) @@ -1090,6 +1409,7 @@ ct_session_tx (session_t * s) peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index); if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return 0; + peer_s->flags |= SESSION_F_RX_EVT; return session_enqueue_notify (peer_s); } diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h index 86edf243b22..fd2804c7baf 100644 --- a/src/vnet/session/application_local.h +++ b/src/vnet/session/application_local.h @@ -22,7 +22,8 @@ #define foreach_ct_flags \ _ (CLIENT, "client") \ - _ (HALF_OPEN, "half-open") + _ (HALF_OPEN, "half-open") \ + _ (RESET, "reset") enum { diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c index 6d91fc362b2..f547dcfc031 100644 --- a/src/vnet/session/application_namespace.c +++ b/src/vnet/session/application_namespace.c @@ -19,6 +19,7 @@ #include <vnet/session/session.h> #include <vnet/fib/fib_table.h> #include <vppinfra/file.h> +#include <vppinfra/format_table.h> #include <vlib/unix/unix.h> /** @@ -40,7 +41,7 @@ app_namespace_get (u32 index) } app_namespace_t * -app_namespace_get_from_id (const u8 * ns_id) +app_namespace_get_from_id (const u8 *ns_id) { u32 index = app_namespace_index_from_id (ns_id); if (index == APP_NAMESPACE_INVALID_INDEX) 
@@ -54,31 +55,46 @@ app_namespace_index (app_namespace_t * app_ns) return (app_ns - app_namespace_pool); } +void +app_namespace_free (app_namespace_t *app_ns) +{ + hash_unset_mem (app_namespace_lookup_table, app_ns->ns_id); + vec_free (app_ns->ns_id); + + pool_put (app_namespace_pool, app_ns); +} + app_namespace_t * -app_namespace_alloc (u8 * ns_id) +app_namespace_alloc (const u8 *ns_id) { app_namespace_t *app_ns; + pool_get (app_namespace_pool, app_ns); clib_memset (app_ns, 0, sizeof (*app_ns)); - app_ns->ns_id = vec_dup (ns_id); + + app_ns->ns_id = vec_dup ((u8 *) ns_id); + vec_terminate_c_string (app_ns->ns_id); + hash_set_mem (app_namespace_lookup_table, app_ns->ns_id, app_ns - app_namespace_pool); + return app_ns; } -int -vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) +session_error_t +vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a) { app_namespace_t *app_ns; session_table_t *st; + u32 ns_index; + session_error_t rv; if (a->is_add) { if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX && !vnet_get_sw_interface_or_null (vnet_get_main (), a->sw_if_index)) - return VNET_API_ERROR_INVALID_SW_IF_INDEX; - + return SESSION_E_INVALID; if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX) { @@ -91,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) } if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX && a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; app_ns = app_namespace_get_from_id (a->ns_id); if (!app_ns) @@ -102,9 +118,23 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) st->is_local = 1; st->appns_index = app_namespace_index (app_ns); app_ns->local_table_index = session_table_index (st); + if (a->sock_name) + { + app_ns->sock_name = vec_dup (a->sock_name); + vec_terminate_c_string (app_ns->sock_name); + } + + /* Add socket for namespace, + * only at creation time */ + if (app_sapi_enabled) + { + rv = 
appns_sapi_add_ns_socket (app_ns); + if (rv) + return rv; + } } + app_ns->ns_secret = a->secret; - app_ns->netns = a->netns ? vec_dup (a->netns) : 0; app_ns->sw_if_index = a->sw_if_index; app_ns->ip4_fib_index = fib_table_find (FIB_PROTOCOL_IP4, a->ip4_fib_id); @@ -112,14 +142,31 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a) fib_table_find (FIB_PROTOCOL_IP6, a->ip6_fib_id); session_lookup_set_tables_appns (app_ns); - /* Add socket for namespace */ - if (app_sapi_enabled) - appns_sapi_add_ns_socket (app_ns); } else { - return VNET_API_ERROR_UNIMPLEMENTED; + ns_index = app_namespace_index_from_id (a->ns_id); + if (ns_index == APP_NAMESPACE_INVALID_INDEX) + return SESSION_E_INVALID; + + app_ns = app_namespace_get (ns_index); + if (!app_ns) + return SESSION_E_INVALID; + + application_namespace_cleanup (app_ns); + + if (app_sapi_enabled) + appns_sapi_del_ns_socket (app_ns); + + st = session_table_get (app_ns->local_table_index); + + session_table_free (st, FIB_PROTOCOL_MAX); + if (app_ns->sock_name) + vec_free (app_ns->sock_name); + + app_namespace_free (app_ns); } + return 0; } @@ -133,7 +180,13 @@ u32 app_namespace_index_from_id (const u8 * ns_id) { uword *indexp; - indexp = hash_get_mem (app_namespace_lookup_table, ns_id); + u8 *key; + + key = vec_dup ((u8 *) ns_id); + vec_terminate_c_string (key); + + indexp = hash_get_mem (app_namespace_lookup_table, key); + vec_free (key); if (!indexp) return APP_NAMESPACE_INVALID_INDEX; return *indexp; @@ -161,10 +214,15 @@ app_namespace_get_local_table (app_namespace_t * app_ns) return session_table_get (app_ns->local_table_index); } -void -appns_sapi_enable (void) +int +appns_sapi_enable_disable (int is_enable) { - app_sapi_enabled = 1; + /* This cannot be called with active sockets */ + if (pool_elts (app_namespace_pool)) + return -1; + + app_sapi_enabled = is_enable; + return 0; } u8 @@ -189,7 +247,7 @@ app_namespaces_init (void) /* clang-format off */ vnet_app_namespace_add_del_args_t a = { .ns_id = 
ns_id, - .netns = 0, + .sock_name = 0, .secret = 0, .sw_if_index = APP_NAMESPACE_INVALID_INDEX, .is_add = 1 @@ -204,9 +262,11 @@ static clib_error_t * app_ns_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0, *netns = 0; + u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0; + u8 *sock_name = 0; unformat_input_t _line_input, *line_input = &_line_input; u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX; + vnet_main_t *vnm = vnet_get_main (); u64 secret; clib_error_t *error = 0; int rv; @@ -220,15 +280,20 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, { if (unformat (line_input, "add")) is_add = 1; + else if (unformat (line_input, "del")) + is_add = 0; else if (unformat (line_input, "id %_%v%_", &ns_id)) ; else if (unformat (line_input, "secret %lu", &secret)) secret_set = 1; else if (unformat (line_input, "sw_if_index %u", &sw_if_index)) sw_if_index_set = 1; + else if (unformat (line_input, "if %U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + sw_if_index_set = 1; else if (unformat (line_input, "fib_id", &fib_id)) ; - else if (unformat (line_input, "netns %_%v%_", &netns)) + else if (unformat (line_input, "sock-name %_%v%_", &sock_name)) ; else { @@ -238,57 +303,62 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input, } } - if (!ns_id || !secret_set || !sw_if_index_set) + if (!ns_id) { - vlib_cli_output (vm, "namespace-id, secret and sw_if_index must be " - "provided"); + vlib_cli_output (vm, "namespace-id must be provided"); goto done; } - if (is_add) + if (is_add && (!secret_set || !sw_if_index_set)) { - /* clang-format off */ - vnet_app_namespace_add_del_args_t args = { - .ns_id = ns_id, - .netns = netns, - .secret = secret, - .sw_if_index = sw_if_index, - .ip4_fib_id = fib_id, - .is_add = 1 - }; - /* clang-format on */ - - if ((rv = vnet_app_namespace_add_del (&args))) - error = clib_error_return (0, "app namespace add del returned %d", 
rv); + vlib_cli_output (vm, "secret and interface must be provided"); + goto done; } + /* clang-format off */ + vnet_app_namespace_add_del_args_t args = { + .ns_id = ns_id, + .secret = secret, + .sw_if_index = sw_if_index, + .sock_name = sock_name, + .ip4_fib_id = fib_id, + .is_add = is_add, + }; + /* clang-format on */ + + if ((rv = vnet_app_namespace_add_del (&args))) + error = clib_error_return (0, "app namespace add del returned %d", rv); + done: vec_free (ns_id); - vec_free (netns); + vec_free (sock_name); unformat_free (line_input); return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (app_ns_command, static) = { .path = "app ns", - .short_help = "app ns [add] id <namespace-id> secret <secret> " - "sw_if_index <sw_if_index> [netns <ns>]", + .short_help = "app ns [add|del] id <namespace-id> secret <secret> " + "sw_if_index <sw_if_index> if <interface>", .function = app_ns_fn, }; -/* *INDENT-ON* */ u8 * format_app_namespace (u8 * s, va_list * args) { app_namespace_t *app_ns = va_arg (*args, app_namespace_t *); + vnet_main_t *vnm = vnet_get_main (); + + s = format (s, "Application namespace [%u]\nid: %s\nsecret: %lu", + app_namespace_index (app_ns), app_ns->ns_id, app_ns->ns_secret); + if (app_ns->sw_if_index != (u32) ~0) + s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm, + app_ns->sw_if_index); + if (app_ns->sock_name) + s = format (s, "\nSocket: %s", app_ns->sock_name); - s = - format (s, "%-10u%-10lu%-15d%-15v%-15v%-40v", app_namespace_index (app_ns), - app_ns->ns_secret, app_ns->sw_if_index, app_ns->ns_id, - app_ns->netns, app_ns->sock_name); return s; } @@ -314,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns) vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd"); - /* *INDENT-OFF* */ pool_foreach (cs, app_ns->app_sockets) { handle = (app_ns_api_handle_t *) &cs->private_data; cf = clib_file_get (&file_main, handle->aah_file_index); @@ -327,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, 
app_namespace_t * app_ns) vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index, app_wrk->wrk_map_index, cf->file_descriptor); } - /* *INDENT-ON* */ } static clib_error_t * @@ -335,9 +403,11 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - u8 *ns_id, do_table = 0, had_input = 1, do_api = 0; + u8 *ns_id = 0, do_table = 0, had_input = 1, do_api = 0; app_namespace_t *app_ns; + vnet_main_t *vnm = vnet_get_main (); session_table_t *st; + table_t table = {}, *t = &table; session_cli_return_if_not_enabled (); @@ -349,7 +419,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "table %_%v%_", &ns_id)) + if (unformat (line_input, "id %_%v%_", &ns_id)) do_table = 1; else if (unformat (line_input, "api-clients")) do_api = 1; @@ -386,20 +456,32 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_output (vm, "table for ns %v could not be found", ns_id); goto done; } + vlib_cli_output (vm, "%U", format_app_namespace, app_ns); session_lookup_show_table_entries (vm, st, 0, 1); vec_free (ns_id); goto done; } do_ns_list: - vlib_cli_output (vm, "%-10s%-10s%-15s%-15s%-15s%-40s", "Index", "Secret", - "sw_if_index", "Id", "netns", "Socket"); + table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket"); + int i = 0; + pool_foreach (app_ns, app_namespace_pool) + { + int j = 0; + table_format_cell (t, i, j++, "%u", app_namespace_index (app_ns)); + table_format_cell (t, i, j++, "%lu", app_ns->ns_secret); + table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm, + app_ns->sw_if_index); + table_format_cell (t, i, j++, "%s", app_ns->ns_id); + table_format_cell (t, i++, j++, "%s", app_ns->sock_name); + } - /* *INDENT-OFF* */ - pool_foreach (app_ns, app_namespace_pool) { - vlib_cli_output (vm, "%U", format_app_namespace, 
app_ns); - } - /* *INDENT-ON* */ + t->default_body.align = TTAA_LEFT; + t->default_header_col.align = TTAA_LEFT; + t->default_header_col.fg_color = TTAC_YELLOW; + t->default_header_col.flags = TTAF_FG_COLOR_SET; + vlib_cli_output (vm, "%U", format_table, t); + table_free (t); done: if (had_input) @@ -407,14 +489,11 @@ done: return 0; } -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_app_ns_command, static) = -{ +VLIB_CLI_COMMAND (show_app_ns_command, static) = { .path = "show app ns", - .short_help = "show app ns [table <id> [api-clients]]", + .short_help = "show app ns [id <id> [api-clients]]", .function = show_app_ns_fn, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h index 313b2d0e63d..261325cbe0e 100644 --- a/src/vnet/session/application_namespace.h +++ b/src/vnet/session/application_namespace.h @@ -51,11 +51,6 @@ typedef struct _app_namespace u8 *ns_id; /** - * Linux netns if one was provided - */ - u8 *netns; - - /** * Name of socket applications can use to attach to session layer */ u8 *sock_name; @@ -69,7 +64,7 @@ typedef struct _app_namespace typedef struct _vnet_app_namespace_add_del_args { u8 *ns_id; - u8 *netns; + u8 *sock_name; u64 secret; u32 sw_if_index; u32 ip4_fib_id; @@ -79,15 +74,16 @@ typedef struct _vnet_app_namespace_add_del_args #define APP_NAMESPACE_INVALID_INDEX ((u32)~0) -app_namespace_t *app_namespace_alloc (u8 * ns_id); +app_namespace_t *app_namespace_alloc (const u8 *ns_id); app_namespace_t *app_namespace_get (u32 index); -app_namespace_t *app_namespace_get_from_id (const u8 * ns_id); +app_namespace_t *app_namespace_get_from_id (const u8 *ns_id); u32 app_namespace_index (app_namespace_t * app_ns); const u8 *app_namespace_id (app_namespace_t * app_ns); const u8 *app_namespace_id_from_index (u32 index); -u32 app_namespace_index_from_id (const u8 * ns_id); +u32 app_namespace_index_from_id (const u8 *ns_id); void 
app_namespaces_init (void); -int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a); +session_error_t +vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a); u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto); session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns); @@ -159,8 +155,9 @@ appns_sapi_handle_sock_index (u32 sapi_sock_handle) } int appns_sapi_add_ns_socket (app_namespace_t * app_ns); +void appns_sapi_del_ns_socket (app_namespace_t *app_ns); u8 appns_sapi_enabled (void); -void appns_sapi_enable (void); +int appns_sapi_enable_disable (int is_enable); #endif /* SRC_VNET_SESSION_APPLICATION_NAMESPACE_H_ */ diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c index be8a9e86bd5..befdb7c7002 100644 --- a/src/vnet/session/application_worker.c +++ b/src/vnet/session/application_worker.c @@ -26,6 +26,7 @@ app_worker_t * app_worker_alloc (application_t * app) { app_worker_t *app_wrk; + pool_get (app_workers, app_wrk); clib_memset (app_wrk, 0, sizeof (*app_wrk)); app_wrk->wrk_index = app_wrk - app_workers; @@ -33,6 +34,8 @@ app_worker_alloc (application_t * app) app_wrk->wrk_map_index = ~0; app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX; clib_spinlock_init (&app_wrk->detached_seg_managers_lock); + vec_validate (app_wrk->wrk_evts, vlib_num_workers ()); + vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ()); APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index); return app_wrk; } @@ -55,26 +58,34 @@ void app_worker_free (app_worker_t * app_wrk) { application_t *app = application_get (app_wrk->app_index); + session_handle_t handle, *handles = 0, *sh; vnet_unlisten_args_t _a, *a = &_a; - u64 handle, *handles = 0, *sm_indices = 0; segment_manager_t *sm; - session_handle_t *sh; + u64 *sm_indices = 0; session_t *ls; u32 sm_index; int i; /* + * Cleanup vpp wrk events + */ + app_worker_del_all_events (app_wrk); + for (i = 0; i < 
vec_len (app_wrk->wrk_evts); i++) + clib_fifo_free (app_wrk->wrk_evts[i]); + + vec_free (app_wrk->wrk_evts); + vec_free (app_wrk->wrk_mq_congested); + + /* * Listener cleanup */ - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ ls = listen_session_get_from_handle (handle); vec_add1 (handles, app_listen_session_handle (ls)); vec_add1 (sm_indices, sm_index); sm = segment_manager_get (sm_index); })); - /* *INDENT-ON* */ for (i = 0; i < vec_len (handles); i++) { @@ -91,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk) segment_manager_init_free (sm); } } - vec_reset_length (handles); + vec_free (handles); vec_free (sm_indices); hash_free (app_wrk->listeners_table); @@ -175,31 +186,85 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s) } int +app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls) +{ + svm_fifo_t *rx_fifo = 0, *tx_fifo = 0; + segment_manager_t *sm; + session_handle_t lsh; + app_listener_t *al; + session_t *s; + + al = app_listener_get (ls->al_index); + sm = app_worker_get_listen_segment_manager (app_wrk, ls); + lsh = session_handle (ls); + + s = session_alloc (0 /* listener on main worker */); + session_set_state (s, SESSION_STATE_LISTENING); + s->flags |= SESSION_F_IS_CLESS; + s->app_wrk_index = app_wrk->wrk_index; + ls = session_get_from_handle (lsh); + s->session_type = ls->session_type; + s->connection_index = ls->connection_index; + + segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo, + &tx_fifo); + + rx_fifo->shr->master_session_index = s->session_index; + rx_fifo->master_thread_index = s->thread_index; + + tx_fifo->shr->master_session_index = s->session_index; + tx_fifo->master_thread_index = s->thread_index; + + s->rx_fifo = rx_fifo; + s->tx_fifo = tx_fifo; + + vec_validate (al->cl_listeners, app_wrk->wrk_map_index); + al->cl_listeners[app_wrk->wrk_map_index] = s->session_index; + + return 0; +} + +void +app_worker_free_wrk_cl_session (app_worker_t *app_wrk, 
session_t *ls) +{ + app_listener_t *al; + session_t *s; + + al = app_listener_get (ls->al_index); + + s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index); + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); + + al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX; +} + +int app_worker_init_listener (app_worker_t * app_wrk, session_t * ls) { segment_manager_t *sm; /* Allocate segment manager. All sessions derived out of a listen session - * have fifos allocated by the same segment manager. */ + * have fifos allocated by the same segment manager. + * TODO(fcoras): limit memory consumption by cless listeners */ if (!(sm = app_worker_alloc_segment_manager (app_wrk))) return SESSION_E_ALLOC; + /* Once the first segment is mapped, don't remove it until unlisten */ + sm->first_is_protected = 1; + /* Keep track of the segment manager for the listener or this worker */ hash_set (app_wrk->listeners_table, listen_session_get_handle (ls), segment_manager_index (sm)); - if (transport_connection_is_cless (session_get_transport (ls))) - { - if (ls->rx_fifo) - return SESSION_E_NOSUPPORT; - return app_worker_alloc_session_fifos (sm, ls); - } + if (ls->flags & SESSION_F_IS_CLESS) + return app_worker_alloc_wrk_cl_session (app_wrk, ls); + return 0; } -int -app_worker_start_listen (app_worker_t * app_wrk, - app_listener_t * app_listener) +session_error_t +app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener) { session_t *ls; int rv; @@ -263,17 +328,14 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls) if (PREDICT_FALSE (!sm_indexp)) return; - /* Dealloc fifos, if any (dgram listeners) */ - if (ls->rx_fifo) - { - segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo); - ls->tx_fifo = ls->rx_fifo = 0; - } + if (ls->flags & SESSION_F_IS_CLESS) + app_worker_free_wrk_cl_session (app_wrk, ls); /* Try to cleanup segment manager */ sm = segment_manager_get (*sm_indexp); if (sm) { + 
sm->first_is_protected = 0; segment_manager_app_detach (sm); if (!segment_manager_has_fifos (sm)) { @@ -334,8 +396,10 @@ app_worker_init_accepted (session_t * s) listener = listen_session_get_from_handle (s->listener_handle); app_wrk = application_listener_select_worker (listener); - s->app_wrk_index = app_wrk->wrk_index; + if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk))) + return -1; + s->app_wrk_index = app_wrk->wrk_index; app = application_get (app_wrk->app_index); if (app->cb_fns.fifo_tuning_callback) s->flags |= SESSION_F_CUSTOM_FIFO_TUNING; @@ -348,10 +412,35 @@ app_worker_init_accepted (session_t * s) } int +app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh, + u32 opaque, session_error_t err) +{ + session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND, + .as_u64[0] = alsh, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + + app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt); + + return 0; +} + +int +app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh, + u32 opaque, session_error_t err) +{ + session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY, + .as_u64[0] = sh, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + + app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt); + return 0; +} + +int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.session_accept_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED); + return 0; } int @@ -365,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s) /* Allocate fifos for session, unless the app is a builtin proxy */ if (application_is_builtin_proxy (app)) - return 0; + return app->cb_fns.proxy_alloc_session_fifos (s); sm = app_worker_get_connect_segment_manager (app_wrk); return app_worker_alloc_session_fifos (sm, s); @@ -375,9 +464,13 @@ int app_worker_connect_notify 
(app_worker_t * app_wrk, session_t * s, session_error_t err, u32 opaque) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque, - s, err); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED, + .as_u64[0] = s ? s->session_index : ~0, + .as_u64[1] = (u64) opaque << 32 | (u32) err }; + u32 thread_index = s ? s->thread_index : vlib_get_thread_index (); + + app_worker_add_event_custom (app_wrk, thread_index, &evt); + return 0; } int @@ -385,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh) { session_handle_t *shp; - ASSERT (vlib_get_thread_index () == 0); + ASSERT (session_vlib_thread_is_cl_thread ()); pool_get (app_wrk->half_open_table, shp); *shp = sh; @@ -395,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh) int app_worker_del_half_open (app_worker_t *app_wrk, session_t *s) { - application_t *app = application_get (app_wrk->app_index); - ASSERT (vlib_get_thread_index () <= 1); - pool_put_index (app_wrk->half_open_table, s->ho_index); - if (app->cb_fns.half_open_cleanup_callback) - app->cb_fns.half_open_cleanup_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP); return 0; } int app_worker_close_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.session_disconnect_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED); return 0; } int app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get (app_wrk->app_index); - if (app->cb_fns.session_transport_closed_callback) - app->cb_fns.session_transport_closed_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED); return 0; } int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s) { - application_t *app = application_get 
(app_wrk->app_index); - app->cb_fns.session_reset_callback (s); + app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET); return 0; } @@ -432,29 +517,33 @@ int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s, session_cleanup_ntf_t ntf) { - application_t *app = application_get (app_wrk->app_index); - if (app->cb_fns.session_cleanup_callback) - app->cb_fns.session_cleanup_callback (s, ntf); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP, + .as_u64[0] = (u64) ntf << 32 | s->session_index, + .as_u64[1] = pointer_to_uword (session_cleanup) }; + + app_worker_add_event_custom (app_wrk, s->thread_index, &evt); + return 0; } int -app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s) +app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s, + session_cleanup_ntf_t ntf, + void (*cleanup_cb) (session_t *s)) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.builtin_app_rx_callback (s); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP, + .as_u64[0] = (u64) ntf << 32 | s->session_index, + .as_u64[1] = pointer_to_uword (cleanup_cb) }; + + app_worker_add_event_custom (app_wrk, s->thread_index, &evt); + return 0; } int -app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s) +app_worker_rx_notify (app_worker_t *app_wrk, session_t *s) { - application_t *app = application_get (app_wrk->app_index); - - if (!app->cb_fns.builtin_app_tx_callback) - return 0; - - app->cb_fns.builtin_app_tx_callback (s); + app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX); return 0; } @@ -462,8 +551,11 @@ int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s, session_handle_t new_sh) { - application_t *app = application_get (app_wrk->app_index); - app->cb_fns.session_migrate_callback (s, new_sh); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED, + .as_u64[0] = s->session_index, + .as_u64[1] = new_sh }; + + app_worker_add_event_custom (app_wrk, s->thread_index, 
&evt); return 0; } @@ -472,6 +564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s) { segment_manager_t *sm; svm_fifo_t *rxf, *txf; + int rv; if (s->session_state == SESSION_STATE_LISTENING) return application_change_listener_owner (s, app_wrk); @@ -488,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s) s->tx_fifo = 0; sm = app_worker_get_connect_segment_manager (app_wrk); - if (app_worker_alloc_session_fifos (sm, s)) - return -1; + if ((rv = app_worker_alloc_session_fifos (sm, s))) + return rv; if (!svm_fifo_is_empty_cons (rxf)) svm_fifo_clone (s->rx_fifo, rxf); @@ -506,6 +599,9 @@ int app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep, session_handle_t *rsh) { + if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk))) + return SESSION_E_REFUSED; + sep->app_wrk_index = app_wrk->wrk_index; return session_open (sep, rsh); @@ -549,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto, sst = session_type_from_proto_and_ip (transport_proto, fib_proto == FIB_PROTOCOL_IP4); - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ listener = listen_session_get_from_handle (handle); if (listener->session_type == sst && !(listener->flags & SESSION_F_PROXY)) return listener; })); - /* *INDENT-ON* */ return 0; } @@ -573,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto, sst = session_type_from_proto_and_ip (transport_proto, fib_proto == FIB_PROTOCOL_IP4); - /* *INDENT-OFF* */ hash_foreach (handle, sm_index, app_wrk->listeners_table, ({ listener = listen_session_get_from_handle (handle); if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY)) return listener; })); - /* *INDENT-ON* */ return 0; } @@ -590,130 +682,178 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto, int app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle) { - application_t *app = application_get 
(app_wrk->app_index); + session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT, + .as_u64[1] = segment_handle }; - return app->cb_fns.add_segment_callback (app_wrk->wrk_index, - segment_handle); + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + + return 0; } int app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle) { - application_t *app = application_get (app_wrk->app_index); - return app->cb_fns.del_segment_callback (app_wrk->wrk_index, - segment_handle); -} + session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT, + .as_u64[1] = segment_handle }; -static inline u8 -app_worker_application_is_builtin (app_worker_t * app_wrk) -{ - return app_wrk->app_is_builtin; + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + + return 0; } -static inline int -app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s) +static int +app_wrk_send_fd (app_worker_t *app_wrk, int fd) { - session_event_t *evt; - svm_msg_q_msg_t msg; - svm_msg_q_t *mq; + if (!appns_sapi_enabled ()) + { + vl_api_registration_t *reg; + clib_error_t *error; - if (app_worker_application_is_builtin (app_wrk)) - return app_worker_builtin_rx (app_wrk, s); + reg = + vl_mem_api_client_index_to_registration (app_wrk->api_client_index); + if (!reg) + { + clib_warning ("no api registration for client: %u", + app_wrk->api_client_index); + return -1; + } - if (svm_fifo_has_event (s->rx_fifo)) - return 0; + if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI) + return -1; - mq = app_wrk->event_queue; - svm_msg_q_lock (mq); + error = vl_api_send_fd_msg (reg, &fd, 1); + if (error) + { + clib_error_report (error); + return -1; + } - if (PREDICT_FALSE (svm_msg_q_is_full (mq))) - { - clib_warning ("evt q full"); - svm_msg_q_unlock (mq); - return -1; + return 0; } - if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + app_sapi_msg_t smsg = { 0 }; + app_namespace_t *app_ns; + clib_error_t 
*error; + application_t *app; + clib_socket_t *cs; + u32 cs_index; + + app = application_get (app_wrk->app_index); + app_ns = app_namespace_get (app->ns_index); + cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index); + cs = appns_sapi_get_socket (app_ns, cs_index); + if (PREDICT_FALSE (!cs)) + return -1; + + /* There's no payload for the message only the type */ + smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS; + error = clib_socket_sendmsg (cs, &smsg, sizeof (smsg), &fd, 1); + if (error) { - clib_warning ("evt q rings full"); - svm_msg_q_unlock (mq); + clib_error_report (error); return -1; } - msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); - evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); - evt->session_index = s->rx_fifo->shr->client_session_index; - evt->event_type = SESSION_IO_EVT_RX; - - (void) svm_fifo_set_event (s->rx_fifo); - svm_msg_q_add_and_unlock (mq, &msg); - return 0; } -static inline int -app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s) +void +app_worker_add_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t evt_type) { - svm_msg_q_t *mq; session_event_t *evt; - svm_msg_q_msg_t msg; - if (app_worker_application_is_builtin (app_wrk)) - return app_worker_builtin_tx (app_wrk, s); + ASSERT (s->thread_index == vlib_get_thread_index ()); + clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt); + evt->session_index = s->session_index; + evt->event_type = evt_type; + evt->postponed = 0; - mq = app_wrk->event_queue; - svm_msg_q_lock (mq); - - if (PREDICT_FALSE (svm_msg_q_is_full (mq))) + /* First event for this app_wrk. 
Schedule it for handling in session input */ + if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1) { - clib_warning ("evt q full"); - svm_msg_q_unlock (mq); - return -1; + session_worker_t *wrk = session_main_get_worker (s->thread_index); + session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index); } +} + +void +app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index, + session_event_t *evt) +{ + clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt); - if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + /* First event for this app_wrk. Schedule it for handling in session input */ + if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1) { - clib_warning ("evt q rings full"); - svm_msg_q_unlock (mq); - return -1; + session_worker_t *wrk = session_main_get_worker (thread_index); + session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index); } +} - msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); - evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); - evt->event_type = SESSION_IO_EVT_TX; - evt->session_index = s->tx_fifo->shr->client_session_index; +always_inline void +app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg, + u32 msg_len, int fd) +{ + svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg; + svm_msg_q_t *mq = app_wrk->event_queue; + session_event_t *evt; - svm_msg_q_add_and_unlock (mq, &msg); - return 0; + ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING)); + *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING); + + evt = svm_msg_q_msg_data (mq, mq_msg); + clib_memset (evt, 0, sizeof (*evt)); + evt->event_type = evt_type; + clib_memcpy_fast (evt->data, msg, msg_len); + + if (fd != -1) + app_wrk_send_fd (app_wrk, fd); + + svm_msg_q_add_raw (mq, mq_msg); +} + +void +app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg, + u32 msg_len, int fd) +{ + app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, fd); } -/* 
*INDENT-OFF* */ -typedef int (app_send_evt_handler_fn) (app_worker_t *app, - session_t *s); -static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = { - app_send_io_evt_rx, - app_send_io_evt_tx, -}; -/* *INDENT-ON* */ +void +app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg, + u32 msg_len) +{ + app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1); +} -/** - * Send event to application - * - * Logic from queue perspective is blocking. However, if queue is full, - * we return. - */ -int -app_worker_lock_and_send_event (app_worker_t * app, session_t * s, - u8 evt_type) +u8 +app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index) +{ + return app_wrk->wrk_mq_congested[thread_index] > 0; +} + +void +app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index) +{ + clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1); + ASSERT (thread_index == vlib_get_thread_index ()); + app_wrk->wrk_mq_congested[thread_index] = 1; +} + +void +app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index) { - return app_send_evt_handler_fns[evt_type] (app, s); + clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1); + ASSERT (thread_index == vlib_get_thread_index ()); + app_wrk->wrk_mq_congested[thread_index] = 0; } u8 * format_app_worker_listener (u8 * s, va_list * args) { app_worker_t *app_wrk = va_arg (*args, app_worker_t *); - u64 handle = va_arg (*args, u64); + session_handle_t handle = va_arg (*args, u64); u32 sm_index = va_arg (*args, u32); int verbose = va_arg (*args, int); session_t *listener; @@ -760,10 +900,12 @@ format_app_worker (u8 * s, va_list * args) app_worker_t *app_wrk = va_arg (*args, app_worker_t *); u32 indent = 1; - s = format (s, "%U wrk-index %u app-index %u map-index %u " - "api-client-index %d\n", format_white_space, indent, - app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index, - app_wrk->api_client_index); + s = format (s, + "%U wrk-index %u app-index %u 
map-index %u " + "api-client-index %d mq-cong %u\n", + format_white_space, indent, app_wrk->wrk_index, + app_wrk->app_index, app_wrk->wrk_map_index, + app_wrk->api_client_index, app_wrk->mq_congested); return s; } diff --git a/src/vnet/session/mma_template.c b/src/vnet/session/mma_template.c index ae730e5dbea..4b2770bb756 100644 --- a/src/vnet/session/mma_template.c +++ b/src/vnet/session/mma_template.c @@ -65,6 +65,11 @@ RT (mma_rule_free) (RTT (mma_rules_table) * srt, RTT (mma_rule) * rule) return rule; } +void RT (mma_rules_table_free) (RTT (mma_rules_table) * srt) +{ + pool_free (srt->rules); +} + RTT (mma_rule) * RT (mma_rules_table_get_rule) (RTT (mma_rules_table) * srt, u32 srt_index) { diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h index dc3545a4ffe..2c0230c2869 100644 --- a/src/vnet/session/mma_template.h +++ b/src/vnet/session/mma_template.h @@ -41,11 +41,9 @@ typedef struct { u32 action_index; u32 *next_indices; - /* *INDENT-OFF* */ RTT (mma_mask_or_match) mask; RTT (mma_mask_or_match) match; RTT (mma_mask_or_match) max_match; - /* *INDENT-ON* */ } RTT (mma_rule); typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1, diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index c7a06d8b636..80bebdca9b5 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -89,28 +89,30 @@ segment_manager_segment_index (segment_manager_t * sm, fifo_segment_t * seg) */ static inline int segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size, - u8 notify_app, u8 flags) + u8 notify_app, u8 flags, u8 need_lock) { segment_manager_main_t *smm = &sm_main; segment_manager_props_t *props; + app_worker_t *app_wrk; fifo_segment_t *fs; u32 fs_index = ~0; u8 *seg_name; int rv; props = segment_manager_properties_get (sm); + app_wrk = app_worker_get (sm->app_wrk_index); /* Not configured for addition of new segments and not first */ if (!props->add_segment && 
!segment_size) { - clib_warning ("cannot allocate new segment"); - return VNET_API_ERROR_INVALID_VALUE; + SESSION_DBG ("cannot allocate new segment"); + return SESSION_E_INVALID; } /* * Allocate fifo segment and grab lock if needed */ - if (vlib_num_workers ()) + if (need_lock) clib_rwlock_writer_lock (&sm->segments_rwlock); pool_get_zero (sm->segments, fs); @@ -119,18 +121,24 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size, * Allocate ssvm segment */ segment_size = segment_size ? segment_size : props->add_segment_size; - segment_size = round_pow2 (segment_size, clib_mem_get_page_size ()); - - if (props->segment_type != SSVM_SEGMENT_PRIVATE) + /* add overhead to ensure the result segment size is at least + * of that requested */ + segment_size += + sizeof (fifo_segment_header_t) + + vlib_thread_main.n_vlib_mains * sizeof (fifo_segment_slice_t) + + FIFO_SEGMENT_ALLOC_OVERHEAD; + + if (props->huge_page) { - seg_name = format (0, "%d-%d%c", getpid (), smm->seg_name_counter++, 0); + uword hugepage_size = clib_mem_get_default_hugepage_size (); + segment_size = round_pow2 (segment_size, hugepage_size); + fs->ssvm.huge_page = 1; } else - { - app_worker_t *app_wrk = app_worker_get (sm->app_wrk_index); - application_t *app = application_get (app_wrk->app_index); - seg_name = format (0, "%v segment%c", app->name, 0); - } + segment_size = round_pow2 (segment_size, clib_mem_get_page_size ()); + + seg_name = format (0, "seg-%u-%u-%u%c", app_wrk->app_index, + app_wrk->wrk_index, smm->seg_name_counter++, 0); fs->ssvm.ssvm_size = segment_size; fs->ssvm.name = seg_name; @@ -154,15 +162,17 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size, * Save segment index before dropping lock, if any held */ fs_index = fs - sm->segments; + fs->fs_index = fs_index; + fs->sm_index = segment_manager_index (sm); /* * Set watermarks in segment */ - fs->h->high_watermark = sm->high_watermark; - fs->h->low_watermark = sm->low_watermark; + 
fs->high_watermark = sm->high_watermark; + fs->low_watermark = sm->low_watermark; + fs->flags = flags; + fs->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT; fs->h->pct_first_alloc = props->pct_first_alloc; - fs->h->flags = flags; - fs->h->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT; if (notify_app) { @@ -172,11 +182,14 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size, app_wrk = app_worker_get (sm->app_wrk_index); rv = app_worker_add_segment_notify (app_wrk, fs_handle); if (rv) - return rv; + { + fs_index = rv; + goto done; + } } done: - if (vlib_num_workers ()) + if (need_lock) clib_rwlock_writer_unlock (&sm->segments_rwlock); return fs_index; @@ -186,14 +199,16 @@ int segment_manager_add_segment (segment_manager_t *sm, uword segment_size, u8 notify_app) { - return segment_manager_add_segment_inline (sm, segment_size, notify_app, 0); + return segment_manager_add_segment_inline (sm, segment_size, notify_app, + 0 /* flags */, 0 /* need_lock */); } int segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size, u8 flags) { - return segment_manager_add_segment_inline (sm, segment_size, 0, flags); + return segment_manager_add_segment_inline (sm, segment_size, 0, flags, + vlib_num_workers ()); } /** @@ -235,7 +250,8 @@ segment_manager_get_segment_if_valid (segment_manager_t * sm, * Removes segment after acquiring writer lock */ static inline void -sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index) +sm_lock_and_del_segment_inline (segment_manager_t *sm, u32 fs_index, + u8 check_if_empty) { fifo_segment_t *fs; u8 is_prealloc; @@ -246,6 +262,9 @@ sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index) if (!fs) goto done; + if (check_if_empty && fifo_segment_has_fifos (fs)) + goto done; + is_prealloc = fifo_segment_flags (fs) & FIFO_SEGMENT_F_IS_PREALLOCATED; if (is_prealloc && !segment_manager_app_detached (sm)) goto done; @@ -259,7 +278,7 @@ done: void segment_manager_lock_and_del_segment (segment_manager_t * sm, u32 
fs_index) { - sm_lock_and_del_segment_inline (sm, fs_index); + sm_lock_and_del_segment_inline (sm, fs_index, 0 /* check_if_empty */); } /** @@ -326,12 +345,6 @@ segment_manager_segment_reader_unlock (segment_manager_t * sm) clib_rwlock_reader_unlock (&sm->segments_rwlock); } -void -segment_manager_segment_writer_unlock (segment_manager_t * sm) -{ - clib_rwlock_writer_unlock (&sm->segments_rwlock); -} - segment_manager_t * segment_manager_alloc (void) { @@ -405,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm) fs_index = segment_manager_add_segment (sm, max_seg_size, 0); if (fs_index < 0) { - clib_warning ("Failed to preallocate segment %d", i); + SESSION_DBG ("Failed to preallocate segment %d", i); return fs_index; } @@ -427,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm) fs_index = segment_manager_add_segment (sm, first_seg_size, 0); if (fs_index < 0) { - clib_warning ("Failed to allocate segment"); + SESSION_DBG ("Failed to allocate segment"); return fs_index; } @@ -445,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm) for (; i < fs->n_slices; i++) { if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice)) - return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL; + return SESSION_E_SEG_CREATE; } } @@ -486,11 +499,9 @@ segment_manager_free (segment_manager_t * sm) * the manager is explicitly deleted/detached by the app. 
*/ clib_rwlock_writer_lock (&sm->segments_rwlock); - /* *INDENT-OFF* */ pool_foreach (fifo_segment, sm->segments) { segment_manager_del_segment (sm, fifo_segment); } - /* *INDENT-ON* */ pool_free (sm->segments); clib_rwlock_writer_unlock (&sm->segments_rwlock); @@ -569,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm) fifo_segment_t *seg; u8 first = 1; - /* *INDENT-OFF* */ segment_manager_foreach_segment_w_lock (seg, sm, ({ if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg) && !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED)) @@ -584,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm) return 1; } })); - /* *INDENT-ON* */ return 0; } @@ -604,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm) ASSERT (pool_elts (sm->segments) != 0); /* Across all fifo segments used by the server */ - /* *INDENT-OFF* */ segment_manager_foreach_segment_w_lock (fs, sm, ({ for (slice_index = 0; slice_index < fs->n_slices; slice_index++) { @@ -629,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm) * sessions if the segment can be removed. 
*/ })); - /* *INDENT-ON* */ vec_foreach (handle, handles) { @@ -695,19 +702,16 @@ segment_manager_del_sessions_filter (segment_manager_t *sm, } int -segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment, - u32 thread_index, +segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index, u32 rx_fifo_size, u32 tx_fifo_size, - svm_fifo_t ** rx_fifo, svm_fifo_t ** tx_fifo) + svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo) { rx_fifo_size = clib_max (rx_fifo_size, sm_main.default_fifo_size); - *rx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index, - rx_fifo_size, + *rx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, rx_fifo_size, FIFO_SEGMENT_RX_FIFO); tx_fifo_size = clib_max (tx_fifo_size, sm_main.default_fifo_size); - *tx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index, - tx_fifo_size, + *tx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, tx_fifo_size, FIFO_SEGMENT_TX_FIFO); if (*rx_fifo == 0) @@ -715,45 +719,37 @@ segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment, /* This would be very odd, but handle it... 
*/ if (*tx_fifo != 0) { - fifo_segment_free_fifo (fifo_segment, *tx_fifo); + fifo_segment_free_fifo (fs, *tx_fifo); *tx_fifo = 0; } - return -1; + return SESSION_E_SEG_NO_SPACE; } if (*tx_fifo == 0) { if (*rx_fifo != 0) { - fifo_segment_free_fifo (fifo_segment, *rx_fifo); + fifo_segment_free_fifo (fs, *rx_fifo); *rx_fifo = 0; } - return -1; + return SESSION_E_SEG_NO_SPACE; } return 0; } -int -segment_manager_alloc_session_fifos (segment_manager_t * sm, - u32 thread_index, - svm_fifo_t ** rx_fifo, - svm_fifo_t ** tx_fifo) +static inline int +sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm, + segment_manager_props_t *props, + u32 thread_index, svm_fifo_t **rx_fifo, + svm_fifo_t **tx_fifo) { - int alloc_fail = 1, rv = 0, new_fs_index; - uword free_bytes, max_free_bytes = 0; - segment_manager_props_t *props; - fifo_segment_t *fs = 0, *cur; - u32 sm_index, fs_index; - - props = segment_manager_properties_get (sm); - - /* - * Find the first free segment to allocate the fifos in - */ + uword free_bytes, max_free_bytes; + fifo_segment_t *cur, *fs = 0; - segment_manager_segment_reader_lock (sm); + max_free_bytes = props->rx_fifo_size + props->tx_fifo_size - 1; - pool_foreach (cur, sm->segments) { + pool_foreach (cur, sm->segments) + { if (fifo_segment_flags (cur) & FIFO_SEGMENT_F_CUSTOM_USE) continue; free_bytes = fifo_segment_available_bytes (cur); @@ -762,63 +758,93 @@ segment_manager_alloc_session_fifos (segment_manager_t * sm, max_free_bytes = free_bytes; fs = cur; } - } - - if (fs) - { - alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index, - props->rx_fifo_size, - props->tx_fifo_size, - rx_fifo, tx_fifo); - /* On success, keep lock until fifos are initialized */ - if (!alloc_fail) - goto alloc_success; } - segment_manager_segment_reader_unlock (sm); + if (PREDICT_FALSE (!fs)) + return SESSION_E_SEG_NO_SPACE; - /* - * Allocation failed, see if we can add a new segment - */ - if (props->add_segment) + return segment_manager_try_alloc_fifos ( + fs, 
thread_index, props->rx_fifo_size, props->tx_fifo_size, rx_fifo, + tx_fifo); +} + +static int +sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm, + segment_manager_props_t *props, + u32 thread_index, svm_fifo_t **rx_fifo, + svm_fifo_t **tx_fifo) +{ + int new_fs_index, rv; + fifo_segment_t *fs; + + if (!props->add_segment) + return SESSION_E_SEG_NO_SPACE; + + clib_rwlock_writer_lock (&sm->segments_rwlock); + + /* Make sure there really is no free space. Another worker might've freed + * some fifos or allocated a segment */ + rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo, + tx_fifo); + if (!rv) + goto done; + + new_fs_index = + segment_manager_add_segment (sm, 0 /* segment_size*/, 1 /* notify_app */); + if (new_fs_index < 0) { - if ((new_fs_index = segment_manager_add_segment (sm, 0, 1)) < 0) - { - clib_warning ("Failed to add new segment"); - return SESSION_E_SEG_CREATE; - } - fs = segment_manager_get_segment_w_lock (sm, new_fs_index); - alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index, - props->rx_fifo_size, - props->tx_fifo_size, - rx_fifo, tx_fifo); - if (alloc_fail) - { - clib_warning ("Added a segment, still can't allocate a fifo"); - segment_manager_segment_reader_unlock (sm); - return SESSION_E_SEG_NO_SPACE2; - } + rv = SESSION_E_SEG_CREATE; + goto done; } - else + fs = segment_manager_get_segment (sm, new_fs_index); + rv = segment_manager_try_alloc_fifos (fs, thread_index, props->rx_fifo_size, + props->tx_fifo_size, rx_fifo, tx_fifo); + if (rv) { - SESSION_DBG ("Can't add new seg and no space to allocate fifos!"); - return SESSION_E_SEG_NO_SPACE; + SESSION_DBG ("Added a segment, still can't allocate a fifo"); + rv = SESSION_E_SEG_NO_SPACE2; + goto done; } -alloc_success: - ASSERT (rx_fifo && tx_fifo); +done: + + clib_rwlock_writer_unlock (&sm->segments_rwlock); + + return rv; +} + +int +segment_manager_alloc_session_fifos (segment_manager_t * sm, + u32 thread_index, + svm_fifo_t ** rx_fifo, + svm_fifo_t ** 
tx_fifo) +{ + segment_manager_props_t *props; + int rv; + + props = segment_manager_properties_get (sm); - sm_index = segment_manager_index (sm); - fs_index = segment_manager_segment_index (sm, fs); - (*tx_fifo)->segment_manager = sm_index; - (*rx_fifo)->segment_manager = sm_index; - (*tx_fifo)->segment_index = fs_index; - (*rx_fifo)->segment_index = fs_index; + /* + * Fast path: find the first segment with enough free space and + * try to allocate the fifos. Done with reader lock + */ + + segment_manager_segment_reader_lock (sm); + + rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo, + tx_fifo); - /* Drop the lock after app is notified */ segment_manager_segment_reader_unlock (sm); - return rv; + /* + * Slow path: if no fifo segment or alloc fail grab writer lock and try + * to allocate new segment + */ + if (PREDICT_FALSE (rv < 0)) + return sm_lock_and_alloc_segment_and_fifos (sm, props, thread_index, + rx_fifo, tx_fifo); + + return 0; } void @@ -827,10 +853,15 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo) segment_manager_t *sm; fifo_segment_t *fs; u32 segment_index; + u8 try_delete = 0; if (!rx_fifo || !tx_fifo) return; + /* Thread that allocated the fifos must be the one to clean them up */ + ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () || + rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ()); + /* It's possible to have no segment manager if the session was removed * as result of a detach. */ if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager))) @@ -842,26 +873,30 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo) fifo_segment_free_fifo (fs, tx_fifo); /* - * Try to remove svm segment if it has no fifos. This can be done only if + * Try to remove fifo segment if it has no fifos. This can be done only if * the segment is not the first in the segment manager or if it is first * and it is not protected. 
Moreover, if the segment is first and the app * has detached from the segment manager, remove the segment manager. */ if (!fifo_segment_has_fifos (fs)) { - segment_manager_segment_reader_unlock (sm); + /* If first, remove only if not protected */ + try_delete = segment_index != 0 || !sm->first_is_protected; + } + + segment_manager_segment_reader_unlock (sm); - /* Remove segment if it holds no fifos or first but not protected */ - if (segment_index != 0 || !sm->first_is_protected) - sm_lock_and_del_segment_inline (sm, segment_index); + if (PREDICT_FALSE (try_delete)) + { + /* Only remove if empty after writer lock acquired */ + sm_lock_and_del_segment_inline (sm, segment_index, + 1 /* check_if_empty */); /* Remove segment manager if no sessions and detached from app */ if (segment_manager_app_detached (sm) && !segment_manager_has_fifos (sm)) segment_manager_free_safe (sm); } - else - segment_manager_segment_reader_unlock (sm); } void @@ -920,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment, fifo_evt_size = sizeof (session_event_t); notif_q_size = clib_max (16, props->evt_q_size >> 4); - /* *INDENT-OFF* */ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { {props->evt_q_size, fifo_evt_size, 0}, {notif_q_size, session_evt_size, 0} }; - /* *INDENT-ON* */ cfg->consumer_pid = 0; cfg->n_rings = 2; cfg->q_nitems = props->evt_q_size; @@ -984,79 +1017,111 @@ segment_manager_main_init (void) sm->default_low_watermark = 50; } +static u8 * +format_segment_manager (u8 *s, va_list *args) +{ + segment_manager_t *sm = va_arg (*args, segment_manager_t *); + int verbose = va_arg (*args, int); + app_worker_t *app_wrk; + uword max_fifo_size; + fifo_segment_t *seg; + application_t *app; + u8 custom_logic; + + app_wrk = app_worker_get_if_valid (sm->app_wrk_index); + app = app_wrk ? application_get (app_wrk->app_index) : 0; + custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 
1 : 0; + max_fifo_size = sm->max_fifo_size; + + s = format (s, + "[%u] %v app-wrk: %u segs: %u max-fifo-sz: %U " + "wmarks: %u %u %s flags: 0x%x", + segment_manager_index (sm), app ? app->name : 0, + sm->app_wrk_index, pool_elts (sm->segments), format_memory_size, + max_fifo_size, sm->high_watermark, sm->low_watermark, + custom_logic ? "custom-tuning" : "no-tuning", sm->flags); + + if (!verbose || !pool_elts (sm->segments)) + return s; + + s = format (s, "\n\n"); + + segment_manager_foreach_segment_w_lock ( + seg, sm, ({ s = format (s, " *%U", format_fifo_segment, seg, verbose); })); + + return s; +} + static clib_error_t * segment_manager_show_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + unformat_input_t _line_input, *line_input = &_line_input; segment_manager_main_t *smm = &sm_main; u8 show_segments = 0, verbose = 0; - uword max_fifo_size; segment_manager_t *sm; - fifo_segment_t *seg; - app_worker_t *app_wrk; - application_t *app; - u8 custom_logic; + u32 sm_index = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + { + vlib_cli_output (vm, "%d segment managers allocated", + pool_elts (smm->segment_managers)); + return 0; + } - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "segments")) + if (unformat (line_input, "segments")) show_segments = 1; - else if (unformat (input, "verbose")) + else if (unformat (line_input, "verbose")) verbose = 1; + else if (unformat (line_input, "index %u", &sm_index)) + ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + vlib_cli_output (vm, "unknown input [%U]", format_unformat_error, + line_input); + goto done; + } } - vlib_cli_output (vm, "%d segment managers allocated", - pool_elts (smm->segment_managers)); - if (verbose && pool_elts (smm->segment_managers)) + + if (!pool_elts (smm->segment_managers)) + goto done; + + if 
(sm_index != ~0) { - vlib_cli_output (vm, "%-6s%=10s%=10s%=13s%=11s%=11s%=12s", - "Index", "AppIndex", "Segments", "MaxFifoSize", - "HighWater", "LowWater", "FifoTuning"); + sm = segment_manager_get_if_valid (sm_index); + if (!sm) + { + vlib_cli_output (vm, "segment manager %u not allocated", sm_index); + goto done; + } + vlib_cli_output (vm, "%U", format_segment_manager, sm, 1 /* verbose */); + goto done; + } - /* *INDENT-OFF* */ + if (verbose || show_segments) + { pool_foreach (sm, smm->segment_managers) { - app_wrk = app_worker_get_if_valid (sm->app_wrk_index); - app = app_wrk ? application_get (app_wrk->app_index) : 0; - custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0; - max_fifo_size = sm->max_fifo_size; - - vlib_cli_output (vm, "%-6d%=10d%=10d%=13U%=11d%=11d%=12s", - segment_manager_index (sm), - sm->app_wrk_index, pool_elts (sm->segments), - format_memory_size, max_fifo_size, - sm->high_watermark, sm->low_watermark, - custom_logic ? "custom" : "none"); + vlib_cli_output (vm, "%U", format_segment_manager, sm, + show_segments); } - /* *INDENT-ON* */ vlib_cli_output (vm, "\n"); } - if (show_segments) - { - vlib_cli_output (vm, "%U", format_fifo_segment, 0, verbose); - /* *INDENT-OFF* */ - pool_foreach (sm, smm->segment_managers) { - segment_manager_foreach_segment_w_lock (seg, sm, ({ - vlib_cli_output (vm, "%U", format_fifo_segment, seg, verbose); - })); - } - /* *INDENT-ON* */ +done: + + unformat_free (line_input); - } return 0; } -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (segment_manager_show_command, static) = -{ +VLIB_CLI_COMMAND (segment_manager_show_command, static) = { .path = "show segment-manager", - .short_help = "show segment-manager [segments][verbose]", + .short_help = "show segment-manager [segments][verbose][index <nn>]", .function = segment_manager_show_fn, }; -/* *INDENT-ON* */ void segment_manager_format_sessions (segment_manager_t * sm, int verbose) @@ -1085,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, 
int verbose) clib_rwlock_reader_lock (&sm->segments_rwlock); - /* *INDENT-OFF* */ pool_foreach (fs, sm->segments) { for (slice_index = 0; slice_index < fs->n_slices; slice_index++) { @@ -1117,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose) vec_free (s); } } - /* *INDENT-ON* */ clib_rwlock_reader_unlock (&sm->segments_rwlock); } diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 5a3d772ff02..1e99c4605a6 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -40,6 +40,7 @@ typedef struct _segment_manager_props u8 high_watermark; /**< memory usage high watermark % */ u8 low_watermark; /**< memory usage low watermark % */ u8 pct_first_alloc; /**< pct of fifo size to alloc */ + u8 huge_page; /**< use hugepage */ } segment_manager_props_t; typedef enum seg_manager_flag_ @@ -102,8 +103,23 @@ segment_manager_t *segment_manager_get (u32 index); segment_manager_t *segment_manager_get_if_valid (u32 index); u32 segment_manager_index (segment_manager_t * sm); +/** + * Add segment without lock + * + * @param sm Segment manager + * @param segment_size Size of segment to be added + * @param notify_app Flag set if app notification requested + */ int segment_manager_add_segment (segment_manager_t *sm, uword segment_size, u8 notify_app); + +/** + * Add segment with lock + * + * @param sm Segment manager + * @param segment_size Size of segment to be added + * @param flags Flags to be set on segment + */ int segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size, u8 flags); void segment_manager_del_segment (segment_manager_t * sm, @@ -122,7 +138,6 @@ u64 segment_manager_make_segment_handle (u32 segment_manager_index, u64 segment_manager_segment_handle (segment_manager_t * sm, fifo_segment_t * segment); void segment_manager_segment_reader_unlock (segment_manager_t * sm); -void segment_manager_segment_writer_unlock (segment_manager_t * sm); int 
segment_manager_alloc_session_fifos (segment_manager_t * sm, u32 thread_index, @@ -175,7 +190,9 @@ static inline void segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index, u32 * segment_index) { - *sm_index = segment_handle >> 32; + /* Upper 8 bits zeroed out as they may be used for cut-through segments. + * See @ref ct_alloc_segment */ + *sm_index = (segment_handle >> 32) & 0xFFFFFF; *segment_index = segment_handle & 0xFFFFFFFF; } diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index 43bde1afbbd..6affae4112d 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair { u32 index; }; -/** \brief Application add TLS certificate - ### WILL BE DEPRECATED POST 20.01 ### - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param cert_len - certificate length - @param cert - certificate as a string -*/ -autoreply define application_tls_cert_add { - option deprecated="to be removed post 21.06"; - u32 client_index; - u32 context; - u32 app_index; - u16 cert_len; - u8 cert[cert_len]; -}; - -/** \brief Application add TLS key - ### WILL BE DEPRECATED POST 20.01 ### - @param client_index - opaque cookie to identify the sender - @param context - sender context, to match reply w/ request - @param key_len - certificate length - @param key - PEM encoded key as a string -*/ -autoreply define application_tls_key_add { - option deprecated="to be removed post 21.06"; - u32 client_index; - u32 context; - u32 app_index; - u16 key_len; - u8 key[key_len]; -}; - /** \brief add/del application worker @param client_index - opaque cookie to identify the sender client to vpp direction only @@ -203,6 +171,18 @@ autoreply define session_enable_disable { bool is_enable [default=true]; }; +/** \brief enable/disable session layer socket api + @param client_index - opaque cookie to identify the sender + 
client to vpp direction only + @param context - sender context, to match reply w/ request + @param is_enable - disable session layer if 0, enable otherwise +*/ +autoreply define session_sapi_enable_disable { + u32 client_index; + u32 context; + bool is_enable [default=true]; +}; + /** \brief add/del application namespace @param client_index - opaque cookie to identify the sender client to vpp direction only @@ -239,17 +219,86 @@ define app_namespace_add_del { @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored if sw_if_index set. @param namespace_id - namespace id + @param sock_name - socket name (path, abstract socket name) +*/ +define app_namespace_add_del_v4 { + option deprecated; + u32 client_index; + u32 context; + u64 secret; + bool is_add [default=true]; + vl_api_interface_index_t sw_if_index [default=0xffffffff]; + u32 ip4_fib_id; + u32 ip6_fib_id; + string namespace_id[64]; + string sock_name[]; +}; + +/** \brief Reply for app namespace add/del + @param context - returned sender context, to match reply w/ request + @param retval - return code + @param appns_index - app namespace index +*/ +define app_namespace_add_del_v4_reply +{ + u32 context; + i32 retval; + u32 appns_index; +}; + +/** \brief add/del application namespace + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param secret - secret shared between app and vpp + @param sw_if_index - local interface that "supports" namespace. Set to + ~0 if no preference + @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored + if sw_if_index set. + @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored + if sw_if_index set. 
+ @param namespace_id - namespace id @param netns - linux net namespace */ define app_namespace_add_del_v2 { + option deprecated; + u32 client_index; + u32 context; + u64 secret; + vl_api_interface_index_t sw_if_index [default=0xffffffff]; + u32 ip4_fib_id; + u32 ip6_fib_id; + string namespace_id[64]; + string netns[64]; +}; + +/** \brief add/del application namespace + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param secret - secret shared between app and vpp + @param sw_if_index - local interface that "supports" namespace. Set to + ~0 if no preference + @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored + if sw_if_index set. + @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored + if sw_if_index set. + @param namespace_id - namespace id + @param netns - linux net namespace + @param sock_name - socket name (path, abstract socket name) +*/ +define app_namespace_add_del_v3 { + option deprecated; u32 client_index; u32 context; u64 secret; + bool is_add [default=true]; vl_api_interface_index_t sw_if_index [default=0xffffffff]; u32 ip4_fib_id; u32 ip6_fib_id; string namespace_id[64]; string netns[64]; + string sock_name[]; }; /** \brief Reply for app namespace add/del @@ -272,6 +321,15 @@ define app_namespace_add_del_reply */ define app_namespace_add_del_v2_reply { + option deprecated; + u32 context; + i32 retval; + u32 appns_index; +}; + +define app_namespace_add_del_v3_reply +{ + option deprecated; u32 context; i32 retval; u32 appns_index; diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 05712181ab0..67e7ee39001 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -17,10 +17,13 @@ * @brief Session and session manager */ +#include <vnet/plugin/plugin.h> #include <vnet/session/session.h> #include <vnet/session/application.h> #include <vnet/dpo/load_balance.h> #include 
<vnet/fib/ip4_fib.h> +#include <vlib/stats/stats.h> +#include <vlib/dma/dma.h> session_main_t session_main; @@ -36,8 +39,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, mq = wrk->vpp_event_queue; if (PREDICT_FALSE (svm_msg_q_lock (mq))) return -1; - if (PREDICT_FALSE (svm_msg_q_is_full (mq) - || svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) { svm_msg_q_unlock (mq); return -2; @@ -58,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_index = *(u32 *) data; break; - case SESSION_IO_EVT_BUILTIN_TX: + case SESSION_IO_EVT_TX_MAIN: case SESSION_CTRL_EVT_CLOSE: case SESSION_CTRL_EVT_RESET: msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); @@ -95,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index, } int +session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) +{ + return session_send_evt_to_thread ((void *) &sh.session_index, 0, + (u32) sh.thread_index, evt_type); +} + +int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only events supported are disconnect, shutdown and reset */ @@ -202,39 +211,25 @@ session_alloc (u32 thread_index) { session_worker_t *wrk = &session_main.wrk[thread_index]; session_t *s; - u8 will_expand = 0; - pool_get_aligned_will_expand (wrk->sessions, will_expand, - CLIB_CACHE_LINE_BYTES); - /* If we have peekers, let them finish */ - if (PREDICT_FALSE (will_expand && vlib_num_workers ())) - { - clib_rwlock_writer_lock (&wrk->peekers_rw_locks); - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - clib_rwlock_writer_unlock (&wrk->peekers_rw_locks); - } - else - { - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - } + + pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); clib_memset (s, 0, sizeof (*s)); 
s->session_index = s - wrk->sessions; s->thread_index = thread_index; - s->app_index = APP_INVALID_INDEX; + s->al_index = APP_INVALID_INDEX; + return s; } void session_free (session_t * s) { - if (CLIB_DEBUG) - { - u8 thread_index = s->thread_index; - clib_memset (s, 0xFA, sizeof (*s)); - pool_put (session_main.wrk[thread_index].sessions, s); - return; - } + session_worker_t *wrk = &session_main.wrk[s->thread_index]; + SESSION_EVT (SESSION_EVT_FREE, s); - pool_put (session_main.wrk[s->thread_index].sessions, s); + if (CLIB_DEBUG) + clib_memset (s, 0xFA, sizeof (*s)); + pool_put (wrk->sessions, s); } u8 @@ -252,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index) || s->session_state <= SESSION_STATE_LISTENING) return 1; - if (s->session_state == SESSION_STATE_CONNECTING && + if ((s->session_state == SESSION_STATE_CONNECTING || + s->session_state == SESSION_STATE_TRANSPORT_CLOSED) && (s->flags & SESSION_F_HALF_OPEN)) return 1; tc = session_get_transport (s); - if (s->connection_index != tc->c_index - || s->thread_index != tc->thread_index || tc->s_index != si) + if (s->connection_index != tc->c_index || + s->thread_index != tc->thread_index || tc->s_index != si) return 0; return 1; } +void +session_cleanup (session_t *s) +{ + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); +} + static void session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf) { app_worker_t *app_wrk; app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (!app_wrk) - return; + if (PREDICT_FALSE (!app_wrk)) + { + if (ntf == SESSION_CLEANUP_TRANSPORT) + return; + + session_cleanup (s); + return; + } app_worker_cleanup_notify (app_wrk, s, ntf); } void -session_free_w_fifos (session_t * s) +session_program_cleanup (session_t *s) { + ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, SESSION_CLEANUP_SESSION); - segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); - session_free (s); } /** @@ -297,7 +305,7 @@ 
session_delete (session_t * s) if ((rv = session_lookup_del_session (s))) clib_warning ("session %u hash delete rv %d", s->session_index, rv); - session_free_w_fifos (s); + session_program_cleanup (s); } void @@ -312,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle) * session should be removed. */ if (ho->connection_index == ~0) { - ho->session_state = SESSION_STATE_CLOSED; + session_set_state (ho, SESSION_STATE_CLOSED); return; } /* Migrated transports are no longer half-opens */ transport_cleanup (session_get_transport_proto (ho), - ho->connection_index, ho->app_index /* overloaded */); + ho->connection_index, ho->al_index /* overloaded */); + } + else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED) + { + /* Cleanup half-open session lookup table if need be */ + if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED) + { + transport_connection_t *tc; + tc = transport_get_half_open (session_get_transport_proto (ho), + ho->connection_index); + if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_del_half_open (tc); + } + transport_cleanup_half_open (session_get_transport_proto (ho), + ho->connection_index); } - else - transport_cleanup_half_open (session_get_transport_proto (ho), - ho->connection_index); session_free (ho); } @@ -330,10 +349,12 @@ session_half_open_free (session_t *ho) { app_worker_t *app_wrk; - ASSERT (vlib_get_thread_index () <= 1); - app_wrk = app_worker_get (ho->app_wrk_index); - app_worker_del_half_open (app_wrk, ho); - session_free (ho); + ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); + app_wrk = app_worker_get_if_valid (ho->app_wrk_index); + if (app_wrk) + app_worker_del_half_open (app_wrk, ho); + else + session_free (ho); } static void @@ -346,16 +367,26 @@ session_half_open_free_rpc (void *args) void session_half_open_delete_notify (transport_connection_t *tc) { + session_t *ho = ho_session_get (tc->s_index); + + /* Cleanup half-open lookup table if need be */ + if 
(ho->session_state != SESSION_STATE_TRANSPORT_CLOSED) + { + if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_del_half_open (tc); + } + session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED); + /* Notification from ctrl thread accepted without rpc */ - if (!tc->thread_index) + if (tc->thread_index == transport_cl_thread ()) { - session_half_open_free (ho_session_get (tc->s_index)); + session_half_open_free (ho); } else { void *args = uword_to_pointer ((uword) tc->s_index, void *); - session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc, - args); + session_send_rpc_evt_to_thread_force (transport_cl_thread (), + session_half_open_free_rpc, args); } } @@ -364,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc) { session_t *ho; + /* Support half-open migrations only for transports with no lookup */ + ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP); + ho = ho_session_get (tc->s_index); ho->flags |= SESSION_F_IS_MIGRATING; ho->connection_index = ~0; @@ -383,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc) return -1; } ho->connection_index = tc->c_index; - /* Overload app index for half-open with new thread */ - ho->app_index = tc->thread_index; + /* Overload al_index for half-open with new thread */ + ho->al_index = tc->thread_index; return 0; } @@ -399,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc) s = session_alloc (thread_index); s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4); - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); /* Attach transport to session and vice versa */ s->connection_index = tc->c_index; @@ -546,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f, } } +void +session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index) +{ + u8 need_interrupt; + + ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ()); + need_interrupt = 
clib_bitmap_is_zero (wrk->app_wrks_pending_ntf); + wrk->app_wrks_pending_ntf = + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1); + + if (need_interrupt) + vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index); +} + +always_inline void +session_program_io_event (app_worker_t *app_wrk, session_t *s, + session_evt_type_t et, u8 is_cl) +{ + if (is_cl) + { + /* Special events for connectionless sessions */ + et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX; + + ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN); + session_event_t evt = { + .event_type = et, + .session_handle = session_handle (s), + }; + + app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt); + } + else + { + app_worker_add_event (app_wrk, s, et); + } +} + +static inline int +session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f, + session_evt_type_t evt_type) +{ + app_worker_t *app_wrk; + application_t *app; + u8 is_cl; + int i; + + app = application_get (app_index); + if (!app) + return -1; + + is_cl = s->thread_index != vlib_get_thread_index (); + for (i = 0; i < f->shr->n_subscribers; i++) + { + app_wrk = application_get_worker (app, f->shr->subscribers[i]); + if (!app_wrk) + continue; + session_program_io_event (app_wrk, s, evt_type, is_cl ? 
1 : 0); + } + + return 0; +} + +always_inline int +session_enqueue_notify_inline (session_t *s, u8 is_cl) +{ + app_worker_t *app_wrk; + + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) + return -1; + + session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl); + + if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo))) + return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo, + SESSION_IO_EVT_RX); + + return 0; +} + +int +session_enqueue_notify (session_t *s) +{ + return session_enqueue_notify_inline (s, 0 /* is_cl */); +} + +int +session_enqueue_notify_cl (session_t *s) +{ + return session_enqueue_notify_inline (s, 1 /* is_cl */); +} + +int +session_dequeue_notify (session_t *s) +{ + app_worker_t *app_wrk; + u8 is_cl; + + /* Unset as soon as event is requested */ + svm_fifo_clear_deq_ntf (s->tx_fifo); + + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (PREDICT_FALSE (!app_wrk)) + return -1; + + is_cl = s->session_state == SESSION_STATE_LISTENING || + s->session_state == SESSION_STATE_OPENED; + session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0); + + if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo))) + return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo, + SESSION_IO_EVT_TX); + + return 0; +} + +/** + * Flushes queue of sessions that are to be notified of new data + * enqueued events. + * + * @param transport_proto transport protocol for which queue to be flushed + * @param thread_index Thread index for which the flush is to be performed. + * @return 0 on success or a positive number indicating the number of + * failures due to API queue being full. 
+ */ +void +session_main_flush_enqueue_events (transport_proto_t transport_proto, + u32 thread_index) +{ + session_worker_t *wrk = session_main_get_worker (thread_index); + session_handle_t *handles; + session_t *s; + u32 i, is_cl; + + handles = wrk->session_to_enqueue[transport_proto]; + + for (i = 0; i < vec_len (handles); i++) + { + s = session_get_from_handle (handles[i]); + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, + 0 /* TODO/not needed */); + is_cl = + s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS); + if (!is_cl) + session_enqueue_notify_inline (s, 0); + else + session_enqueue_notify_inline (s, 1); + } + + vec_reset_length (handles); + wrk->session_to_enqueue[transport_proto] = handles; +} + /* - * Enqueue data for delivery to session peer. Does not notify peer of enqueue - * event but on request can queue notification events for later delivery by - * calling stream_server_flush_enqueue_events(). + * Enqueue data for delivery to app. If requested, it queues app notification + * event for later delivery. * * @param tc Transport connection which is to be enqueued data * @param b Buffer to be enqueued @@ -598,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc, if (queue_event) { - /* Queue RX event on this fifo. Eventually these will need to be flushed - * by calling stream_server_flush_enqueue_events () */ - session_worker_t *wrk; - - wrk = session_main_get_worker (s->thread_index); + /* Queue RX event on this fifo. 
Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ if (!(s->flags & SESSION_F_RX_EVT)) { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + ASSERT (s->thread_index == vlib_get_thread_index ()); s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index); + vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); } session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); @@ -615,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc, return enqueued; } -int -session_enqueue_dgram_connection (session_t * s, - session_dgram_hdr_t * hdr, - vlib_buffer_t * b, u8 proto, u8 queue_event) +always_inline int +session_enqueue_dgram_connection_inline (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event, u32 is_cl) { int rv; @@ -627,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s, if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) { - /* *INDENT-OFF* */ svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) }, { vlib_buffer_get_current (b), b->current_length } }; - /* *INDENT-ON* */ rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */ ); @@ -664,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s, if (queue_event && rv > 0) { - /* Queue RX event on this fifo. Eventually these will need to be flushed - * by calling stream_server_flush_enqueue_events () */ - session_worker_t *wrk; - - wrk = session_main_get_worker (s->thread_index); + /* Queue RX event on this fifo. Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ if (!(s->flags & SESSION_F_RX_EVT)) { + u32 thread_index = + is_cl ? 
vlib_get_thread_index () : s->thread_index; + session_worker_t *wrk = session_main_get_worker (thread_index); + ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[proto], s->session_index); + vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); } session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); @@ -681,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s, } int +session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 0 /* is_cl */); +} + +int +session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 1 /* is_cl */); +} + +int +session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event) +{ + session_t *awls; + + awls = app_listener_select_wrk_cl_session (s, hdr); + return session_enqueue_dgram_connection_inline (awls, hdr, b, proto, + queue_event, 1 /* is_cl */); +} + +int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes) { @@ -703,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes) return rv; } -static inline int -session_notify_subscribers (u32 app_index, session_t * s, - svm_fifo_t * f, session_evt_type_t evt_type) -{ - app_worker_t *app_wrk; - application_t *app; - int i; - - app = application_get (app_index); - if (!app) - return -1; - - for (i = 0; i < f->shr->n_subscribers; i++) - { - app_wrk = application_get_worker (app, f->shr->subscribers[i]); - if (!app_wrk) - continue; - if (app_worker_lock_and_send_event (app_wrk, s, evt_type)) - return -1; - } - - return 0; -} - -/** - * Notify session peer that new 
data has been enqueued. - * - * @param s Stream session for which the event is to be generated. - * @param lock Flag to indicate if call should lock message queue. - * - * @return 0 on success or negative number if failed to send notification. - */ -static inline int -session_enqueue_notify_inline (session_t * s) -{ - app_worker_t *app_wrk; - u32 session_index; - u8 n_subscribers; - - session_index = s->session_index; - n_subscribers = svm_fifo_n_subscribers (s->rx_fifo); - - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app_wrk)) - { - SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index); - return 0; - } - - SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo)); - - s->flags &= ~SESSION_F_RX_EVT; - - /* Application didn't confirm accept yet */ - if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING)) - return 0; - - if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, - SESSION_IO_EVT_RX))) - return -1; - - if (PREDICT_FALSE (n_subscribers)) - { - s = session_get (session_index, vlib_get_thread_index ()); - return session_notify_subscribers (app_wrk->app_index, s, - s->rx_fifo, SESSION_IO_EVT_RX); - } - - return 0; -} - -int -session_enqueue_notify (session_t * s) -{ - return session_enqueue_notify_inline (s); -} - -static void -session_enqueue_notify_rpc (void *arg) -{ - u32 session_index = pointer_to_uword (arg); - session_t *s; - - s = session_get_if_valid (session_index, vlib_get_thread_index ()); - if (!s) - return; - - session_enqueue_notify (s); -} - -/** - * Like session_enqueue_notify, but can be called from a thread that does not - * own the session. - */ -void -session_enqueue_notify_thread (session_handle_t sh) -{ - u32 thread_index = session_thread_from_handle (sh); - u32 session_index = session_index_from_handle (sh); - - /* - * Pass session index (u32) as opposed to handle (u64) in case pointers - * are not 64-bit. 
- */ - session_send_rpc_evt_to_thread (thread_index, - session_enqueue_notify_rpc, - uword_to_pointer (session_index, void *)); -} - -int -session_dequeue_notify (session_t * s) -{ - app_worker_t *app_wrk; - - svm_fifo_clear_deq_ntf (s->tx_fifo); - - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (PREDICT_FALSE (!app_wrk)) - return -1; - - if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, - SESSION_IO_EVT_TX))) - return -1; - - if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers)) - return session_notify_subscribers (app_wrk->app_index, s, - s->tx_fifo, SESSION_IO_EVT_TX); - - return 0; -} - -/** - * Flushes queue of sessions that are to be notified of new data - * enqueued events. - * - * @param thread_index Thread index for which the flush is to be performed. - * @return 0 on success or a positive number indicating the number of - * failures due to API queue being full. - */ -int -session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index) -{ - session_worker_t *wrk = session_main_get_worker (thread_index); - session_t *s; - int i, errors = 0; - u32 *indices; - - indices = wrk->session_to_enqueue[transport_proto]; - - for (i = 0; i < vec_len (indices); i++) - { - s = session_get_if_valid (indices[i], thread_index); - if (PREDICT_FALSE (!s)) - { - errors++; - continue; - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, - 0 /* TODO/not needed */ ); - - if (PREDICT_FALSE (session_enqueue_notify_inline (s))) - errors++; - } - - vec_reset_length (indices); - wrk->session_to_enqueue[transport_proto] = indices; - - return errors; -} - -int -session_main_flush_all_enqueue_events (u8 transport_proto) -{ - vlib_thread_main_t *vtm = vlib_get_thread_main (); - int i, errors = 0; - for (i = 0; i < 1 + vtm->n_threads; i++) - errors += session_main_flush_enqueue_events (transport_proto, i); - return errors; -} - int session_stream_connect_notify (transport_connection_t * tc, session_error_t err) @@ -898,6 +930,7 @@ 
session_stream_connect_notify (transport_connection_t * tc, session_lookup_del_half_open (tc); ho = ho_session_get (tc->s_index); + session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED); opaque = ho->opaque; app_wrk = app_worker_get_if_valid (ho->app_wrk_index); if (!app_wrk) @@ -907,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc, return app_worker_connect_notify (app_wrk, s, err, opaque); s = session_alloc_for_connection (tc); - s->session_state = SESSION_STATE_CONNECTING; + session_set_state (s, SESSION_STATE_CONNECTING); s->app_wrk_index = app_wrk->wrk_index; + s->opaque = opaque; new_si = s->session_index; new_ti = s->thread_index; @@ -920,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc, } s = session_get (new_si, new_ti); - s->session_state = SESSION_STATE_READY; + session_set_state (s, SESSION_STATE_READY); session_lookup_add_connection (tc, session_handle (s)); if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque)) @@ -937,17 +971,19 @@ session_stream_connect_notify (transport_connection_t * tc, } static void -session_switch_pool_reply (void *arg) +session_switch_pool_closed_rpc (void *arg) { - u32 session_index = pointer_to_uword (arg); + session_handle_t sh; session_t *s; - s = session_get_if_valid (session_index, vlib_get_thread_index ()); + sh = pointer_to_uword (arg); + s = session_get_from_handle_if_valid (sh); if (!s) return; - /* Notify app that it has data on the new session */ - session_enqueue_notify (s); + transport_cleanup (session_get_transport_proto (s), s->connection_index, + s->thread_index); + session_cleanup (s); } typedef struct _session_switch_pool_args @@ -965,39 +1001,40 @@ static void session_switch_pool (void *cb_args) { session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args; - session_handle_t new_sh; + session_handle_t sh, new_sh; segment_manager_t *sm; app_worker_t *app_wrk; session_t *s; - void *rargs; ASSERT (args->thread_index == 
vlib_get_thread_index ()); s = session_get (args->session_index, args->thread_index); - transport_cleanup (session_get_transport_proto (s), s->connection_index, - s->thread_index); + app_wrk = app_worker_get_if_valid (s->app_wrk_index); + if (!app_wrk) + goto app_closed; - new_sh = session_make_handle (args->new_session_index, - args->new_thread_index); + /* Cleanup fifo segment slice state for fifos */ + sm = app_worker_get_connect_segment_manager (app_wrk); + segment_manager_detach_fifo (sm, &s->rx_fifo); + segment_manager_detach_fifo (sm, &s->tx_fifo); - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (app_wrk) - { - /* Cleanup fifo segment slice state for fifos */ - sm = app_worker_get_connect_segment_manager (app_wrk); - segment_manager_detach_fifo (sm, &s->rx_fifo); - segment_manager_detach_fifo (sm, &s->tx_fifo); + /* Check if session closed during migration */ + if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) + goto app_closed; - /* Notify app, using old session, about the migration event */ - app_worker_migrate_notify (app_wrk, s, new_sh); - } + new_sh = + session_make_handle (args->new_session_index, args->new_thread_index); + app_worker_migrate_notify (app_wrk, s, new_sh); - /* Trigger app read and fifo updates on the new thread */ - rargs = uword_to_pointer (args->new_session_index, void *); - session_send_rpc_evt_to_thread (args->new_thread_index, - session_switch_pool_reply, rargs); + clib_mem_free (cb_args); + return; - session_free (s); +app_closed: + /* Session closed during migration. 
Clean everything up */ + sh = session_handle (s); + session_send_rpc_evt_to_thread (args->new_thread_index, + session_switch_pool_closed_rpc, + uword_to_pointer (sh, void *)); clib_mem_free (cb_args); } @@ -1018,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc, */ new_s = session_clone_safe (tc->s_index, old_thread_index); new_s->connection_index = tc->c_index; - new_s->session_state = SESSION_STATE_READY; + session_set_state (new_s, SESSION_STATE_READY); new_s->flags |= SESSION_F_IS_MIGRATING; if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) @@ -1067,7 +1104,16 @@ session_transport_closing_notify (transport_connection_t * tc) s = session_get (tc->s_index, tc->thread_index); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + + /* Wait for reply from app before sending notification as the + * accept might be rejected */ + if (s->session_state == SESSION_STATE_ACCEPTING) + { + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); + return; + } + + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); app_wrk = app_worker_get (s->app_wrk_index); app_worker_close_notify (app_wrk, s); } @@ -1108,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc) * because transport will soon be closed and closed sessions * are assumed to have been removed from the lookup table */ session_lookup_del_session (s); - s->session_state = SESSION_STATE_TRANSPORT_DELETED; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT); svm_fifo_dequeue_drop_all (s->tx_fifo); break; @@ -1119,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc) * session is just removed because both transport and app have * confirmed the close*/ session_lookup_del_session (s); - s->session_state = SESSION_STATE_TRANSPORT_DELETED; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_cleanup_notify (s, 
SESSION_CLEANUP_TRANSPORT); svm_fifo_dequeue_drop_all (s->tx_fifo); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); @@ -1128,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc) break; case SESSION_STATE_CLOSED: session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT); + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); session_delete (s); break; default: @@ -1155,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc) if (!(s = session_get_if_valid (tc->s_index, tc->thread_index))) return; + if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) + return; + /* Transport thinks that app requested close but it actually didn't. * Can happen for tcp: * 1)if fin and rst are received in close succession. @@ -1163,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc) { session_transport_closing_notify (tc); svm_fifo_dequeue_drop_all (s->tx_fifo); - s->session_state = SESSION_STATE_TRANSPORT_CLOSED; + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED); } /* If app close has not been received or has not yet resulted in * a transport close, only mark the session transport as closed */ else if (s->session_state <= SESSION_STATE_CLOSING) - { - s->session_state = SESSION_STATE_TRANSPORT_CLOSED; - } + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED); /* If app also closed, switch to closed */ else if (s->session_state == SESSION_STATE_APP_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) @@ -1193,7 +1241,12 @@ session_transport_reset_notify (transport_connection_t * tc) svm_fifo_dequeue_drop_all (s->tx_fifo); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; - s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + if (s->session_state == SESSION_STATE_ACCEPTING) + { + session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING); + return; + } + session_set_state (s, 
SESSION_STATE_TRANSPORT_CLOSING); app_wrk = app_worker_get (s->app_wrk_index); app_worker_reset_notify (app_wrk, s); } @@ -1210,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc) return -1; if (s->session_state != SESSION_STATE_CREATED) return 0; - s->session_state = SESSION_STATE_ACCEPTING; + session_set_state (s, SESSION_STATE_ACCEPTING); if (app_worker_accept_notify (app_wrk, s)) { /* On transport delete, no notifications should be sent. Unless, the * accept is retried and successful. */ - s->session_state = SESSION_STATE_CREATED; + session_set_state (s, SESSION_STATE_CREATED); return -1; } return 0; @@ -1233,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index, s = session_alloc_for_connection (tc); s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index; - s->session_state = SESSION_STATE_CREATED; + session_set_state (s, SESSION_STATE_CREATED); if ((rv = app_worker_init_accepted (s))) { @@ -1277,6 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index, } session_lookup_add_connection (tc, session_handle (s)); + session_set_state (s, SESSION_STATE_ACCEPTING); app_wrk = app_worker_get (s->app_wrk_index); if ((rv = app_worker_accept_notify (app_wrk, s))) @@ -1314,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh) app_wrk = app_worker_get (rmt->app_wrk_index); s = session_alloc_for_connection (tc); s->app_wrk_index = app_wrk->wrk_index; - s->session_state = SESSION_STATE_OPENED; + s->opaque = rmt->opaque; + session_set_state (s, SESSION_STATE_OPENED); + if (transport_connection_is_cless (tc)) + s->flags |= SESSION_F_IS_CLESS; if (app_worker_init_connected (app_wrk, s)) { session_free (s); @@ -1382,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh) typedef int (*session_open_service_fn) (session_endpoint_cfg_t *, session_handle_t *); -/* *INDENT-OFF* */ static session_open_service_fn 
session_open_srv_fns[TRANSPORT_N_SERVICES] = { session_open_vc, session_open_cl, session_open_app, }; -/* *INDENT-ON* */ /** * Ask transport to open connection to remote transport endpoint. @@ -1422,12 +1477,12 @@ session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh) int session_listen (session_t * ls, session_endpoint_cfg_t * sep) { - transport_endpoint_t *tep; + transport_endpoint_cfg_t *tep; int tc_index; u32 s_index; /* Transport bind/listen */ - tep = session_endpoint_to_transport (sep); + tep = session_endpoint_to_transport_cfg (sep); s_index = ls->session_index; tc_index = transport_start_listen (session_get_transport_proto (ls), s_index, tep); @@ -1439,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep) * worker because local tables (for ct sessions) are not backed by a fib */ ls = listen_session_get (s_index); ls->connection_index = tc_index; + ls->opaque = sep->opaque; + if (transport_connection_is_cless (session_get_transport (ls))) + ls->flags |= SESSION_F_IS_CLESS; return 0; } @@ -1493,9 +1551,15 @@ session_half_close (session_t *s) void session_close (session_t * s) { - if (!s) + if (!s || (s->flags & SESSION_F_APP_CLOSED)) return; + /* Transports can close and delete their state independent of app closes + * and transport initiated state transitions can hide app closes. Instead + * of extending the state machine to support separate tracking of app and + * transport initiated closes, use a flag. */ + s->flags |= SESSION_F_APP_CLOSED; + if (s->session_state >= SESSION_STATE_CLOSING) { /* Session will only be removed once both app and transport @@ -1506,7 +1570,12 @@ session_close (session_t * s) return; } - s->session_state = SESSION_STATE_CLOSING; + /* App closed so stop propagating dequeue notifications. + * App might disconnect session before connected, in this case, + * tx_fifo may not be setup yet, so clear only it's inited. 
*/ + if (s->tx_fifo) + svm_fifo_clear_deq_ntf (s->tx_fifo); + session_set_state (s, SESSION_STATE_CLOSING); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); } @@ -1518,12 +1587,46 @@ session_reset (session_t * s) { if (s->session_state >= SESSION_STATE_CLOSING) return; - /* Drop all outstanding tx data */ - svm_fifo_dequeue_drop_all (s->tx_fifo); - s->session_state = SESSION_STATE_CLOSING; + /* Drop all outstanding tx data + * App might disconnect session before connected, in this case, + * tx_fifo may not be setup yet, so clear only it's inited. */ + if (s->tx_fifo) + svm_fifo_dequeue_drop_all (s->tx_fifo); + session_set_state (s, SESSION_STATE_CLOSING); session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET); } +void +session_detach_app (session_t *s) +{ + if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING) + { + session_close (s); + } + else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED) + { + transport_connection_t *tc; + + /* Transport is closing but it's not yet deleted. Confirm close and + * subsequently detach transport from session and enqueue a session + * cleanup notification. Transport closed and cleanup notifications are + * going to be dropped by session layer apis */ + transport_close (session_get_transport_proto (s), s->connection_index, + s->thread_index); + tc = session_get_transport (s); + tc->s_index = SESSION_INVALID_INDEX; + session_set_state (s, SESSION_STATE_TRANSPORT_DELETED); + session_cleanup_notify (s, SESSION_CLEANUP_SESSION); + } + else + { + session_cleanup_notify (s, SESSION_CLEANUP_SESSION); + } + + s->flags |= SESSION_F_APP_CLOSED; + s->app_wrk_index = APP_INVALID_INDEX; +} + /** * Notify transport the session can be half-disconnected. 
* @@ -1555,10 +1658,10 @@ session_transport_close (session_t * s) if (s->session_state >= SESSION_STATE_APP_CLOSED) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); /* If transport is already deleted, just free the session */ else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) - session_free_w_fifos (s); + session_program_cleanup (s); return; } @@ -1568,7 +1671,7 @@ session_transport_close (session_t * s) * delete notify. This will finally lead to the complete cleanup of the * session. */ - s->session_state = SESSION_STATE_APP_CLOSED; + session_set_state (s, SESSION_STATE_APP_CLOSED); transport_close (session_get_transport_proto (s), s->connection_index, s->thread_index); @@ -1583,13 +1686,13 @@ session_transport_reset (session_t * s) if (s->session_state >= SESSION_STATE_APP_CLOSED) { if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) - s->session_state = SESSION_STATE_CLOSED; + session_set_state (s, SESSION_STATE_CLOSED); else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) - session_free_w_fifos (s); + session_program_cleanup (s); return; } - s->session_state = SESSION_STATE_APP_CLOSED; + session_set_state (s, SESSION_STATE_APP_CLOSED); transport_reset (session_get_transport_proto (s), s->connection_index, s->thread_index); } @@ -1616,64 +1719,63 @@ session_transport_cleanup (session_t * s) } /** - * Allocate event queues in the shared-memory segment + * Allocate worker mqs in share-able segment * - * That can only be a newly created memfd segment, that must be - * mapped by all apps/stack users. + * That can only be a newly created memfd segment, that must be mapped + * by all apps/stack users unless private rx mqs are enabled. 
*/ void -session_vpp_event_queues_allocate (session_main_t * smm) +session_vpp_wrk_mqs_alloc (session_main_t *smm) { - u32 evt_q_length = 2048, evt_size = sizeof (session_event_t); - fifo_segment_t *eqs = &smm->evt_qs_segment; - uword eqs_size = 64 << 20; - pid_t vpp_pid = getpid (); + u32 mq_q_length = 2048, evt_size = sizeof (session_event_t); + fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment; + svm_msg_q_cfg_t _cfg, *cfg = &_cfg; + uword mqs_seg_size; int i; - if (smm->configured_event_queue_length) - evt_q_length = smm->configured_event_queue_length; + mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length); - if (smm->evt_qs_segment_size) - eqs_size = smm->evt_qs_segment_size; + svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { + { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 } + }; + cfg->consumer_pid = 0; + cfg->n_rings = 2; + cfg->q_nitems = mq_q_length; + cfg->ring_cfgs = rc; + + /* + * Compute mqs segment size based on rings config and leave space + * for passing extended configuration messages, i.e., data allocated + * outside of the rings. If provided with a config value, accept it + * if larger than minimum size. 
+ */ + mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk); + mqs_seg_size = mqs_seg_size + (1 << 20); + mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size); - eqs->ssvm.ssvm_size = eqs_size; - eqs->ssvm.my_pid = vpp_pid; - eqs->ssvm.name = format (0, "%s%c", "session: evt-qs-segment", 0); - /* clib_mem_vm_map_shared consumes first page before requested_va */ - eqs->ssvm.requested_va = smm->session_baseva + clib_mem_get_page_size (); + mqs_seg->ssvm.ssvm_size = mqs_seg_size; + mqs_seg->ssvm.my_pid = getpid (); + mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0); - if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD)) + if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD)) { clib_warning ("failed to initialize queue segment"); return; } - fifo_segment_init (eqs); + fifo_segment_init (mqs_seg); /* Special fifo segment that's filled only with mqs */ - eqs->h->n_mqs = vec_len (smm->wrk); + mqs_seg->h->n_mqs = vec_len (smm->wrk); for (i = 0; i < vec_len (smm->wrk); i++) - { - svm_msg_q_cfg_t _cfg, *cfg = &_cfg; - svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { - {evt_q_length, evt_size, 0} - , - {evt_q_length >> 1, 256, 0} - }; - cfg->consumer_pid = 0; - cfg->n_rings = 2; - cfg->q_nitems = evt_q_length; - cfg->ring_cfgs = rc; - - smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (eqs, i, cfg); - } + smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg); } fifo_segment_t * -session_main_get_evt_q_segment (void) +session_main_get_wrk_mqs_segment (void) { - return &session_main.evt_qs_segment; + return &session_main.wrk_mqs_segment; } u64 @@ -1689,14 +1791,28 @@ session_segment_handle (session_t * s) f->segment_index); } -/* *INDENT-OFF* */ +void +session_get_original_dst (transport_endpoint_t *i2o_src, + transport_endpoint_t *i2o_dst, + transport_proto_t transport_proto, u32 *original_dst, + u16 *original_dst_port) +{ + session_main_t *smm = vnet_get_session_main (); + ip_protocol_t proto = + 
(transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP); + if (!smm->original_dst_lookup || !i2o_dst->is_ip4) + return; + smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4, + i2o_dst->port, proto, original_dst, + original_dst_port); +} + static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = { session_tx_fifo_peek_and_snd, session_tx_fifo_dequeue_and_snd, session_tx_fifo_dequeue_internal, session_tx_fifo_dequeue_and_snd }; -/* *INDENT-ON* */ void session_register_transport (transport_proto_t transport_proto, @@ -1721,6 +1837,39 @@ session_register_transport (transport_proto_t transport_proto, session_tx_fns[vft->transport_options.tx_type]; } +void +session_register_update_time_fn (session_update_time_fn fn, u8 is_add) +{ + session_main_t *smm = &session_main; + session_update_time_fn *fi; + u32 fi_pos = ~0; + u8 found = 0; + + vec_foreach (fi, smm->update_time_fns) + { + if (*fi == fn) + { + fi_pos = fi - smm->update_time_fns; + found = 1; + break; + } + } + + if (is_add) + { + if (found) + { + clib_warning ("update time fn %p already registered", fn); + return; + } + vec_add1 (smm->update_time_fns, fn); + } + else + { + vec_del1 (smm->update_time_fns, fi_pos); + } +} + transport_proto_t session_add_transport_proto (void) { @@ -1788,6 +1937,44 @@ session_queue_run_on_main_thread (vlib_main_t * vm) vlib_node_set_interrupt_pending (vm, session_queue_node.index); } +static void +session_stats_collector_fn (vlib_stats_collector_data_t *d) +{ + u32 i, n_workers, n_wrk_sessions, n_sessions = 0; + session_main_t *smm = &session_main; + session_worker_t *wrk; + counter_t **counters; + counter_t *cb; + + n_workers = vec_len (smm->wrk); + vlib_stats_validate (d->entry_index, 0, n_workers - 1); + counters = d->entry->data; + cb = counters[0]; + + for (i = 0; i < vec_len (smm->wrk); i++) + { + wrk = session_main_get_worker (i); + n_wrk_sessions = pool_elts (wrk->sessions); + cb[i] = n_wrk_sessions; + n_sessions += n_wrk_sessions; 
+ } + + vlib_stats_set_gauge (d->private_data, n_sessions); +} + +static void +session_stats_collector_init (void) +{ + vlib_stats_collector_reg_t reg = {}; + + reg.entry_index = + vlib_stats_add_counter_vector ("/sys/session/sessions_per_worker"); + reg.private_data = vlib_stats_add_gauge ("/sys/session/sessions_total"); + reg.collect_fn = session_stats_collector_fn; + vlib_stats_register_collector_fn (®); + vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ()); +} + static clib_error_t * session_manager_main_enable (vlib_main_t * vm) { @@ -1808,6 +1995,7 @@ session_manager_main_enable (vlib_main_t * vm) /* Allocate cache line aligned worker contexts */ vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES); + clib_spinlock_init (&session_main.pool_realloc_lock); for (i = 0; i < num_threads; i++) { @@ -1816,21 +2004,20 @@ session_manager_main_enable (vlib_main_t * vm) wrk->new_head = clib_llist_make_head (wrk->event_elts, evt_list); wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list); wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list); + wrk->evts_pending_main = + clib_llist_make_head (wrk->event_elts, evt_list); wrk->vm = vlib_get_main_by_index (i); wrk->last_vlib_time = vlib_time_now (vm); wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ; wrk->timerfd = -1; vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type); - if (num_threads > 1) - clib_rwlock_init (&smm->wrk[i].peekers_rw_locks); - if (!smm->no_adaptive && smm->use_private_rx_mqs) session_wrk_enable_adaptive_mode (wrk); } /* Allocate vpp event queues segment and queue */ - session_vpp_event_queues_allocate (smm); + session_vpp_wrk_mqs_alloc (smm); /* Initialize segment manager properties */ segment_manager_main_init (); @@ -1860,6 +2047,7 @@ session_manager_main_enable (vlib_main_t * vm) session_lookup_init (); app_namespaces_init (); transport_init (); + session_stats_collector_init (); smm->is_initialized = 1; 
done: @@ -1879,6 +2067,87 @@ session_manager_main_disable (vlib_main_t * vm) transport_enable_disable (vm, 0 /* is_en */ ); } +/* in this new callback, cookie hint the index */ +void +session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vm->thread_index); + session_dma_transfer *dma_transfer; + + dma_transfer = &wrk->dma_trans[wrk->trans_head]; + vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers, + vec_len (dma_transfer->pending_tx_buffers)); + vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts, + vec_len (dma_transfer->pending_tx_nexts)); + vec_reset_length (dma_transfer->pending_tx_buffers); + vec_reset_length (dma_transfer->pending_tx_nexts); + wrk->trans_head++; + if (wrk->trans_head == wrk->trans_size) + wrk->trans_head = 0; + return; +} + +static void +session_prepare_dma_args (vlib_dma_config_t *args) +{ + args->max_batches = 16; + args->max_transfers = DMA_TRANS_SIZE; + args->max_transfer_size = 65536; + args->features = 0; + args->sw_fallback = 1; + args->barrier_before_last = 1; + args->callback_fn = session_dma_completion_cb; +} + +static void +session_node_enable_dma (u8 is_en, int n_vlibs) +{ + vlib_dma_config_t args; + session_prepare_dma_args (&args); + session_worker_t *wrk; + vlib_main_t *vm; + + int config_index = -1; + + if (is_en) + { + vm = vlib_get_main_by_index (0); + config_index = vlib_dma_config_add (vm, &args); + } + else + { + vm = vlib_get_main_by_index (0); + wrk = session_main_get_worker (0); + if (wrk->config_index >= 0) + vlib_dma_config_del (vm, wrk->config_index); + } + int i; + for (i = 0; i < n_vlibs; i++) + { + vm = vlib_get_main_by_index (i); + wrk = session_main_get_worker (vm->thread_index); + wrk->config_index = config_index; + if (is_en) + { + if (config_index >= 0) + wrk->dma_enabled = true; + wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc ( + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + bzero 
(wrk->dma_trans, + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + } + else + { + if (wrk->dma_trans) + clib_mem_free (wrk->dma_trans); + } + wrk->trans_head = 0; + wrk->trans_tail = 0; + wrk->trans_size = DMA_TRANS_SIZE; + } +} + void session_node_enable_disable (u8 is_en) { @@ -1914,11 +2183,15 @@ session_node_enable_disable (u8 is_en) if (!sm->poll_main) continue; } + vlib_node_set_state (vm, session_input_node.index, mstate); vlib_node_set_state (vm, session_queue_node.index, state); } if (sm->use_private_rx_mqs) application_enable_rx_mqs_nodes (is_en); + + if (sm->dma_enabled) + session_node_enable_dma (is_en, n_vlibs); } clib_error_t * @@ -1953,17 +2226,9 @@ session_main_init (vlib_main_t * vm) smm->poll_main = 0; smm->use_private_rx_mqs = 0; smm->no_adaptive = 0; - smm->session_baseva = HIGH_SEGMENT_BASEVA; - -#if (HIGH_SEGMENT_BASEVA > (4ULL << 30)) - smm->session_va_space_size = 128ULL << 30; - smm->evt_qs_segment_size = 64 << 20; -#else - smm->session_va_space_size = 128 << 20; - smm->evt_qs_segment_size = 1 << 20; -#endif - - smm->last_transport_proto_type = TRANSPORT_PROTO_SRTP; + smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP; + smm->port_allocator_min_src_port = 1024; + smm->port_allocator_max_src_port = 65535; return 0; } @@ -1993,13 +2258,16 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "event-queue-length %d", &nitems)) + if (unformat (input, "wrk-mq-length %d", &nitems)) { if (nitems >= 2048) - smm->configured_event_queue_length = nitems; + smm->configured_wrk_mq_length = nitems; else clib_warning ("event queue length %d too small, ignored", nitems); } + else if (unformat (input, "wrk-mqs-segment-size %U", + unformat_memory_size, &smm->wrk_mqs_segment_size)) + ; else if (unformat (input, "preallocated-sessions %d", &smm->preallocated_sessions)) ; @@ -2058,24 +2326,44 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) 
else if (unformat (input, "local-endpoints-table-buckets %d", &smm->local_endpoints_table_buckets)) ; - /* Deprecated but maintained for compatibility */ - else if (unformat (input, "evt_qs_memfd_seg")) - ; - else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, - &smm->evt_qs_segment_size)) - ; + else if (unformat (input, "min-src-port %d", &tmp)) + smm->port_allocator_min_src_port = tmp; + else if (unformat (input, "max-src-port %d", &tmp)) + smm->port_allocator_max_src_port = tmp; else if (unformat (input, "enable")) smm->session_enable_asap = 1; - else if (unformat (input, "segment-baseva 0x%lx", &smm->session_baseva)) - ; else if (unformat (input, "use-app-socket-api")) - appns_sapi_enable (); + (void) appns_sapi_enable_disable (1 /* is_enable */); else if (unformat (input, "poll-main")) smm->poll_main = 1; else if (unformat (input, "use-private-rx-mqs")) smm->use_private_rx_mqs = 1; else if (unformat (input, "no-adaptive")) smm->no_adaptive = 1; + else if (unformat (input, "use-dma")) + smm->dma_enabled = 1; + else if (unformat (input, "nat44-original-dst-enable")) + { + smm->original_dst_lookup = vlib_get_plugin_symbol ( + "nat_plugin.so", "nat44_original_dst_lookup"); + } + /* + * Deprecated but maintained for compatibility + */ + else if (unformat (input, "evt_qs_memfd_seg")) + ; + else if (unformat (input, "segment-baseva 0x%lx", &tmp)) + ; + else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, + &smm->wrk_mqs_segment_size)) + ; + else if (unformat (input, "event-queue-length %d", &nitems)) + { + if (nitems >= 2048) + smm->configured_wrk_mq_length = nitems; + else + clib_warning ("event queue length %d too small, ignored", nitems); + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 2d01eb6a67a..a5604bf8725 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -21,23 +21,12 @@ #include 
<vnet/session/session_debug.h> #include <svm/message_queue.h> #include <svm/fifo_segment.h> +#include <vlib/dma/dma.h> -#define foreach_session_input_error \ -_(NO_SESSION, "No session drops") \ -_(NO_LISTENER, "No listener for dst port drops") \ -_(ENQUEUED, "Packets pushed into rx fifo") \ -_(NOT_READY, "Session not ready packets") \ -_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \ -_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \ -_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \ - -typedef enum +typedef struct session_wrk_stats_ { -#define _(sym,str) SESSION_ERROR_##sym, - foreach_session_input_error -#undef _ - SESSION_N_ERROR, -} session_input_error_t; + u32 errors[SESSION_N_ERRORS]; +} session_wrk_stats_t; typedef struct session_tx_context_ { @@ -59,6 +48,7 @@ typedef struct session_tx_context_ /** Vector of tx buffer free lists */ u32 *tx_buffers; + vlib_buffer_t **transport_pending_bufs; } session_tx_context_t; typedef struct session_evt_elt @@ -84,6 +74,13 @@ typedef enum session_wrk_flags_ SESSION_WRK_F_ADAPTIVE = 1 << 0, } __clib_packed session_wrk_flag_t; +#define DMA_TRANS_SIZE 1024 +typedef struct +{ + u32 *pending_tx_buffers; + u16 *pending_tx_nexts; +} session_dma_transfer; + typedef struct session_worker_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -103,8 +100,8 @@ typedef struct session_worker_ /** Convenience pointer to this worker's vlib_main */ vlib_main_t *vm; - /** Per-proto vector of sessions to enqueue */ - u32 **session_to_enqueue; + /** Per-proto vector of session handles to enqueue */ + session_handle_t **session_to_enqueue; /** Timerfd used to periodically signal wrk session queue node */ int timerfd; @@ -133,9 +130,6 @@ typedef struct session_worker_ /** Head of list of pending events */ clib_llist_index_t old_head; - /** Peekers rw lock */ - clib_rwlock_t peekers_rw_locks; - /** Vector of buffers to be sent */ u32 *pending_tx_buffers; @@ -151,8 +145,22 @@ typedef struct 
session_worker_ /** Flag that is set if main thread signaled to handle connects */ u32 n_pending_connects; - /** Main thread loops in poll mode without a connect */ - u32 no_connect_loops; + /** List head for first worker evts pending handling on main */ + clib_llist_index_t evts_pending_main; + + /** Per-app-worker bitmap of pending notifications */ + uword *app_wrks_pending_ntf; + + int config_index; + u8 dma_enabled; + session_dma_transfer *dma_trans; + u16 trans_head; + u16 trans_tail; + u16 trans_size; + u16 batch_num; + vlib_dma_batch_t *batch; + + session_wrk_stats_t stats; #if SESSION_DEBUG /** last event poll time by thread */ @@ -170,13 +178,22 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal; u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e); +typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index); +typedef void (*nat44_original_dst_lookup_fn) ( + ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst, + u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst, + u16 *original_dst_port); + typedef struct session_main_ { /** Worker contexts */ session_worker_t *wrk; + /** Vector of transport update time functions */ + session_update_time_fn *update_time_fns; + /** Event queues memfd segment */ - fifo_segment_t evt_qs_segment; + fifo_segment_t wrk_mqs_segment; /** Unique segment name counter */ u32 unique_segment_name_counter; @@ -189,11 +206,22 @@ typedef struct session_main_ * Trade memory for speed, for now */ u32 *session_type_to_next; - /** Thread for cl and ho that rely on cl allocs */ + /** Thread used for allocating active open connections, i.e., half-opens + * for transports like tcp, and sessions that will be migrated for cl + * transports like udp. If vpp has workers, this will be first worker. 
*/ u32 transport_cl_thread; transport_proto_t last_transport_proto_type; + /** Number of workers at pool realloc barrier */ + volatile u32 pool_realloc_at_barrier; + + /** Number of workers doing reallocs */ + volatile u32 pool_realloc_doing_work; + + /** Lock to synchronize parallel forced reallocs */ + clib_spinlock_t pool_realloc_lock; + /* * Config parameters */ @@ -217,12 +245,13 @@ typedef struct session_main_ u8 no_adaptive; /** vpp fifo event queue configured length */ - u32 configured_event_queue_length; + u32 configured_wrk_mq_length; /** Session ssvm segment configs*/ - uword session_baseva; - uword session_va_space_size; - uword evt_qs_segment_size; + uword wrk_mqs_segment_size; + + /** Session enable dma*/ + u8 dma_enabled; /** Session table size parameters */ u32 configured_v4_session_table_buckets; @@ -238,14 +267,22 @@ typedef struct session_main_ u32 local_endpoints_table_memory; u32 local_endpoints_table_buckets; + /** Transport source port allocation range */ + u16 port_allocator_min_src_port; + u16 port_allocator_max_src_port; + /** Preallocate session config parameter */ u32 preallocated_sessions; u16 msg_id_base; + + /** Query nat44-ed session to get original dst ip4 & dst port. 
*/ + nat44_original_dst_lookup_fn original_dst_lookup; } session_main_t; extern session_main_t session_main; extern vlib_node_registration_t session_queue_node; +extern vlib_node_registration_t session_input_node; extern vlib_node_registration_t session_queue_process_node; extern vlib_node_registration_t session_queue_pre_input_node; @@ -301,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt) static inline void session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt) { - ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX); + ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC); pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index); } @@ -329,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq); session_t *session_alloc (u32 thread_index); void session_free (session_t * s); -void session_free_w_fifos (session_t * s); +void session_cleanup (session_t *s); +void session_program_cleanup (session_t *s); void session_cleanup_half_open (session_handle_t ho_handle); u8 session_is_valid (u32 si, u8 thread_index); @@ -354,100 +392,53 @@ session_get_if_valid (u64 si, u32 thread_index) } always_inline session_t * -session_get_from_handle (session_handle_t handle) +session_get_from_handle (session_handle_tu_t handle) { session_main_t *smm = &session_main; - u32 session_index, thread_index; - session_parse_handle (handle, &session_index, &thread_index); - return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index); + return pool_elt_at_index (smm->wrk[handle.thread_index].sessions, + handle.session_index); } always_inline session_t * -session_get_from_handle_if_valid (session_handle_t handle) +session_get_from_handle_if_valid (session_handle_tu_t handle) { - u32 session_index, thread_index; - session_parse_handle (handle, &session_index, &thread_index); - return session_get_if_valid (session_index, thread_index); + return session_get_if_valid (handle.session_index, 
handle.thread_index); } -u64 session_segment_handle (session_t * s); - /** - * Acquires a lock that blocks a session pool from expanding. + * Get session from handle and avoid pool validation if no same thread * - * This is typically used for safely peeking into other threads' - * pools in order to clone elements. Lock should be dropped as soon - * as possible by calling @ref session_pool_remove_peeker. - * - * NOTE: Avoid using pool_elt_at_index while the lock is held because - * it may lead to free elt bitmap expansion/contraction! - */ -always_inline void -session_pool_add_peeker (u32 thread_index) -{ - session_worker_t *wrk = &session_main.wrk[thread_index]; - if (thread_index == vlib_get_thread_index ()) - return; - clib_rwlock_reader_lock (&wrk->peekers_rw_locks); -} - -always_inline void -session_pool_remove_peeker (u32 thread_index) -{ - session_worker_t *wrk = &session_main.wrk[thread_index]; - if (thread_index == vlib_get_thread_index ()) - return; - clib_rwlock_reader_unlock (&wrk->peekers_rw_locks); -} - -/** - * Get session from handle and 'lock' pool resize if not in same thread - * - * Caller should drop the peek 'lock' as soon as possible. + * Peekers are fine because pool grows with barrier (see @ref session_alloc) */ always_inline session_t * -session_get_from_handle_safe (u64 handle) +session_get_from_handle_safe (session_handle_tu_t handle) { - u32 thread_index = session_thread_from_handle (handle); - session_worker_t *wrk = &session_main.wrk[thread_index]; + session_worker_t *wrk = &session_main.wrk[handle.thread_index]; - if (thread_index == vlib_get_thread_index ()) + if (handle.thread_index == vlib_get_thread_index ()) { - return pool_elt_at_index (wrk->sessions, - session_index_from_handle (handle)); + return pool_elt_at_index (wrk->sessions, handle.session_index); } else { - session_pool_add_peeker (thread_index); - /* Don't use pool_elt_at index. 
See @ref session_pool_add_peeker */ - return wrk->sessions + session_index_from_handle (handle); + /* Don't use pool_elt_at index to avoid pool bitmap reallocs */ + return wrk->sessions + handle.session_index; } } -always_inline u32 -session_get_index (session_t * s) -{ - return (s - session_main.wrk[s->thread_index].sessions); -} - always_inline session_t * session_clone_safe (u32 session_index, u32 thread_index) { + u32 current_thread_index = vlib_get_thread_index (), new_index; session_t *old_s, *new_s; - u32 current_thread_index = vlib_get_thread_index (); - /* If during the memcpy pool is reallocated AND the memory allocator - * decides to give the old chunk of memory to somebody in a hurry to - * scribble something on it, we have a problem. So add this thread as - * a session pool peeker. - */ - session_pool_add_peeker (thread_index); new_s = session_alloc (current_thread_index); + new_index = new_s->session_index; + /* Session pools are reallocated with barrier (see @ref session_alloc) */ old_s = session_main.wrk[thread_index].sessions + session_index; clib_memcpy_fast (new_s, old_s, sizeof (*new_s)); - session_pool_remove_peeker (thread_index); new_s->thread_index = current_thread_index; - new_s->session_index = session_get_index (new_s); + new_s->session_index = new_index; return new_s; } @@ -457,16 +448,19 @@ int session_stop_listen (session_t * s); void session_half_close (session_t *s); void session_close (session_t * s); void session_reset (session_t * s); +void session_detach_app (session_t *s); void session_transport_half_close (session_t *s); void session_transport_close (session_t * s); void session_transport_reset (session_t * s); void session_transport_cleanup (session_t * s); -int session_send_io_evt_to_thread (svm_fifo_t * f, - session_evt_type_t evt_type); -int session_enqueue_notify (session_t * s); +int session_enqueue_notify (session_t *s); int session_dequeue_notify (session_t * s); +int session_enqueue_notify_cl (session_t *s); +int 
session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type); int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, session_evt_type_t evt_type); +int session_program_tx_io_evt (session_handle_tu_t sh, + session_evt_type_t evt_type); void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args); void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, @@ -479,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep, u8 is_lcl); int session_transport_attribute (session_t *s, u8 is_get, transport_endpt_attr_t *attr); +u64 session_segment_handle (session_t *s); u8 *format_session (u8 * s, va_list * args); uword unformat_session (unformat_input_t * input, va_list * args); @@ -496,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s, session_dgram_hdr_t * hdr, vlib_buffer_t * b, u8 proto, u8 queue_event); +int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); +int session_enqueue_dgram_connection_cl (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); int session_stream_connect_notify (transport_connection_t * tc, session_error_t err); int session_dgram_connect_notify (transport_connection_t * tc, @@ -513,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index, u32 thread_index, u8 notify); int session_dgram_accept (transport_connection_t * tc, u32 listener_index, u32 thread_index); + /** * Initialize session layer for given transport proto and ip version * @@ -529,10 +532,18 @@ void session_register_transport (transport_proto_t transport_proto, const transport_proto_vft_t * vft, u8 is_ip4, u32 output_node); transport_proto_t session_add_transport_proto (void); +void session_register_update_time_fn (session_update_time_fn fn, u8 is_add); int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 
max_bytes); u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes); +always_inline void +session_set_state (session_t *s, session_state_t session_state) +{ + s->session_state = session_state; + SESSION_EVT (SESSION_EVT_STATE_CHANGE, s); +} + always_inline u32 transport_max_rx_enqueue (transport_connection_t * tc) { @@ -575,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc) return svm_fifo_has_ooo_data (s->rx_fifo); } +always_inline u32 +transport_tx_fifo_has_dgram (transport_connection_t *tc) +{ + session_t *s = session_get (tc->s_index, tc->thread_index); + u32 max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo); + session_dgram_pre_hdr_t phdr; + + if (max_deq <= sizeof (session_dgram_hdr_t)) + return 0; + svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr); + return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t); +} + always_inline void transport_rx_fifo_req_deq_ntf (transport_connection_t *tc) { @@ -615,12 +639,19 @@ transport_cl_thread (void) return session_main.transport_cl_thread; } +always_inline u32 +session_vlib_thread_is_cl_thread (void) +{ + return (vlib_get_thread_index () == transport_cl_thread () || + vlib_thread_is_main_w_barrier ()); +} + /* * Listen sessions */ -always_inline u64 -listen_session_get_handle (session_t * s) +always_inline session_handle_t +listen_session_get_handle (session_t *s) { ASSERT (s->session_state == SESSION_STATE_LISTENING || session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC); @@ -667,8 +698,8 @@ always_inline session_t * ho_session_alloc (void) { session_t *s; - ASSERT (vlib_get_thread_index () == 0); - s = session_alloc (0); + ASSERT (session_vlib_thread_is_cl_thread ()); + s = session_alloc (transport_cl_thread ()); s->session_state = SESSION_STATE_CONNECTING; s->flags |= SESSION_F_HALF_OPEN; return s; @@ -677,7 +708,7 @@ ho_session_alloc (void) always_inline session_t * ho_session_get (u32 ho_index) { - return session_get (ho_index, 0 /* half-open 
thread */); + return session_get (ho_index, transport_cl_thread ()); } always_inline void @@ -702,7 +733,7 @@ vnet_get_session_main () always_inline session_worker_t * session_main_get_worker (u32 thread_index) { - return &session_main.wrk[thread_index]; + return vec_elt_at_index (session_main.wrk, thread_index); } static inline session_worker_t * @@ -710,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index) { if (thread_index > vec_len (session_main.wrk)) return 0; - return &session_main.wrk[thread_index]; + return session_main_get_worker (thread_index); } always_inline svm_msg_q_t * session_main_get_vpp_event_queue (u32 thread_index) { - return session_main.wrk[thread_index].vpp_event_queue; + return session_main_get_worker (thread_index)->vpp_event_queue; } always_inline u8 @@ -725,14 +756,31 @@ session_main_is_enabled () return session_main.is_enabled == 1; } +always_inline void +session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) +{ + if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) + wrk->stats.errors[-error] += value; + else + SESSION_DBG ("unknown session counter"); +} + +always_inline void +session_stat_error_inc (int error, int value) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, error, value); +} + #define session_cli_return_if_not_enabled() \ do { \ if (!session_main.is_enabled) \ return clib_error_return (0, "session layer is not enabled"); \ } while (0) -int session_main_flush_enqueue_events (u8 proto, u32 thread_index); -int session_main_flush_all_enqueue_events (u8 transport_proto); +void session_main_flush_enqueue_events (transport_proto_t transport_proto, + u32 thread_index); void session_queue_run_on_main_thread (vlib_main_t * vm); /** @@ -761,12 +809,116 @@ session_wrk_update_time (session_worker_t *wrk, f64 now) } void session_wrk_enable_adaptive_mode (session_worker_t *wrk); -fifo_segment_t *session_main_get_evt_q_segment 
(void); +fifo_segment_t *session_main_get_wrk_mqs_segment (void); void session_node_enable_disable (u8 is_en); clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); +void session_wrk_handle_evts_main_rpc (void *); +void session_wrk_program_app_wrk_evts (session_worker_t *wrk, + u32 app_wrk_index); session_t *session_alloc_for_connection (transport_connection_t * tc); session_t *session_alloc_for_half_open (transport_connection_t *tc); +void session_get_original_dst (transport_endpoint_t *i2o_src, + transport_endpoint_t *i2o_dst, + transport_proto_t transport_proto, + u32 *original_dst, u16 *original_dst_port); + +typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args); + +typedef struct +{ + u8 ph[STRUCT_OFFSET_OF (pool_header_t, max_elts) + 4]; + u32 flag; +} pool_safe_realloc_header_t; + +STATIC_ASSERT_SIZEOF (pool_safe_realloc_header_t, sizeof (pool_header_t)); + +#define POOL_REALLOC_SAFE_ELT_THRESH 32 + +#define pool_realloc_flag(PH) \ + ((pool_safe_realloc_header_t *) pool_header (PH))->flag + +typedef struct pool_realloc_rpc_args_ +{ + void **pool; + uword elt_size; + uword align; +} pool_realloc_rpc_args_t; + +always_inline void +pool_program_safe_realloc_rpc (void *args) +{ + vlib_main_t *vm = vlib_get_main (); + u32 free_elts, max_elts, n_alloc; + pool_realloc_rpc_args_t *pra; + + ASSERT (vlib_get_thread_index () == 0); + pra = (pool_realloc_rpc_args_t *) args; + + vlib_worker_thread_barrier_sync (vm); + + free_elts = _pool_free_elts (*pra->pool, pra->elt_size); + if (free_elts < POOL_REALLOC_SAFE_ELT_THRESH) + { + max_elts = _vec_max_len (*pra->pool, pra->elt_size); + n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH); + _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size); + } + pool_realloc_flag (*pra->pool) = 0; + clib_mem_free (args); + + vlib_worker_thread_barrier_release (vm); +} + +always_inline void +pool_program_safe_realloc (void **p, u32 elt_size, u32 align) +{ + pool_realloc_rpc_args_t *pra; + + /* 
Reuse pad as a realloc flag */ + if (pool_realloc_flag (*p)) + return; + + pra = clib_mem_alloc (sizeof (*pra)); + pra->pool = p; + pra->elt_size = elt_size; + pra->align = align; + pool_realloc_flag (*p) = 1; + + session_send_rpc_evt_to_thread (0 /* thread index */, + pool_program_safe_realloc_rpc, pra); +} + +#define pool_needs_realloc(P) \ + ((!P) || \ + (vec_len (pool_header (P)->free_indices) < POOL_REALLOC_SAFE_ELT_THRESH && \ + pool_free_elts (P) < POOL_REALLOC_SAFE_ELT_THRESH)) + +#define pool_get_aligned_safe(P, E, align) \ + do \ + { \ + if (PREDICT_FALSE (pool_needs_realloc (P))) \ + { \ + if (PREDICT_FALSE (!(P))) \ + { \ + pool_alloc_aligned (P, POOL_REALLOC_SAFE_ELT_THRESH, align); \ + } \ + else if (PREDICT_FALSE (!pool_free_elts (P))) \ + { \ + vlib_workers_sync (); \ + pool_alloc_aligned (P, pool_max_len (P), align); \ + vlib_workers_continue (); \ + ALWAYS_ASSERT (pool_free_elts (P) > 0); \ + } \ + else \ + { \ + pool_program_safe_realloc ((void **) &(P), sizeof ((P)[0]), \ + _vec_align (P, align)); \ + } \ + } \ + pool_get_aligned (P, E, align); \ + } \ + while (0) #endif /* __included_session_h__ */ diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 00e67dcd2d0..48eb932a2c9 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -82,40 +82,12 @@ session_send_fds (vl_api_registration_t * reg, int fds[], int n_fds) } static int -mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg) -{ - int rv; - u8 try = 0; - while (try < 100) - { - rv = svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, - SESSION_MQ_CTRL_EVT_RING, - SVM_Q_NOWAIT, msg); - if (!rv) - return 0; - /* - * Break the loop if mq is full, usually this is because the - * app has crashed or is hanging on somewhere. 
- */ - if (rv != -1) - break; - try++; - usleep (1); - } - clib_warning ("failed to alloc msg"); - return -1; -} - -static int mq_send_session_accepted_cb (session_t * s) { app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); - svm_msg_q_msg_t _msg, *msg = &_msg; session_accepted_msg_t m = { 0 }; - svm_msg_q_t *app_mq; fifo_segment_t *eq_seg; session_t *listener; - session_event_t *evt; application_t *app; app = application_get (app_wrk->app_index); @@ -164,15 +136,14 @@ mq_send_session_accepted_cb (session_t * s) m.mq_index = s->thread_index; } - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return SESSION_E_MQ_MSG_ALLOC; + if (application_original_dst_is_enabled (app)) + { + session_get_original_dst (&m.lcl, &m.rmt, + session_get_transport_proto (s), + &m.original_dst_ip4, &m.original_dst_port); + } - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_ACCEPTED; - clib_memcpy_fast (evt->data, &m, sizeof (m)); - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m)); return 0; } @@ -181,21 +152,12 @@ static inline void mq_send_session_close_evt (app_worker_t * app_wrk, session_handle_t sh, session_evt_type_t evt_type) { - svm_msg_q_msg_t _msg, *msg = &_msg; - session_disconnected_msg_t *mp; - svm_msg_q_t *app_mq; - session_event_t *evt; + session_disconnected_msg_t m = { 0 }; - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = evt_type; - mp = (session_disconnected_msg_t *) evt->data; - mp->handle = sh; - mp->context = app_wrk->api_client_index; - svm_msg_q_add_and_unlock (app_mq, msg); + m.handle = sh; + m.context = app_wrk->api_client_index; + + app_wrk_send_ctrl_evt (app_wrk, evt_type, &m, sizeof (m)); } static inline void @@ -249,13 +211,9 @@ int 
mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, session_t * s, session_error_t err) { - svm_msg_q_msg_t _msg, *msg = &_msg; session_connected_msg_t m = { 0 }; - svm_msg_q_t *app_mq; - transport_connection_t *tc; fifo_segment_t *eq_seg; app_worker_t *app_wrk; - session_event_t *evt; application_t *app; app_wrk = app_worker_get (app_wrk_index); @@ -271,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, if (session_has_transport (s)) { - tc = session_get_transport (s); - if (!tc) - { - clib_warning ("failed to retrieve transport!"); - m.retval = SESSION_E_REFUSED; - goto snd_msg; - } - m.handle = session_handle (s); m.vpp_event_queue_address = fifo_segment_msg_q_offset (eq_seg, s->thread_index); @@ -293,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, else { ct_connection_t *cct; - session_t *ss; cct = (ct_connection_t *) session_get_transport (s); m.handle = session_handle (s); @@ -304,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo); m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo); m.segment_handle = session_segment_handle (s); - ss = ct_session_get_peer (s); - m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo); - m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo); - m.ct_segment_handle = session_segment_handle (ss); m.mq_index = s->thread_index; + m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo); + m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo); + m.ct_segment_handle = cct->segment_handle; } /* Setup client session index in advance, in case data arrives @@ -318,31 +266,19 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, snd_msg: - app_mq = app_wrk->event_queue; - - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return SESSION_E_MQ_MSG_ALLOC; + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CONNECTED, &m, sizeof (m)); - evt = 
svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_CONNECTED; - clib_memcpy_fast (evt->data, &m, sizeof (m)); - - svm_msg_q_add_and_unlock (app_mq, msg); return 0; } -int +static int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, session_handle_t handle, int rv) { - svm_msg_q_msg_t _msg, *msg = &_msg; session_bound_msg_t m = { 0 }; - svm_msg_q_t *app_mq; - transport_endpoint_t tep; + transport_connection_t *ltc; fifo_segment_t *eq_seg; app_worker_t *app_wrk; - session_event_t *evt; application_t *app; app_listener_t *al; session_t *ls = 0; @@ -362,77 +298,60 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context, else ls = app_listener_get_local_session (al); - session_get_endpoint (ls, &tep, 1 /* is_lcl */); - m.lcl_port = tep.port; - m.lcl_is_ip4 = tep.is_ip4; - clib_memcpy_fast (m.lcl_ip, &tep.ip, sizeof (tep.ip)); + ltc = session_get_transport (ls); + m.lcl_port = ltc->lcl_port; + m.lcl_is_ip4 = ltc->is_ip4; + clib_memcpy_fast (m.lcl_ip, <c->lcl_ip, sizeof (m.lcl_ip)); app = application_get (app_wrk->app_index); eq_seg = application_get_rx_mqs_segment (app); m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index); m.mq_index = ls->thread_index; - if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL && - ls->rx_fifo) + if (transport_connection_is_cless (ltc)) { - m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo); - m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo); - m.segment_handle = session_segment_handle (ls); + session_t *wrk_ls; + m.mq_index = transport_cl_thread (); + m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index); + wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index); + m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo); + m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo); + m.segment_handle = session_segment_handle (wrk_ls); } snd_msg: - app_mq = app_wrk->event_queue; - - if (mq_try_lock_and_alloc_msg 
(app_mq, msg)) - return SESSION_E_MQ_MSG_ALLOC; - - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_BOUND; - clib_memcpy_fast (evt->data, &m, sizeof (m)); + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_BOUND, &m, sizeof (m)); - svm_msg_q_add_and_unlock (app_mq, msg); return 0; } -void -mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh, - u32 context, int rv) +static void +mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context, + int rv) { - svm_msg_q_msg_t _msg, *msg = &_msg; - session_unlisten_reply_msg_t *ump; - svm_msg_q_t *app_mq; - session_event_t *evt; + session_unlisten_reply_msg_t m = { 0 }; + app_worker_t *app_wrk; - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; + app_wrk = app_worker_get (app_wrk_index); - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY; - ump = (session_unlisten_reply_msg_t *) evt->data; - ump->context = context; - ump->handle = sh; - ump->retval = rv; - svm_msg_q_add_and_unlock (app_mq, msg); + m.context = context; + m.handle = sh; + m.retval = rv; + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_UNLISTEN_REPLY, &m, + sizeof (m)); } static void mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh) { - svm_msg_q_msg_t _msg, *msg = &_msg; session_migrated_msg_t m = { 0 }; fifo_segment_t *eq_seg; app_worker_t *app_wrk; - session_event_t *evt; - svm_msg_q_t *app_mq; application_t *app; u32 thread_index; thread_index = session_thread_from_handle (new_sh); app_wrk = app_worker_get (s->app_wrk_index); - app_mq = app_wrk->event_queue; app = application_get (app_wrk->app_index); eq_seg = application_get_rx_mqs_segment (app); @@ -442,27 +361,15 @@ mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh) m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, thread_index); m.segment_handle = 
SESSION_INVALID_HANDLE; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; - - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_MIGRATED; - clib_memcpy_fast (evt->data, &m, sizeof (m)); - - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_MIGRATED, &m, sizeof (m)); } static int mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle) { - int fds[SESSION_N_FD_TYPE], n_fds = 0; - svm_msg_q_msg_t _msg, *msg = &_msg; - session_app_add_segment_msg_t *mp; + session_app_add_segment_msg_t m = { 0 }; vl_api_registration_t *reg; app_worker_t *app_wrk; - session_event_t *evt; - svm_msg_q_t *app_mq; fifo_segment_t *fs; ssvm_private_t *sp; u8 fd_flags = 0; @@ -488,29 +395,16 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle) } fd_flags |= SESSION_FD_F_MEMFD_SEGMENT; - fds[n_fds] = sp->fd; - n_fds += 1; } - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return -1; - - if (n_fds) - session_send_fds (reg, fds, n_fds); - - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT; - mp = (session_app_add_segment_msg_t *) evt->data; - clib_memset (mp, 0, sizeof (*mp)); - mp->segment_size = sp->ssvm_size; - mp->fd_flags = fd_flags; - mp->segment_handle = segment_handle; - strncpy ((char *) mp->segment_name, (char *) sp->name, - sizeof (mp->segment_name) - 1); + m.segment_size = sp->ssvm_size; + m.fd_flags = fd_flags; + m.segment_handle = segment_handle; + strncpy ((char *) m.segment_name, (char *) sp->name, + sizeof (m.segment_name) - 1); - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m, + sizeof (m), sp->fd); return 0; } @@ -518,12 +412,9 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle) static int mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle) { 
- svm_msg_q_msg_t _msg, *msg = &_msg; - session_app_del_segment_msg_t *mp; + session_app_del_segment_msg_t m = { 0 }; vl_api_registration_t *reg; app_worker_t *app_wrk; - session_event_t *evt; - svm_msg_q_t *app_mq; app_wrk = app_worker_get (app_wrk_index); reg = vl_mem_api_client_index_to_registration (app_wrk->api_client_index); @@ -533,17 +424,10 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle) return -1; } - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return -1; + m.segment_handle = segment_handle; - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT; - mp = (session_app_del_segment_msg_t *) evt->data; - clib_memset (mp, 0, sizeof (*mp)); - mp->segment_handle = segment_handle; - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m, + sizeof (m)); return 0; } @@ -551,10 +435,7 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle) static void mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf) { - svm_msg_q_msg_t _msg, *msg = &_msg; - session_cleanup_msg_t *mp; - svm_msg_q_t *app_mq; - session_event_t *evt; + session_cleanup_msg_t m = { 0 }; app_worker_t *app_wrk; /* Propagate transport cleanup notifications only if app didn't close */ @@ -566,17 +447,56 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf) if (!app_wrk) return; - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return; + m.handle = session_handle (s); + m.type = ntf; + + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m)); +} + +static int +mq_send_io_rx_event (session_t *s) +{ + session_event_t *mq_evt; + svm_msg_q_msg_t mq_msg; + app_worker_t *app_wrk; + svm_msg_q_t *mq; + + if (svm_fifo_has_event (s->rx_fifo)) + return 0; + + app_wrk = app_worker_get (s->app_wrk_index); + mq = app_wrk->event_queue; + + 
mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + mq_evt = svm_msg_q_msg_data (mq, &mq_msg); + + mq_evt->event_type = SESSION_IO_EVT_RX; + mq_evt->session_index = s->rx_fifo->shr->client_session_index; + + (void) svm_fifo_set_event (s->rx_fifo); + + svm_msg_q_add_raw (mq, &mq_msg); + + return 0; +} + +static int +mq_send_io_tx_event (session_t *s) +{ + app_worker_t *app_wrk = app_worker_get (s->app_wrk_index); + svm_msg_q_t *mq = app_wrk->event_queue; + session_event_t *mq_evt; + svm_msg_q_msg_t mq_msg; + + mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); + mq_evt = svm_msg_q_msg_data (mq, &mq_msg); + + mq_evt->event_type = SESSION_IO_EVT_TX; + mq_evt->session_index = s->tx_fifo->shr->client_session_index; + + svm_msg_q_add_raw (mq, &mq_msg); - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_CLEANUP; - mp = (session_cleanup_msg_t *) evt->data; - mp->handle = session_handle (s); - mp->type = ntf; - svm_msg_q_add_and_unlock (app_mq, msg); + return 0; } static session_cb_vft_t session_mq_cb_vft = { @@ -586,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = { .session_reset_callback = mq_send_session_reset_cb, .session_migrate_callback = mq_send_session_migrate_cb, .session_cleanup_callback = mq_send_session_cleanup_cb, + .session_listened_callback = mq_send_session_bound_cb, + .session_unlistened_callback = mq_send_unlisten_cb, .add_segment_callback = mq_send_add_segment_cb, .del_segment_callback = mq_send_del_segment_cb, + .builtin_app_rx_callback = mq_send_io_rx_event, + .builtin_app_tx_callback = mq_send_io_tx_event, }; static void @@ -602,6 +526,17 @@ vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp) } static void +vl_api_session_sapi_enable_disable_t_handler ( + vl_api_session_sapi_enable_disable_t *mp) +{ + vl_api_session_sapi_enable_disable_reply_t *rmp; + int rv = 0; + + rv = appns_sapi_enable_disable (mp->is_enable); + 
REPLY_MACRO (VL_API_SESSION_SAPI_ENABLE_DISABLE_REPLY); +} + +static void vl_api_app_attach_t_handler (vl_api_app_attach_t * mp) { int rv = 0, *fds = 0, n_fds = 0, n_workers, i; @@ -642,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp) if ((rv = vnet_application_attach (a))) { - clib_warning ("attach returned: %d", rv); + clib_warning ("attach returned: %U", format_session_error, rv); + rv = VNET_API_ERROR_UNSPECIFIED; vec_free (a->namespace_id); goto done; } @@ -684,27 +620,28 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp) } done: - /* *INDENT-OFF* */ - REPLY_MACRO2 (VL_API_APP_ATTACH_REPLY, ({ - if (!rv) - { - ctrl_thread = n_workers ? 1 : 0; - segp = (fifo_segment_t *) a->segment; - rmp->app_index = clib_host_to_net_u32 (a->app_index); - rmp->app_mq = fifo_segment_msg_q_offset (segp, 0); - rmp->vpp_ctrl_mq = fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread); - rmp->vpp_ctrl_mq_thread = ctrl_thread; - rmp->n_fds = n_fds; - rmp->fd_flags = fd_flags; - if (vec_len (segp->ssvm.name)) - { - vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name); - } - rmp->segment_size = segp->ssvm.ssvm_size; - rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle); - } - })); - /* *INDENT-ON* */ + REPLY_MACRO3 ( + VL_API_APP_ATTACH_REPLY, + ((!rv) ? vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({ + if (!rv) + { + ctrl_thread = n_workers ? 
1 : 0; + segp = (fifo_segment_t *) a->segment; + rmp->app_index = clib_host_to_net_u32 (a->app_index); + rmp->app_mq = fifo_segment_msg_q_offset (segp, 0); + rmp->vpp_ctrl_mq = + fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread); + rmp->vpp_ctrl_mq_thread = ctrl_thread; + rmp->n_fds = n_fds; + rmp->fd_flags = fd_flags; + if (vec_len (segp->ssvm.name)) + { + vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name); + } + rmp->segment_size = segp->ssvm.ssvm_size; + rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle); + } + })); if (n_fds) session_send_fds (reg, fds, n_fds); @@ -746,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) rv = vnet_app_worker_add_del (&args); if (rv) { - clib_warning ("app worker add/del returned: %d", rv); + clib_warning ("app worker add/del returned: %U", format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; goto done; } @@ -767,25 +706,27 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp) n_fds += 1; } - /* *INDENT-OFF* */ done: - REPLY_MACRO2 (VL_API_APP_WORKER_ADD_DEL_REPLY, ({ - rmp->is_add = mp->is_add; - rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index); - rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle); - if (!rv && mp->is_add) - { - rmp->app_event_queue_address = - fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0); - rmp->n_fds = n_fds; - rmp->fd_flags = fd_flags; - if (vec_len (args.segment->name)) - { - vl_api_vec_to_api_string (args.segment->name, &rmp->segment_name); - } - } - })); - /* *INDENT-ON* */ + REPLY_MACRO3 ( + VL_API_APP_WORKER_ADD_DEL_REPLY, + ((!rv && mp->is_add) ? 
vec_len (args.segment->name) : 0), ({ + rmp->is_add = mp->is_add; + rmp->wrk_index = mp->wrk_index; + if (!rv && mp->is_add) + { + rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index); + rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle); + rmp->app_event_queue_address = + fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0); + rmp->n_fds = n_fds; + rmp->fd_flags = fd_flags; + if (vec_len (args.segment->name)) + { + vl_api_vec_to_api_string (args.segment->name, + &rmp->segment_name); + } + } + })); if (n_fds) session_send_fds (reg, fds, n_fds); @@ -811,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp) a->app_index = app->app_index; a->api_client_index = mp->client_index; rv = vnet_application_detach (a); + if (rv) + { + clib_warning ("vnet_application_detach: %U", format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; + } } done: @@ -834,6 +781,7 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, + .sock_name = 0, .secret = clib_net_to_host_u64 (mp->secret), .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id), @@ -852,13 +800,11 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp) } vec_free (ns_id); - /* *INDENT-OFF* */ done: REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({ if (!rv) rmp->appns_index = clib_host_to_net_u32 (appns_index); })); - /* *INDENT-ON* */ } static void @@ -866,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler ( vl_api_app_namespace_add_del_v2_t *mp) { vl_api_app_namespace_add_del_v2_reply_t *rmp; - u8 *ns_id = 0, *netns = 0; + u8 *ns_id = 0; u32 appns_index = 0; int rv = 0; @@ -877,13 +823,11 @@ vl_api_app_namespace_add_del_v2_t_handler ( } mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; - mp->netns[sizeof (mp->netns) - 1] = 0; ns_id = format (0, "%s", &mp->namespace_id); - netns = format 
(0, "%s", &mp->netns); vnet_app_namespace_add_del_args_t args = { .ns_id = ns_id, - .netns = netns, + .sock_name = 0, .secret = clib_net_to_host_u64 (mp->secret), .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id), @@ -896,12 +840,11 @@ vl_api_app_namespace_add_del_v2_t_handler ( appns_index = app_namespace_index_from_id (ns_id); if (appns_index == APP_NAMESPACE_INVALID_INDEX) { - clib_warning ("app ns lookup failed"); + clib_warning ("app ns lookup failed id:%s", ns_id); rv = VNET_API_ERROR_UNSPECIFIED; } } vec_free (ns_id); - vec_free (netns); done: REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({ @@ -911,6 +854,107 @@ done: } static void +vl_api_app_namespace_add_del_v4_t_handler ( + vl_api_app_namespace_add_del_v4_t *mp) +{ + vl_api_app_namespace_add_del_v4_reply_t *rmp; + u8 *ns_id = 0, *sock_name = 0; + u32 appns_index = 0; + int rv = 0; + if (session_main_is_enabled () == 0) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto done; + } + mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; + ns_id = format (0, "%s", &mp->namespace_id); + sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name); + vnet_app_namespace_add_del_args_t args = { + .ns_id = ns_id, + .sock_name = sock_name, + .secret = clib_net_to_host_u64 (mp->secret), + .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), + .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id), + .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id), + .is_add = mp->is_add, + }; + rv = vnet_app_namespace_add_del (&args); + if (!rv && mp->is_add) + { + appns_index = app_namespace_index_from_id (ns_id); + if (appns_index == APP_NAMESPACE_INVALID_INDEX) + { + clib_warning ("app ns lookup failed id:%s", ns_id); + rv = VNET_API_ERROR_UNSPECIFIED; + } + } + vec_free (ns_id); + vec_free (sock_name); +done: + REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({ + if (!rv) + rmp->appns_index = clib_host_to_net_u32 (appns_index); + })); +} + +static void 
+vl_api_app_namespace_add_del_v3_t_handler ( + vl_api_app_namespace_add_del_v3_t *mp) +{ + vl_api_app_namespace_add_del_v3_reply_t *rmp; + u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0; + u32 appns_index = 0; + int rv = 0; + if (session_main_is_enabled () == 0) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto done; + } + mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0; + ns_id = format (0, "%s", &mp->namespace_id); + api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name); + mp->netns[sizeof (mp->netns) - 1] = 0; + if (strlen ((char *) mp->netns) != 0) + { + sock_name = + format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns); + } + else + { + sock_name = api_sock_name; + api_sock_name = 0; // for vec_free + } + + vnet_app_namespace_add_del_args_t args = { + .ns_id = ns_id, + .sock_name = sock_name, + .secret = clib_net_to_host_u64 (mp->secret), + .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index), + .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id), + .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id), + .is_add = mp->is_add, + }; + rv = vnet_app_namespace_add_del (&args); + if (!rv && mp->is_add) + { + appns_index = app_namespace_index_from_id (ns_id); + if (appns_index == APP_NAMESPACE_INVALID_INDEX) + { + clib_warning ("app ns lookup failed id:%s", ns_id); + rv = VNET_API_ERROR_UNSPECIFIED; + } + } + vec_free (ns_id); + vec_free (sock_name); + vec_free (api_sock_name); +done: + REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({ + if (!rv) + rmp->appns_index = clib_host_to_net_u32 (appns_index); + })); +} + +static void vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) { vl_api_session_rule_add_del_reply_t *rmp; @@ -937,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp) rv = vnet_session_rule_add_del (&args); if (rv) - clib_warning ("rule add del returned: %d", rv); + { + clib_warning ("rule add del returned: %U", format_session_error, rv); + rv = 
VNET_API_ERROR_UNSPECIFIED; + } vec_free (table_args->tag); REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY); } @@ -1040,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, if (is_local || fib_proto == FIB_PROTOCOL_IP4) { u8 *tag = 0; - /* *INDENT-OFF* */ srt16 = &srt->session_rules_tables_16; pool_foreach (rule16, srt16->rules) { ri = mma_rules_table_rule_index_16 (srt16, rule16); @@ -1048,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, send_session_rule_details4 (rule16, is_local, tp, appns_index, tag, reg, context); } - /* *INDENT-ON* */ } if (is_local || fib_proto == FIB_PROTOCOL_IP6) { u8 *tag = 0; - /* *INDENT-OFF* */ srt40 = &srt->session_rules_tables_40; pool_foreach (rule40, srt40->rules) { ri = mma_rules_table_rule_index_40 (srt40, rule40); @@ -1061,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto, send_session_rule_details6 (rule40, is_local, tp, appns_index, tag, reg, context); } - /* *INDENT-ON* */ } } @@ -1076,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) if (!reg) return; - /* *INDENT-OFF* */ session_table_foreach (st, ({ for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++) { @@ -1086,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp) mp->context); } })); - /* *INDENT-ON* */ } static void @@ -1131,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp) rv = vnet_app_add_cert_key_pair (a); done: - /* *INDENT-OFF* */ REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({ if (!rv) rmp->index = clib_host_to_net_u32 (a->index); })); - /* *INDENT-ON* */ } static void @@ -1152,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp) } ckpair_index = clib_net_to_host_u32 (mp->index); rv = vnet_app_del_cert_key_pair (ckpair_index); + if (rv) + { + clib_warning ("vnet_app_del_cert_key_pair: %U", 
format_session_error, + rv); + rv = VNET_API_ERROR_UNSPECIFIED; + } done: REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY); @@ -1177,36 +1222,11 @@ VL_MSG_API_REAPER_FUNCTION (application_reaper_cb); * Socket api functions */ -static void -sapi_send_fds (app_worker_t * app_wrk, int *fds, int n_fds) -{ - app_sapi_msg_t smsg = { 0 }; - app_namespace_t *app_ns; - application_t *app; - clib_socket_t *cs; - u32 cs_index; - - app = application_get (app_wrk->app_index); - app_ns = app_namespace_get (app->ns_index); - cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index); - cs = appns_sapi_get_socket (app_ns, cs_index); - if (PREDICT_FALSE (!cs)) - return; - - /* There's no payload for the message only the type */ - smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS; - clib_socket_sendmsg (cs, &smsg, sizeof (smsg), fds, n_fds); -} - static int mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle) { - int fds[SESSION_N_FD_TYPE], n_fds = 0; - svm_msg_q_msg_t _msg, *msg = &_msg; - session_app_add_segment_msg_t *mp; + session_app_add_segment_msg_t m = { 0 }; app_worker_t *app_wrk; - session_event_t *evt; - svm_msg_q_t *app_mq; fifo_segment_t *fs; ssvm_private_t *sp; u8 fd_flags = 0; @@ -1218,33 +1238,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle) ASSERT (ssvm_type (sp) == SSVM_SEGMENT_MEMFD); fd_flags |= SESSION_FD_F_MEMFD_SEGMENT; - fds[n_fds] = sp->fd; - n_fds += 1; - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return -1; + m.segment_size = sp->ssvm_size; + m.fd_flags = fd_flags; + m.segment_handle = segment_handle; + strncpy ((char *) m.segment_name, (char *) sp->name, + sizeof (m.segment_name) - 1); - /* - * Send the fd over api socket - */ - sapi_send_fds (app_wrk, fds, n_fds); - - /* - * Send the actual message over mq - */ - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT; - mp = (session_app_add_segment_msg_t 
*) evt->data; - clib_memset (mp, 0, sizeof (*mp)); - mp->segment_size = sp->ssvm_size; - mp->fd_flags = fd_flags; - mp->segment_handle = segment_handle; - strncpy ((char *) mp->segment_name, (char *) sp->name, - sizeof (mp->segment_name) - 1); - - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m, + sizeof (m), sp->fd); return 0; } @@ -1252,25 +1254,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle) static int mq_send_del_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle) { - svm_msg_q_msg_t _msg, *msg = &_msg; - session_app_del_segment_msg_t *mp; + session_app_del_segment_msg_t m = { 0 }; app_worker_t *app_wrk; - session_event_t *evt; - svm_msg_q_t *app_mq; app_wrk = app_worker_get (app_wrk_index); - app_mq = app_wrk->event_queue; - if (mq_try_lock_and_alloc_msg (app_mq, msg)) - return -1; + m.segment_handle = segment_handle; - evt = svm_msg_q_msg_data (app_mq, msg); - clib_memset (evt, 0, sizeof (*evt)); - evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT; - mp = (session_app_del_segment_msg_t *) evt->data; - clib_memset (mp, 0, sizeof (*mp)); - mp->segment_handle = segment_handle; - svm_msg_q_add_and_unlock (app_mq, msg); + app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m, + sizeof (m)); return 0; } @@ -1282,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = { .session_reset_callback = mq_send_session_reset_cb, .session_migrate_callback = mq_send_session_migrate_cb, .session_cleanup_callback = mq_send_session_cleanup_cb, + .session_listened_callback = mq_send_session_bound_cb, + .session_unlistened_callback = mq_send_unlisten_cb, .add_segment_callback = mq_send_add_segment_sapi_cb, .del_segment_callback = mq_send_del_segment_sapi_cb, + .builtin_app_rx_callback = mq_send_io_rx_event, + .builtin_app_tx_callback = mq_send_io_tx_event, }; static void @@ -1385,7 +1381,7 @@ done: vec_free (fds); } -static void +void sapi_socket_close_w_handle 
(u32 api_handle) { app_namespace_t *app_ns = app_namespace_get (api_handle >> 16); @@ -1423,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, app = application_get_if_valid (mp->app_index); if (!app) { - rv = VNET_API_ERROR_INVALID_VALUE; + rv = SESSION_E_INVALID; goto done; } @@ -1438,15 +1434,13 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, rv = vnet_app_worker_add_del (&args); if (rv) { - clib_warning ("app worker add/del returned: %d", rv); + clib_warning ("app worker add/del returned: %U", format_session_error, + rv); goto done; } if (!mp->is_add) - { - sapi_socket_close_w_handle (sapi_handle); - goto done; - } + goto done; /* Send fifo segment fd if needed */ if (ssvm_type (args.segment) == SSVM_SEGMENT_MEMFD) @@ -1464,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns, done: + /* With app sock api socket expected to be closed, no reply */ + if (!mp->is_add && appns_sapi_enabled ()) + return; + msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY; rmp = &msg.worker_add_del_reply; rmp->retval = rv; rmp->is_add = mp->is_add; + rmp->wrk_index = mp->wrk_index; rmp->api_client_handle = sapi_handle; - rmp->wrk_index = args.wrk_map_index; - rmp->segment_handle = args.segment_handle; if (!rv && mp->is_add) { + rmp->wrk_index = args.wrk_map_index; + rmp->segment_handle = args.segment_handle; /* No segment name and size. 
This supports only memfds */ rmp->app_event_queue_address = fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0); @@ -1488,6 +1487,108 @@ done: clib_socket_sendmsg (cs, &msg, sizeof (msg), fds, n_fds); } +/* This is a workaround for the case when session layer starts reading + * the socket before the client actualy sends the data + */ +static clib_error_t * +sapi_socket_receive_wait (clib_socket_t *cs, u8 *msg, u32 msg_len) +{ + clib_error_t *err; + int n_tries = 5; + + while (1) + { + err = clib_socket_recvmsg (cs, msg, msg_len, 0, 0); + if (!err) + break; + + if (!n_tries) + return err; + + n_tries--; + usleep (1); + } + + return err; +} + +static void +sapi_add_del_cert_key_handler (app_namespace_t *app_ns, clib_socket_t *cs, + app_sapi_cert_key_add_del_msg_t *mp) +{ + vnet_app_add_cert_key_pair_args_t _a, *a = &_a; + app_sapi_cert_key_add_del_reply_msg_t *rmp; + app_sapi_msg_t msg = { 0 }; + int rv = 0; + + if (mp->is_add) + { + const u32 max_certkey_len = 2e4, max_cert_len = 1e4, max_key_len = 1e4; + clib_error_t *err; + u8 *certkey = 0; + u32 key_len; + + if (mp->certkey_len > max_certkey_len) + { + rv = SESSION_E_INVALID; + goto send_reply; + } + + vec_validate (certkey, mp->certkey_len - 1); + + err = sapi_socket_receive_wait (cs, certkey, mp->certkey_len); + if (err) + { + clib_error_report (err); + rv = SESSION_E_INVALID; + goto send_reply; + } + + if (mp->cert_len > max_cert_len) + { + rv = SESSION_E_INVALID; + goto send_reply; + } + + if (mp->certkey_len < mp->cert_len) + { + rv = SESSION_E_INVALID; + goto send_reply; + } + + key_len = mp->certkey_len - mp->cert_len; + if (key_len > max_key_len) + { + rv = SESSION_E_INVALID; + goto send_reply; + } + + clib_memset (a, 0, sizeof (*a)); + a->cert = certkey; + a->key = certkey + mp->cert_len; + a->cert_len = mp->cert_len; + a->key_len = key_len; + rv = vnet_app_add_cert_key_pair (a); + + vec_free (certkey); + } + else + { + rv = vnet_app_del_cert_key_pair (mp->index); + } + +send_reply: + + 
msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY; + rmp = &msg.cert_key_add_del_reply; + rmp->retval = rv; + rmp->context = mp->context; + if (!rv && mp->is_add) + rmp->index = a->index; + + clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0); +} + static void sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs) { @@ -1496,11 +1597,12 @@ sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs) u32 api_client_handle; api_client_handle = appns_sapi_socket_handle (app_ns, cs); - sapi_socket_close_w_handle (api_client_handle); /* Cleanup everything because app worker closed socket or crashed */ handle = (app_ns_api_handle_t *) & cs->private_data; - app_wrk = app_worker_get (handle->aah_app_wrk_index); + app_wrk = app_worker_get_if_valid (handle->aah_app_wrk_index); + if (!app_wrk) + return; vnet_app_worker_add_del_args_t args = { .app_index = app_wrk->app_index, @@ -1548,6 +1650,9 @@ sapi_sock_read_ready (clib_file_t * cf) case APP_SAPI_MSG_TYPE_ADD_DEL_WORKER: sapi_add_del_worker_handler (app_ns, cs, &msg.worker_add_del); break; + case APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY: + sapi_add_del_cert_key_handler (app_ns, cs, &msg.cert_key_add_del); + break; default: clib_warning ("app wrk %u unknown message type: %u", handle->aah_app_wrk_index, msg.type); @@ -1635,6 +1740,23 @@ error: return err; } +void +appns_sapi_del_ns_socket (app_namespace_t *app_ns) +{ + app_ns_api_handle_t *handle; + clib_socket_t *cs; + + pool_foreach (cs, app_ns->app_sockets) + { + handle = (app_ns_api_handle_t *) &cs->private_data; + clib_file_del_by_index (&file_main, handle->aah_file_index); + + clib_socket_close (cs); + clib_socket_free (cs); + } + pool_free (app_ns->app_sockets); +} + int appns_sapi_add_ns_socket (app_namespace_t * app_ns) { @@ -1644,49 +1766,42 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns) struct stat file_stat; clib_error_t *err; clib_socket_t *cs; - u8 *dir = 0; - int rv = 0; + char dir[4096]; - vec_add (dir, vlib_unix_get_runtime_dir (), - 
strlen (vlib_unix_get_runtime_dir ())); - vec_add (dir, (u8 *) subdir, strlen (subdir)); + snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir); - err = vlib_unix_recursive_mkdir ((char *) dir); - if (err) - { - clib_error_report (err); - rv = -1; - goto error; - } - - /* Use abstract sockets if a netns was provided */ - if (app_ns->netns) - app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0); - else - app_ns->sock_name = format (0, "%v%v%c", dir, app_ns->ns_id, 0); + if (!app_ns->sock_name) + app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0); /* * Create and initialize socket to listen on */ cs = appns_sapi_alloc_socket (app_ns); - cs->config = (char *) app_ns->sock_name; + cs->config = (char *) vec_dup (app_ns->sock_name); cs->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_ALLOW_GROUP_WRITE | CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED; - if ((err = clib_socket_init_netns (cs, app_ns->netns))) + if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX) { - clib_error_report (err); - rv = -1; - goto error; + err = vlib_unix_recursive_mkdir ((char *) dir); + if (err) + { + clib_error_report (err); + return SESSION_E_SYSCALL; + } } - if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1) + if ((err = clib_socket_init (cs))) { - rv = -1; - goto error; + clib_error_report (err); + return -1; } + if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX && + stat ((char *) app_ns->sock_name, &file_stat) == -1) + return -1; + /* * Start polling it */ @@ -1703,22 +1818,7 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns) handle->aah_file_index = clib_file_add (&file_main, &cf); handle->aah_app_wrk_index = APP_INVALID_INDEX; -error: - vec_free (dir); - return rv; -} - -static void -vl_api_application_tls_cert_add_t_handler ( - vl_api_application_tls_cert_add_t *mp) -{ - /* deprecated */ -} - -static void -vl_api_application_tls_key_add_t_handler 
(vl_api_application_tls_key_add_t *mp) -{ - /* deprecated */ + return 0; } #include <vnet/session/session.api.c> diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index 24d8cfb1e24..569a77bccc1 100644 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args) else if (ss->session_state == SESSION_STATE_CONNECTING) { if (ss->flags & SESSION_F_HALF_OPEN) - s = format (s, "%U%v", format_transport_half_open_connection, tp, - ss->connection_index, ss->thread_index, verbose, str); + { + s = format (s, "%U", format_transport_half_open_connection, tp, + ss->connection_index, ss->thread_index, verbose); + s = format (s, "%v", str); + } else s = format (s, "%U", format_transport_connection, tp, ss->connection_index, ss->thread_index, verbose); @@ -259,7 +262,6 @@ unformat_session (unformat_input_t * input, va_list * args) if (s) { *result = s; - session_pool_remove_peeker (s->thread_index); return 1; } return 0; @@ -340,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose) n_closed = 0; - /* *INDENT-OFF* */ pool_foreach (s, pool) { if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED) { @@ -349,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose) } vlib_cli_output (vm, "%U", format_session, s, verbose); } - /* *INDENT-ON* */ if (!n_closed) vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index, @@ -488,7 +488,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, { u8 one_session = 0, do_listeners = 0, sst, do_elog = 0, do_filter = 0; u32 track_index, thread_index = 0, start = 0, end = ~0, session_index; - unformat_input_t _line_input, *line_input = &_line_input; transport_proto_t transport_proto = TRANSPORT_PROTO_INVALID; session_state_t state = SESSION_N_STATES, *states = 0; session_main_t *smm = &session_main; @@ -502,26 +501,20 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * 
input, session_cli_return_if_not_enabled (); - if (!unformat_user (input, unformat_line_input, line_input)) - { - session_cli_show_all_sessions (vm, 0); - return 0; - } - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "verbose %d", &verbose)) + if (unformat (input, "verbose %d", &verbose)) ; - else if (unformat (line_input, "verbose")) + else if (unformat (input, "verbose")) verbose = 1; - else if (unformat (line_input, "listeners %U", unformat_transport_proto, + else if (unformat (input, "listeners %U", unformat_transport_proto, &transport_proto)) do_listeners = 1; - else if (unformat (line_input, "%U", unformat_session, &s)) + else if (unformat (input, "%U", unformat_session, &s)) { one_session = 1; } - else if (unformat (line_input, "thread %u index %u", &thread_index, + else if (unformat (input, "thread %u index %u", &thread_index, &session_index)) { s = session_get_if_valid (session_index, thread_index); @@ -532,19 +525,17 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, } one_session = 1; } - else if (unformat (line_input, "thread %u", &thread_index)) + else if (unformat (input, "thread %u", &thread_index)) { do_filter = 1; } - else - if (unformat (line_input, "state %U", unformat_session_state, &state)) + else if (unformat (input, "state %U", unformat_session_state, &state)) { vec_add1 (states, state); do_filter = 1; } - else if (unformat (line_input, "proto %U index %u", - unformat_transport_proto, &transport_proto, - &transport_index)) + else if (unformat (input, "proto %U index %u", unformat_transport_proto, + &transport_proto, &transport_index)) { transport_connection_t *tc; tc = transport_get_connection (transport_proto, transport_index, @@ -565,34 +556,34 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, } one_session = 1; } - else if (unformat (line_input, "proto %U", unformat_transport_proto, 
+ else if (unformat (input, "proto %U", unformat_transport_proto, &transport_proto)) do_filter = 1; - else if (unformat (line_input, "range %u %u", &start, &end)) + else if (unformat (input, "range %u %u", &start, &end)) do_filter = 1; - else if (unformat (line_input, "range %u", &start)) + else if (unformat (input, "range %u", &start)) { end = start + 50; do_filter = 1; } - else if (unformat (line_input, "elog")) + else if (unformat (input, "elog")) do_elog = 1; - else if (unformat (line_input, "protos")) + else if (unformat (input, "protos")) { vlib_cli_output (vm, "%U", format_transport_protos); goto done; } - else if (unformat (line_input, "states")) + else if (unformat (input, "states")) { session_cli_print_session_states (vm); goto done; } - else if (unformat (line_input, "events")) + else if (unformat (input, "events")) do_events = 1; else { error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); + format_unformat_error, input); goto done; } } @@ -625,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener", "App"); - /* *INDENT-OFF* */ pool_foreach (s, smm->wrk[0].sessions) { if (s->session_state != SESSION_STATE_LISTENING || s->session_type != sst) @@ -635,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%U%-25v%", format_session, s, 0, app_name); } - /* *INDENT-ON* */ goto done; } @@ -661,12 +650,10 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input, session_cli_show_all_sessions (vm, verbose); done: - unformat_free (line_input); vec_free (states); return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_session_command) = { .path = "show session", @@ -676,7 +663,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) = "[protos] [states] ", .function = show_session_command_fn, }; -/* *INDENT-ON* */ static int clear_session (session_t * s) @@ -728,27 
+714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input, if (clear_all) { - /* *INDENT-OFF* */ vec_foreach (wrk, smm->wrk) { pool_foreach (session, wrk->sessions) { clear_session (session); } }; - /* *INDENT-ON* */ } return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_session_command, static) = { .path = "clear session", .short_help = "clear session thread <thread> session <index>", .function = clear_session_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_session_fifo_trace_command_fn (vlib_main_t * vm, @@ -791,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) = { .path = "show session fifo trace", .short_help = "show session fifo trace <session>", .function = show_session_fifo_trace_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -838,53 +818,98 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) = { .path = "session replay fifo", .short_help = "session replay fifo <session>", .function = session_replay_fifo_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - unformat_input_t _line_input, *line_input = &_line_input; - u8 is_en = 1; - clib_error_t *error; - - if (!unformat_user (input, unformat_line_input, line_input)) - return clib_error_return (0, "expected enable | disable"); + u8 is_en = 2; - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "enable")) + if (unformat (input, "enable")) is_en = 1; - else if (unformat (line_input, "disable")) + else if (unformat (input, "disable")) is_en = 0; else - { 
- error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - unformat_free (line_input); - return error; - } + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); } - unformat_free (line_input); + if (is_en > 1) + return clib_error_return (0, "expected enable | disable"); + return vnet_session_enable_disable (vm, is_en); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_enable_disable_command, static) = { .path = "session", .short_help = "session [enable|disable]", .function = session_enable_disable_fn, }; -/* *INDENT-ON* */ + +static clib_error_t * +show_session_stats_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_main_t *smm = &session_main; + session_worker_t *wrk; + unsigned int *e; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + + vec_foreach (wrk, smm->wrk) + { + vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk); + e = wrk->stats.errors; +#define _(name, str) \ + if (e[SESSION_EP_##name]) \ + vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str); + foreach_session_error +#undef _ + } + return 0; +} + +VLIB_CLI_COMMAND (show_session_stats_command, static) = { + .path = "show session stats", + .short_help = "show session stats", + .function = show_session_stats_fn, +}; + +static clib_error_t * +clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_main_t *smm = &session_main; + session_worker_t *wrk; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + + vec_foreach (wrk, smm->wrk) + { + clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); + } + + return 0; +} + +VLIB_CLI_COMMAND (clear_session_stats_command, static) = { + .path = "clear session stats", + .short_help = "clear session stats", + .function = 
clear_session_stats_fn, +}; /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c index 349d1ec9b46..2a50adac5dd 100644 --- a/src/vnet/session/session_debug.c +++ b/src/vnet/session/session_debug.c @@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) = { .path = "show session dbg clock_cycles", .short_help = "show session dbg clock_cycles", .function = show_session_dbg_clock_cycles_fn, }; -/* *INDENT-ON* */ +static_always_inline f64 +session_dbg_time_now (u32 thread) +{ + vlib_main_t *vm = vlib_get_main_by_index (thread); + + return clib_time_now (&vm->clib_time) + vm->time_offset; +} static clib_error_t * clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, @@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, { sde = &session_dbg_main.wrk[thread]; clib_memset (sde, 0, sizeof (session_dbg_evts_t)); - sde->last_time = vlib_time_now (vlib_mains[thread]); + sde->last_time = session_dbg_time_now (thread); sde->start_time = sde->last_time; } @@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) = { .path = "clear session dbg clock_cycles", .short_help = "clear session dbg clock_cycles", .function = clear_session_dbg_clock_cycles_fn, }; -/* *INDENT-ON* */ void session_debug_init (void) @@ -107,15 +110,99 @@ session_debug_init (void) for (thread = 0; thread < num_threads; thread++) { clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t)); - sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]); + sdm->wrk[thread].start_time = session_dbg_time_now (thread); + } +} + +static const char *session_evt_grp_str[] = { +#define _(sym, str) str, + 
foreach_session_evt_grp +#undef _ +}; + +static void +session_debug_show_groups (vlib_main_t *vm) +{ + session_dbg_main_t *sdm = &session_dbg_main; + int i = 0; + + vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level"); + + for (i = 0; i < SESSION_EVT_N_GRP; i++) + vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i], + sdm->grp_dbg_lvl[i]); +} + +static clib_error_t * +session_debug_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_dbg_main_t *sdm = &session_dbg_main; + u32 group, level = ~0; + clib_error_t *error = 0; + u8 is_show = 0; + uword *bitmap = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "show")) + is_show = 1; + else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap)) + ; + else if (unformat (input, "level %d", &level)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (is_show) + { + session_debug_show_groups (vm); + goto done; + } + if (level == ~0) + { + vlib_cli_output (vm, "level must be entered"); + goto done; + } + + group = clib_bitmap_last_set (bitmap); + if (group == ~0) + { + vlib_cli_output (vm, "group must be entered"); + goto done; + } + if (group >= SESSION_EVT_N_GRP) + { + vlib_cli_output (vm, "group out of bounds"); + goto done; } + clib_bitmap_foreach (group, bitmap) + sdm->grp_dbg_lvl[group] = level; + +done: + + clib_bitmap_free (bitmap); + return error; } + +VLIB_CLI_COMMAND (session_debug_command, static) = { + .path = "session debug", + .short_help = "session debug {show | debug group <list> level <n>}", + .function = session_debug_fn, + .is_mp_safe = 1, +}; + #else void session_debug_init (void) { } -#endif +#endif /* SESSION_DEBUG */ void dump_thread_0_event_queue (void) @@ -144,6 +231,8 @@ dump_thread_0_event_queue (void) { case SESSION_IO_EVT_TX: s0 = session_get_if_valid (e->session_index, my_thread_index); + if (!s0) + 
break; fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index); break; @@ -155,6 +244,8 @@ dump_thread_0_event_queue (void) case SESSION_IO_EVT_BUILTIN_RX: s0 = session_get_if_valid (e->session_index, my_thread_index); + if (!s0) + break; fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index); break; @@ -180,28 +271,18 @@ dump_thread_0_event_queue (void) static u8 session_node_cmp_event (session_event_t * e, svm_fifo_t * f) { - session_t *s; switch (e->event_type) { case SESSION_IO_EVT_RX: case SESSION_IO_EVT_TX: case SESSION_IO_EVT_BUILTIN_RX: - case SESSION_IO_EVT_BUILTIN_TX: + case SESSION_IO_EVT_TX_MAIN: case SESSION_IO_EVT_TX_FLUSH: if (e->session_index == f->shr->master_session_index) return 1; break; case SESSION_CTRL_EVT_CLOSE: - break; case SESSION_CTRL_EVT_RPC: - s = session_get_from_handle (e->session_handle); - if (!s) - { - clib_warning ("session has event but doesn't exist!"); - break; - } - if (s->rx_fifo == f || s->tx_fifo == f) - return 1; break; default: break; @@ -217,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) session_worker_t *wrk; int i, index, found = 0; svm_msg_q_msg_t *msg; - svm_msg_q_ring_t *ring; svm_msg_q_t *mq; u8 thread_index; @@ -234,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) for (i = 0; i < sq->cursize; i++) { msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index); - ring = svm_msg_q_ring (mq, msg->ring_index); - clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), ring->elsize); + clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e)); found = session_node_cmp_event (e, f); if (found) return 1; @@ -245,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) * Search pending events vector */ - /* *INDENT-OFF* */ clib_llist_foreach (wrk->event_elts, evt_list, pool_elt_at_index (wrk->event_elts, wrk->new_head), elt, ({ @@ -256,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * 
e) goto done; } })); - /* *INDENT-ON* */ - /* *INDENT-OFF* */ clib_llist_foreach (wrk->event_elts, evt_list, pool_elt_at_index (wrk->event_elts, wrk->old_head), elt, ({ @@ -269,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e) goto done; } })); - /* *INDENT-ON* */ done: return found; diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h index 9e49a35dbe6..d433ef47fb1 100644 --- a/src/vnet/session/session_debug.h +++ b/src/vnet/session/session_debug.h @@ -17,49 +17,81 @@ #include <vnet/session/transport.h> #include <vlib/vlib.h> - -#define foreach_session_dbg_evt \ - _(ENQ, "enqueue") \ - _(DEQ, "dequeue") \ - _(DEQ_NODE, "dequeue") \ - _(POLL_GAP_TRACK, "poll gap track") \ - _(POLL_DISPATCH_TIME, "dispatch time") \ - _(DISPATCH_START, "dispatch start") \ - _(DISPATCH_END, "dispatch end") \ - _(FREE, "session free") \ - _(DSP_CNTRS, "dispatch counters") \ - _(IO_EVT_COUNTS, "io evt counts") \ - _(EVT_COUNTS, "ctrl evt counts") \ +#include <vpp/vnet/config.h> + +#define foreach_session_dbg_evt \ + _ (ENQ, DEQ_EVTS, 1, "enqueue") \ + _ (DEQ, DEQ_EVTS, 1, "dequeue") \ + _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \ + _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \ + _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \ + _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \ + _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \ + _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \ + _ (STATE_CHANGE, SM, 1, "session state change") \ + _ (FREE, SM, 1, "session free") \ + _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \ + _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts") typedef enum _session_evt_dbg { -#define _(sym, str) SESSION_EVT_##sym, +#define _(sym, grp, lvl, str) SESSION_EVT_##sym, foreach_session_dbg_evt #undef _ } session_evt_dbg_e; -#define foreach_session_events \ -_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \ -_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \ -_(CLK_CTRL_EVTS, 1, 1, 
"Time Ctrl Events") \ -_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \ -_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \ -_(CLK_TOTAL, 1, 1, "Time Total in Node") \ -_(CLK_START, 1, 1, "Time Since Last Reset") \ - \ -_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \ -_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \ -_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \ -_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \ -_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \ -_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \ - \ -_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \ -_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \ -_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \ -_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \ -_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \ -_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \ +typedef enum session_evt_lvl_ +{ +#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl, + foreach_session_dbg_evt +#undef _ +} session_evt_lvl_e; + +#define foreach_session_evt_grp \ + _ (DEQ_EVTS, "dequeue/enqueue events") \ + _ (DISPATCH_DBG, "dispatch") \ + _ (EVT_POLL_DBG, "event poll") \ + _ (SM, "state machine") \ + _ (CLOCKS_EVT_DBG, "clocks events") \ + _ (COUNTS_EVT_DBG, "counts events") + +typedef enum session_evt_grp_ +{ +#define _(sym, str) SESSION_EVT_GRP_##sym, + foreach_session_evt_grp +#undef _ + SESSION_EVT_N_GRP +} session_evt_grp_e; + +typedef enum session_evt_to_grp_ +{ +#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp, + foreach_session_dbg_evt +#undef _ +} session_evt_to_grp_e; + +#define foreach_session_events \ + _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \ + _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \ + _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \ + _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \ + _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \ + _ (CLK_TOTAL, 1, 1, "Time Total in Node") \ + _ (CLK_START, 1, 1, "Time Since Last Reset") \ + \ + 
_ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \ + _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \ + _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \ + _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \ + _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \ + _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \ + \ + _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \ + _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \ + _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \ + _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \ + _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \ + _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main") typedef enum { @@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t typedef struct session_dbg_main_ { session_dbg_evts_t *wrk; + u8 grp_dbg_lvl[SESSION_EVT_N_GRP]; } session_dbg_main_t; extern session_dbg_main_t session_dbg_main; -#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0) -#define SESSION_DEQ_EVTS (0) -#define SESSION_DISPATCH_DBG (0) -#define SESSION_EVT_POLL_DBG (0) -#define SESSION_SM (0) +#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0) +#define SESSION_DEBUG (1) +#define SESSION_DEQ_EVTS (1) +#define SESSION_DISPATCH_DBG (1) +#define SESSION_EVT_POLL_DBG (1) +#define SESSION_SM (1) +#define SESSION_CLOCKS_EVT_DBG (1) +#define SESSION_COUNTS_EVT_DBG (1) +#else +#define SESSION_DEBUG (0) +#define SESSION_DEQ_EVTS (0) +#define SESSION_DISPATCH_DBG (0) +#define SESSION_EVT_POLL_DBG (0) +#define SESSION_SM (0) #define SESSION_CLOCKS_EVT_DBG (0) #define SESSION_COUNTS_EVT_DBG (0) +#endif #if SESSION_DEBUG @@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main; ed = ELOG_DATA (&vlib_global_main.elog_main, _e) #if SESSION_SM -#define SESSION_EVT_FREE_HANDLER(_s) \ -{ \ - ELOG_TYPE_DECLARE (_e) = \ - { \ - .format = "free: idx %u", \ - .format_args = "i4", \ - }; \ - DEC_SESSION_ETD(_s, _e, 1); \ - ed->data[0] = _s->session_index; \ -} +#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \ + { \ + 
ELOG_TYPE_DECLARE (_e) = { \ + .format = "%s: idx %u", \ + .format_args = "t4i4", \ + .n_enum_strings = 12, \ + .enum_strings = { \ + "created", \ + "listening", \ + "connecting", \ + "accepting", \ + "ready", \ + "opened", \ + "transport closing", \ + "closing", \ + "app closed", \ + "transport closed", \ + "closed", \ + "transport deleted", \ + }, \ + }; \ + DEC_SESSION_ETD (_s, _e, 2); \ + ed->data[0] = _s->session_state; \ + ed->data[1] = _s->session_index; \ + } + +#define SESSION_EVT_FREE_HANDLER(_s) \ + { \ + ELOG_TYPE_DECLARE (_e) = { \ + .format = "free: idx %u", \ + .format_args = "i4", \ + }; \ + DEC_SESSION_ED (_e, 1); \ + ed->data[0] = _s->session_index; \ + } #else +#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) #define SESSION_EVT_FREE_HANDLER(_s) #endif @@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main; counters[SESS_Q_##_node_evt].u64 += _cnt; \ } -#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \ -{ \ - u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \ - session_dbg_evts_t *sde; \ - sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \ - sde->counters[type].u64 += _cnt; \ - sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \ -} +#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \ + { \ + u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \ + session_dbg_evts_t *sde; \ + sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \ + sde->counters[type].u64 += _cnt; \ + sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \ + } #else #define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) -#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) +#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) #endif /*SESSION_COUNTS_EVT_DBG */ @@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main; #define CONCAT_HELPER(_a, _b) _a##_b #define CC(_a, _b) CONCAT_HELPER(_a, _b) -#define SESSION_EVT(_evt, _args...) 
CC(_evt, _HANDLER)(_args) - +#define session_evt_lvl(_evt) CC (_evt, _LVL) +#define session_evt_grp(_evt) CC (_evt, _GRP) +#define session_evt_grp_dbg_lvl(_evt) \ + session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)] +#define SESSION_EVT(_evt, _args...) \ + do \ + { \ + if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \ + session_evt_lvl (_evt))) \ + CC (_evt, _HANDLER) (_args); \ + } \ + while (0) #else #define SESSION_EVT(_evt, _args...) #define SESSION_DBG(_fmt, _args...) diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c new file mode 100644 index 00000000000..73b777127fd --- /dev/null +++ b/src/vnet/session/session_input.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#include <vnet/session/session.h> +#include <vnet/session/application.h> + +static inline int +mq_try_lock (svm_msg_q_t *mq) +{ + int rv, n_try = 0; + + while (n_try < 100) + { + rv = svm_msg_q_try_lock (mq); + if (!rv) + return 0; + n_try += 1; + usleep (1); + } + + return -1; +} + +always_inline u8 +mq_event_ring_index (session_evt_type_t et) +{ + return (et >= SESSION_CTRL_EVT_RPC ? 
SESSION_MQ_CTRL_EVT_RING : + SESSION_MQ_IO_EVT_RING); +} + +void +app_worker_del_all_events (app_worker_t *app_wrk) +{ + session_worker_t *wrk; + session_event_t *evt; + u32 thread_index; + session_t *s; + + for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts); + thread_index++) + { + while (clib_fifo_elts (app_wrk->wrk_evts[thread_index])) + { + clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt); + switch (evt->event_type) + { + case SESSION_CTRL_EVT_MIGRATED: + s = session_get (evt->session_index, thread_index); + transport_cleanup (session_get_transport_proto (s), + s->connection_index, s->thread_index); + session_free (s); + break; + case SESSION_CTRL_EVT_CLEANUP: + s = session_get (evt->as_u64[0] & 0xffffffff, thread_index); + if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION) + break; + uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s); + break; + case SESSION_CTRL_EVT_HALF_CLEANUP: + s = ho_session_get (evt->session_index); + pool_put_index (app_wrk->half_open_table, s->ho_index); + session_free (s); + break; + default: + break; + } + } + wrk = session_main_get_worker (thread_index); + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0); + } +} + +always_inline int +app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index, + u8 is_builtin) +{ + application_t *app = application_get (app_wrk->app_index); + svm_msg_q_t *mq = app_wrk->event_queue; + u8 ring_index, mq_is_cong; + session_state_t old_state; + session_event_t *evt; + u32 n_evts = 128, i; + session_t *s; + int rv; + + n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index])); + + if (!is_builtin) + { + mq_is_cong = app_worker_mq_is_congested (app_wrk); + if (mq_try_lock (mq)) + { + app_worker_set_mq_wrk_congested (app_wrk, thread_index); + return 0; + } + } + + for (i = 0; i < n_evts; i++) + { + evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]); + if (!is_builtin) + { + ring_index = mq_event_ring_index 
(evt->event_type); + if (svm_msg_q_or_ring_is_full (mq, ring_index)) + { + app_worker_set_mq_wrk_congested (app_wrk, thread_index); + break; + } + } + + switch (evt->event_type) + { + case SESSION_IO_EVT_RX: + s = session_get (evt->session_index, thread_index); + s->flags &= ~SESSION_F_RX_EVT; + /* Application didn't confirm accept yet */ + if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING || + s->session_state == SESSION_STATE_CONNECTING)) + break; + app->cb_fns.builtin_app_rx_callback (s); + break; + /* Handle sessions that might not be on current thread */ + case SESSION_IO_EVT_BUILTIN_RX: + s = session_get_from_handle_if_valid (evt->session_handle); + if (!s) + break; + s->flags &= ~SESSION_F_RX_EVT; + if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING || + s->session_state == SESSION_STATE_CONNECTING)) + break; + app->cb_fns.builtin_app_rx_callback (s); + break; + case SESSION_IO_EVT_TX: + s = session_get (evt->session_index, thread_index); + app->cb_fns.builtin_app_tx_callback (s); + break; + case SESSION_IO_EVT_TX_MAIN: + s = session_get_from_handle_if_valid (evt->session_handle); + if (!s) + break; + app->cb_fns.builtin_app_tx_callback (s); + break; + case SESSION_CTRL_EVT_BOUND: + /* No app cb function currently */ + if (is_builtin) + break; + app->cb_fns.session_listened_callback ( + app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle, + evt->as_u64[1] & 0xffffffff); + break; + case SESSION_CTRL_EVT_ACCEPTED: + s = session_get (evt->session_index, thread_index); + old_state = s->session_state; + if (app->cb_fns.session_accept_callback (s)) + { + session_detach_app (s); + break; + } + if (is_builtin) + { + if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) + { + session_set_state (s, + clib_max (old_state, s->session_state)); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + } + } + break; + case SESSION_CTRL_EVT_CONNECTED: + if (!(evt->as_u64[1] & 0xffffffff)) + { + s = session_get 
(evt->session_index, thread_index); + old_state = s->session_state; + } + else + s = 0; + rv = app->cb_fns.session_connected_callback ( + app_wrk->wrk_index, evt->as_u64[1] >> 32, s, + evt->as_u64[1] & 0xffffffff); + if (!s) + break; + if (rv) + { + session_detach_app (s); + break; + } + if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) + { + session_set_state (s, clib_max (old_state, s->session_state)); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + } + break; + case SESSION_CTRL_EVT_DISCONNECTED: + s = session_get (evt->session_index, thread_index); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_disconnect_callback (s); + break; + case SESSION_CTRL_EVT_RESET: + s = session_get (evt->session_index, thread_index); + if (!(s->flags & SESSION_F_APP_CLOSED)) + app->cb_fns.session_reset_callback (s); + break; + case SESSION_CTRL_EVT_UNLISTEN_REPLY: + if (is_builtin) + break; + app->cb_fns.session_unlistened_callback ( + app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32, + evt->as_u64[1] & 0xffffffff); + break; + case SESSION_CTRL_EVT_MIGRATED: + s = session_get (evt->session_index, thread_index); + app->cb_fns.session_migrate_callback (s, evt->as_u64[1]); + transport_cleanup (session_get_transport_proto (s), + s->connection_index, s->thread_index); + session_free (s); + /* Notify app that it has data on the new session */ + s = session_get_from_handle (evt->as_u64[1]); + session_send_io_evt_to_thread (s->rx_fifo, + SESSION_IO_EVT_BUILTIN_RX); + break; + case SESSION_CTRL_EVT_TRANSPORT_CLOSED: + s = session_get (evt->session_index, thread_index); + /* Notification enqueued before session was refused by app */ + if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX)) + break; + if (app->cb_fns.session_transport_closed_callback) + app->cb_fns.session_transport_closed_callback (s); + break; + case SESSION_CTRL_EVT_CLEANUP: + s = session_get (evt->as_u64[0] & 0xffffffff, thread_index); + /* 
Notification enqueued before session was refused by app */ + if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX)) + { + if (app->cb_fns.session_cleanup_callback) + app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32); + } + if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION) + break; + uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s); + break; + case SESSION_CTRL_EVT_HALF_CLEANUP: + s = ho_session_get (evt->session_index); + ASSERT (session_vlib_thread_is_cl_thread ()); + if (app->cb_fns.half_open_cleanup_callback) + app->cb_fns.half_open_cleanup_callback (s); + pool_put_index (app_wrk->half_open_table, s->ho_index); + session_free (s); + break; + case SESSION_CTRL_EVT_APP_ADD_SEGMENT: + app->cb_fns.add_segment_callback (app_wrk->wrk_index, + evt->as_u64[1]); + break; + case SESSION_CTRL_EVT_APP_DEL_SEGMENT: + app->cb_fns.del_segment_callback (app_wrk->wrk_index, + evt->as_u64[1]); + break; + default: + clib_warning ("unexpected event: %u", evt->event_type); + ASSERT (0); + break; + } + clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1); + } + + if (!is_builtin) + { + svm_msg_q_unlock (mq); + if (mq_is_cong && i == n_evts) + app_worker_unset_wrk_mq_congested (app_wrk, thread_index); + } + + return 0; +} + +int +app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index) +{ + if (app_worker_application_is_builtin (app_wrk)) + return app_worker_flush_events_inline (app_wrk, thread_index, + 1 /* is_builtin */); + else + return app_worker_flush_events_inline (app_wrk, thread_index, + 0 /* is_builtin */); +} + +static inline int +session_wrk_flush_events (session_worker_t *wrk) +{ + app_worker_t *app_wrk; + uword app_wrk_index; + u32 thread_index; + + thread_index = wrk->vm->thread_index; + app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf); + + while (app_wrk_index != ~0) + { + app_wrk = app_worker_get_if_valid (app_wrk_index); + /* app_wrk events are flushed on free, so should be valid here */ + ASSERT 
(app_wrk != 0); + app_wrk_flush_wrk_events (app_wrk, thread_index); + + if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index])) + clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0); + + app_wrk_index = + clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1); + } + + if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf)) + vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index); + + return 0; +} + +VLIB_NODE_FN (session_input_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 thread_index = vm->thread_index; + session_worker_t *wrk; + + wrk = session_main_get_worker (thread_index); + session_wrk_flush_events (wrk); + + return 0; +} + +VLIB_REGISTER_NODE (session_input_node) = { + .name = "session-input", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */
\ No newline at end of file diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 6e060cb119d..ff20bc2d835 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -29,13 +29,14 @@ #include <vnet/session/session.h> #include <vnet/session/application.h> +static session_lookup_main_t sl_main; + /** * Network namespace index (i.e., fib index) to session lookup table. We * should have one per network protocol type but for now we only support IP4/6 */ static u32 *fib_index_to_table_index[2]; -/* *INDENT-OFF* */ /* 16 octets */ typedef CLIB_PACKED (struct { union @@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct { u64 as_u64[6]; }; }) v6_connection_key_t; -/* *INDENT-ON* */ typedef clib_bihash_kv_16_8_t session_kv4_t; typedef clib_bihash_kv_48_8_t session_kv6_t; @@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc) tc->rmt_port, tc->proto); } +static inline u8 +session_table_alloc_needs_sync (void) +{ + return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1); +} + +static_always_inline u8 +session_table_is_alloced (u8 fib_proto, u32 fib_index) +{ + return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index && + fib_index_to_table_index[fib_proto][fib_index] != ~0); +} + static session_table_t * session_table_get_or_alloc (u8 fib_proto, u32 fib_index) { session_table_t *st; u32 table_index; + ASSERT (fib_index != ~0); - if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index && - fib_index_to_table_index[fib_proto][fib_index] != ~0) + + if (session_table_is_alloced (fib_proto, fib_index)) { table_index = fib_index_to_table_index[fib_proto][fib_index]; return session_table_get (table_index); } + + u8 needs_sync = session_table_alloc_needs_sync (); + session_lookup_main_t *slm = &sl_main; + + /* Stop workers, otherwise consumers might be affected. 
This is + * acceptable because new tables should seldom be allocated */ + if (needs_sync) + { + vlib_workers_sync (); + + /* We might have a race, only one worker allowed at once */ + clib_spinlock_lock (&slm->st_alloc_lock); + } + + /* Another worker just allocated this table */ + if (session_table_is_alloced (fib_proto, fib_index)) + { + table_index = fib_index_to_table_index[fib_proto][fib_index]; + st = session_table_get (table_index); + } else { st = session_table_alloc (); - table_index = session_table_index (st); + st->active_fib_proto = fib_proto; + session_table_init (st, fib_proto); vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index, ~0); + table_index = session_table_index (st); fib_index_to_table_index[fib_proto][fib_index] = table_index; - st->active_fib_proto = fib_proto; - session_table_init (st, fib_proto); - return st; } + + if (needs_sync) + { + clib_spinlock_unlock (&slm->st_alloc_lock); + vlib_workers_continue (); + } + + return st; } static session_table_t * @@ -1046,9 +1087,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl, /** * Lookup session with ip4 and transport layer information * - * Important note: this may look into another thread's pool table and - * register as 'peeker'. Caller should call @ref session_pool_remove_peeker as - * if needed as soon as possible. 
+ * Important note: this may look into another thread's pool table * * Lookup logic is similar to that of @ref session_lookup_connection_wt4 but * this returns a session as opposed to a transport connection and it does not @@ -1145,7 +1184,6 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6); if (rv == 0) { - ASSERT ((u32) (kv6.value >> 32) == thread_index); if (PREDICT_FALSE ((u32) (kv6.value >> 32) != thread_index)) { *result = SESSION_LOOKUP_RESULT_WRONG_THREAD; @@ -1313,8 +1351,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, lcl_port, rmt_port, proto); } -int -vnet_session_rule_add_del (session_rule_add_del_args_t * args) +session_error_t +vnet_session_rule_add_del (session_rule_add_del_args_t *args) { app_namespace_t *app_ns = app_namespace_get (args->appns_index); session_rules_table_t *srt; @@ -1324,14 +1362,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args) int rv = 0; if (!app_ns) - return VNET_API_ERROR_APP_INVALID_NS; + return SESSION_E_INVALID_NS; if (args->scope > 3) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (args->transport_proto != TRANSPORT_PROTO_TCP && args->transport_proto != TRANSPORT_PROTO_UDP) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0) { @@ -1452,6 +1490,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen = 0, rmt_plen = 0; + clib_error_t *error = 0; u32 appns_index, scope = 0; ip46_address_t lcl_ip, rmt_ip; u8 is_ip4 = 1, conn_set = 0; @@ -1501,29 +1540,32 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (input, "tag %_%v%_", &tag)) ; else - return clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); + { + error = clib_error_return (0, 
"unknown input `%U'", + format_unformat_error, input); + goto done; + } } if (proto == ~0) { vlib_cli_output (vm, "proto must be set"); - return 0; + goto done; } if (is_add && !conn_set && action == ~0) { vlib_cli_output (vm, "connection and action must be set for add"); - return 0; + goto done; } if (!is_add && !tag && !conn_set) { vlib_cli_output (vm, "connection or tag must be set for delete"); - return 0; + goto done; } if (vec_len (tag) > SESSION_RULE_TAG_MAX_LEN) { vlib_cli_output (vm, "tag too long (max u64)"); - return 0; + goto done; } if (ns_id) @@ -1532,7 +1574,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, if (!app_ns) { vlib_cli_output (vm, "namespace %v does not exist", ns_id); - return 0; + goto done; } } else @@ -1559,13 +1601,14 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input, .scope = scope, }; if ((rv = vnet_session_rule_add_del (&args))) - return clib_error_return (0, "rule add del returned %u", rv); + error = clib_error_return (0, "rule add del returned %u", rv); +done: + vec_free (ns_id); vec_free (tag); - return 0; + return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (session_rule_command, static) = { .path = "session rule", @@ -1573,7 +1616,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) = "<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>", .function = session_rule_command_fn, }; -/* *INDENT-ON* */ void session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto, @@ -1696,7 +1738,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_session_rules_command, static) = { .path = "show session rules", @@ -1704,11 +1745,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) = "<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]", .function = show_session_rules_command_fn, }; -/* *INDENT-ON* */ + +u8 * +format_session_lookup_tables (u8 *s, va_list *args) +{ + u32 fib_proto = va_arg (*args, 
u32); + u32 *fibs, num_fibs = 0, fib_index, indent; + session_table_t *st; + u64 total_mem = 0; + + fibs = fib_index_to_table_index[fib_proto]; + + for (fib_index = 0; fib_index < vec_len (fibs); fib_index++) + { + if (fibs[fib_index] == ~0) + continue; + + num_fibs += 1; + st = session_table_get (fibs[fib_index]); + total_mem += session_table_memory_size (st); + } + + indent = format_get_indent (s); + s = format (s, "active fibs:\t%u\n", num_fibs); + s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent, + vec_len (fibs) - 1); + s = format (s, "%Utable memory:\t%U\n", format_white_space, indent, + format_memory_size, total_mem); + s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent, + format_memory_size, vec_mem_size (fibs)); + + return s; +} + +static clib_error_t * +show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + session_table_t *st; + u32 fib_index = ~0; + + session_cli_return_if_not_enabled (); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table %u", &fib_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (fib_index != ~0) + { + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index); + if (st) + vlib_cli_output (vm, "%U", format_session_table, st); + else + vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index); + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index); + if (st) + vlib_cli_output (vm, "%U", format_session_table, st); + else + vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index); + goto done; + } + + vlib_cli_output (vm, "ip4 fib lookup tables:\n %U", + format_session_lookup_tables, FIB_PROTOCOL_IP4); + vlib_cli_output (vm, "ip6 fib lookup tables:\n %U", + format_session_lookup_tables, FIB_PROTOCOL_IP6); + +done: + return 0; +} + +VLIB_CLI_COMMAND (show_session_lookup_command, static) = { + .path = "show 
session lookup", + .short_help = "show session lookup [table <fib-index>]", + .function = show_session_lookup_command_fn, +}; void session_lookup_init (void) { + session_lookup_main_t *slm = &sl_main; + + clib_spinlock_init (&slm->st_alloc_lock); + /* * Allocate default table and map it to fib_index 0 */ diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h index c1037dff8c9..f9ffc15165a 100644 --- a/src/vnet/session/session_lookup.h +++ b/src/vnet/session/session_lookup.h @@ -29,6 +29,11 @@ typedef enum session_lookup_result_ SESSION_LOOKUP_RESULT_FILTERED } session_lookup_result_t; +typedef struct session_lookup_main_ +{ + clib_spinlock_t st_alloc_lock; +} session_lookup_main_t; + session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto); @@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args u8 transport_proto; } session_rule_add_del_args_t; -int vnet_session_rule_add_del (session_rule_add_del_args_t * args); +session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args); void session_lookup_set_tables_appns (app_namespace_t * app_ns); void session_lookup_init (void); diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index b8b5ce2d8de..0ec158fb429 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -26,12 +26,28 @@ #include <svm/queue.h> #include <sys/timerfd.h> -#define app_check_thread_and_barrier(_fn, _arg) \ - if (!vlib_thread_is_main_w_barrier ()) \ - { \ - vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \ - return; \ - } +static inline void +session_wrk_send_evt_to_main (session_worker_t *wrk, session_evt_elt_t *elt) +{ + session_evt_elt_t *he; + uword thread_index; + u8 is_empty; + + thread_index = wrk->vm->thread_index; + he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main); + is_empty = clib_llist_is_empty (wrk->event_elts, evt_list, he); + 
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he); + if (is_empty) + session_send_rpc_evt_to_thread (0, session_wrk_handle_evts_main_rpc, + uword_to_pointer (thread_index, void *)); +} + +#define app_check_thread_and_barrier(_wrk, _elt) \ + if (!vlib_thread_is_main_w_barrier ()) \ + { \ + session_wrk_send_evt_to_main (wrk, elt); \ + return; \ + } static void session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns) @@ -93,16 +109,17 @@ session_mq_free_ext_config (application_t *app, uword offset) } static void -session_mq_listen_handler (void *data) +session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - session_listen_msg_t *mp = (session_listen_msg_t *) data; vnet_listen_args_t _a, *a = &_a; + session_listen_msg_t *mp; app_worker_t *app_wrk; application_t *app; int rv; - app_check_thread_and_barrier (session_mq_listen_handler, mp); + app_check_thread_and_barrier (wrk, elt); + mp = session_evt_ctrl_data (wrk, elt); app = application_lookup (mp->client_index); if (!app) return; @@ -122,26 +139,31 @@ session_mq_listen_handler (void *data) a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config); if ((rv = vnet_listen (a))) - clib_warning ("listen returned: %U", format_session_error, rv); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, mp->wrk_index); - mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv); + app_worker_listened_notify (app_wrk, a->handle, mp->context, rv); if (mp->ext_config) session_mq_free_ext_config (app, mp->ext_config); + + /* Make sure events are flushed before releasing barrier, to avoid + * potential race with accept. 
*/ + app_wrk_flush_wrk_events (app_wrk, 0); } static void -session_mq_listen_uri_handler (void *data) +session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - session_listen_uri_msg_t *mp = (session_listen_uri_msg_t *) data; vnet_listen_args_t _a, *a = &_a; + session_listen_uri_msg_t *mp; app_worker_t *app_wrk; application_t *app; int rv; - app_check_thread_and_barrier (session_mq_listen_uri_handler, mp); + app_check_thread_and_barrier (wrk, elt); + mp = session_evt_ctrl_data (wrk, elt); app = application_lookup (mp->client_index); if (!app) return; @@ -152,7 +174,8 @@ session_mq_listen_uri_handler (void *data) rv = vnet_bind_uri (a); app_wrk = application_get_worker (app, 0); - mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv); + app_worker_listened_notify (app_wrk, a->handle, mp->context, rv); + app_wrk_flush_wrk_events (app_wrk, 0); } static void @@ -160,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp) { vnet_connect_args_t _a, *a = &_a; app_worker_t *app_wrk; + session_worker_t *wrk; application_t *app; int rv; @@ -173,6 +197,7 @@ session_mq_connect_one (session_connect_msg_t *mp) a->sep.port = mp->port; a->sep.transport_proto = mp->proto; a->sep.peer.fib_index = mp->vrf; + a->sep.dscp = mp->dscp; clib_memcpy_fast (&a->sep.peer.ip, &mp->lcl_ip, sizeof (mp->lcl_ip)); if (mp->is_ip4) { @@ -192,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp) if ((rv = vnet_connect (a))) { - clib_warning ("connect returned: %U", format_session_error, rv); + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, mp->wrk_index); - mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv); + app_worker_connect_notify (app_wrk, 0, rv, mp->context); } if (mp->ext_config) @@ -205,23 +231,20 @@ static void session_mq_handle_connects_rpc (void *arg) { u32 max_connects = 32, n_connects = 0; - vlib_main_t *vm = 
vlib_get_main (); session_evt_elt_t *he, *elt, *next; - session_worker_t *fwrk, *wrk; + session_worker_t *fwrk; - ASSERT (vlib_get_thread_index () == 0); + ASSERT (session_vlib_thread_is_cl_thread ()); /* Pending connects on linked list pertaining to first worker */ - fwrk = session_main_get_worker (1); + fwrk = session_main_get_worker (transport_cl_thread ()); if (!fwrk->n_pending_connects) - goto update_state; - - vlib_worker_thread_barrier_sync (vm); + return; he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects); elt = clib_llist_next (fwrk->event_elts, evt_list, he); - /* Avoid holding the barrier for too long */ + /* Avoid holding the worker for too long */ while (n_connects < max_connects && elt != he) { next = clib_llist_next (fwrk->event_elts, evt_list, elt); @@ -235,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg) /* Decrement with worker barrier */ fwrk->n_pending_connects -= n_connects; - - vlib_worker_thread_barrier_release (vm); - -update_state: - - /* Switch worker to poll mode if it was in interrupt mode and had work or - * back to interrupt if threshold of loops without a connect is passed. 
- * While in poll mode, reprogram connects rpc */ - wrk = session_main_get_worker (0); - if (wrk->state != SESSION_WRK_POLLING) + if (fwrk->n_pending_connects > 0) { - if (n_connects) - { - session_wrk_set_state (wrk, SESSION_WRK_POLLING); - vlib_node_set_state (vm, session_queue_node.index, - VLIB_NODE_STATE_POLLING); - wrk->no_connect_loops = 0; - } - } - else - { - if (!n_connects) - { - if (++wrk->no_connect_loops > 1e5) - { - session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT); - vlib_node_set_state (vm, session_queue_node.index, - VLIB_NODE_STATE_INTERRUPT); - } - } - else - wrk->no_connect_loops = 0; - } - - if (wrk->state == SESSION_WRK_POLLING) - { - elt = session_evt_alloc_ctrl (wrk); - elt->evt.event_type = SESSION_CTRL_EVT_RPC; - elt->evt.rpc_args.fp = session_mq_handle_connects_rpc; + session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index, + session_mq_handle_connects_rpc, 0); } } @@ -283,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt) u32 thread_index = wrk - session_main.wrk; session_evt_elt_t *he; - /* No workers, so just deal with the connect now */ - if (PREDICT_FALSE (!thread_index)) + if (PREDICT_FALSE (thread_index > transport_cl_thread ())) { - session_mq_connect_one (session_evt_ctrl_data (wrk, elt)); + clib_warning ("Connect on wrong thread. Dropping"); return; } - if (PREDICT_FALSE (thread_index != 1)) + /* If on worker, check if main has any pending messages. Avoids reordering + * with other control messages that need to be handled by main + */ + if (thread_index) { - clib_warning ("Connect on wrong thread. 
Dropping"); - return; + he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main); + + /* Events pending on main, postpone to avoid reordering */ + if (!clib_llist_is_empty (wrk->event_elts, evt_list, he)) + { + clib_llist_add_tail (wrk->event_elts, evt_list, elt, he); + return; + } } - /* Add to pending list to be handled by main thread */ + /* Add to pending list to be handled by first worker */ he = clib_llist_elt (wrk->event_elts, wrk->pending_connects); clib_llist_add_tail (wrk->event_elts, evt_list, elt, he); @@ -304,23 +300,23 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt) wrk->n_pending_connects += 1; if (wrk->n_pending_connects == 1) { - vlib_node_set_interrupt_pending (vlib_get_main_by_index (0), - session_queue_node.index); - session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0); + session_send_rpc_evt_to_thread_force (thread_index, + session_mq_handle_connects_rpc, 0); } } static void -session_mq_connect_uri_handler (void *data) +session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - session_connect_uri_msg_t *mp = (session_connect_uri_msg_t *) data; vnet_connect_args_t _a, *a = &_a; + session_connect_uri_msg_t *mp; app_worker_t *app_wrk; application_t *app; int rv; - app_check_thread_and_barrier (session_mq_connect_uri_handler, mp); + app_check_thread_and_barrier (wrk, elt); + mp = session_evt_ctrl_data (wrk, elt); app = application_lookup (mp->client_index); if (!app) return; @@ -331,9 +327,9 @@ session_mq_connect_uri_handler (void *data) a->app_index = app->app_index; if ((rv = vnet_connect_uri (a))) { - clib_warning ("connect_uri returned: %d", rv); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, 0 /* default wrk only */ ); - mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv); + app_worker_connect_notify (app_wrk, 0, rv, mp->context); } } @@ -370,14 +366,15 @@ session_mq_disconnect_handler (void *data) } static void 
-app_mq_detach_handler (void *data) +app_mq_detach_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - session_app_detach_msg_t *mp = (session_app_detach_msg_t *) data; vnet_app_detach_args_t _a, *a = &_a; + session_app_detach_msg_t *mp; application_t *app; - app_check_thread_and_barrier (app_mq_detach_handler, mp); + app_check_thread_and_barrier (wrk, elt); + mp = session_evt_ctrl_data (wrk, elt); app = application_lookup (mp->client_index); if (!app) return; @@ -388,18 +385,19 @@ app_mq_detach_handler (void *data) } static void -session_mq_unlisten_rpc (session_unlisten_msg_t *mp) +session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt) { - vlib_main_t *vm = vlib_get_main (); vnet_unlisten_args_t _a, *a = &_a; + session_unlisten_msg_t *mp; app_worker_t *app_wrk; session_handle_t sh; application_t *app; - u32 context; int rv; + app_check_thread_and_barrier (wrk, elt); + + mp = session_evt_ctrl_data (wrk, elt); sh = mp->handle; - context = mp->context; app = application_lookup (mp->client_index); if (!app) @@ -410,65 +408,34 @@ session_mq_unlisten_rpc (session_unlisten_msg_t *mp) a->handle = sh; a->wrk_map_index = mp->wrk_index; - vlib_worker_thread_barrier_sync (vm); - if ((rv = vnet_unlisten (a))) - clib_warning ("unlisten returned: %d", rv); - - vlib_worker_thread_barrier_release (vm); + session_worker_stat_error_inc (wrk, rv, 1); app_wrk = application_get_worker (app, a->wrk_map_index); if (!app_wrk) return; - mq_send_unlisten_reply (app_wrk, sh, context, rv); - clib_mem_free (mp); -} - -static void -session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt) -{ - u32 thread_index = wrk - session_main.wrk; - session_unlisten_msg_t *mp, *arg; - - mp = session_evt_ctrl_data (wrk, elt); - arg = clib_mem_alloc (sizeof (session_unlisten_msg_t)); - clib_memcpy_fast (arg, mp, sizeof (*arg)); - - if (PREDICT_FALSE (!thread_index)) - { - session_mq_unlisten_rpc (arg); - return; - } - - session_send_rpc_evt_to_thread_force (0, 
session_mq_unlisten_rpc, arg); + app_worker_unlisten_reply (app_wrk, sh, mp->context, rv); } static void -session_mq_accepted_reply_handler (void *data) +session_mq_accepted_reply_handler (session_worker_t *wrk, + session_evt_elt_t *elt) { - session_accepted_reply_msg_t *mp = (session_accepted_reply_msg_t *) data; vnet_disconnect_args_t _a = { 0 }, *a = &_a; + session_accepted_reply_msg_t *mp; session_state_t old_state; app_worker_t *app_wrk; session_t *s; - /* Server isn't interested, kill the session */ - if (mp->retval) - { - a->app_index = mp->context; - a->handle = mp->handle; - vnet_disconnect_session (a); - return; - } + mp = session_evt_ctrl_data (wrk, elt); /* Mail this back from the main thread. We're not polling in main * thread so we're using other workers for notifications. */ - if (vlib_num_workers () && vlib_get_thread_index () != 0 - && session_thread_from_handle (mp->handle) == 0) + if (session_thread_from_handle (mp->handle) == 0 && vlib_num_workers () && + vlib_get_thread_index () != 0) { - vlib_rpc_call_main_thread (session_mq_accepted_reply_handler, - (u8 *) mp, sizeof (*mp)); + session_wrk_send_evt_to_main (wrk, elt); return; } @@ -483,27 +450,36 @@ session_mq_accepted_reply_handler (void *data) return; } - if (!session_has_transport (s)) + /* Server isn't interested, disconnect the session */ + if (mp->retval) { - s->session_state = SESSION_STATE_READY; - if (ct_session_connect_notify (s, SESSION_E_NONE)) - return; + a->app_index = mp->context; + a->handle = mp->handle; + vnet_disconnect_session (a); + s->app_wrk_index = SESSION_INVALID_INDEX; + return; } - else + + /* Special handling for cut-through sessions */ + if (!session_has_transport (s)) { - old_state = s->session_state; - s->session_state = SESSION_STATE_READY; + session_set_state (s, SESSION_STATE_READY); + ct_session_connect_notify (s, SESSION_E_NONE); + return; + } - if (!svm_fifo_is_empty_prod (s->rx_fifo)) - app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX); + 
old_state = s->session_state; + session_set_state (s, SESSION_STATE_READY); - /* Closed while waiting for app to reply. Resend disconnect */ - if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) - { - app_worker_close_notify (app_wrk, s); - s->session_state = old_state; - return; - } + if (!svm_fifo_is_empty_prod (s->rx_fifo)) + app_worker_rx_notify (app_wrk, s); + + /* Closed while waiting for app to reply. Resend disconnect */ + if (old_state >= SESSION_STATE_TRANSPORT_CLOSING) + { + app_worker_close_notify (app_wrk, s); + session_set_state (s, old_state); + return; } } @@ -515,15 +491,13 @@ session_mq_reset_reply_handler (void *data) app_worker_t *app_wrk; session_t *s; application_t *app; - u32 index, thread_index; mp = (session_reset_reply_msg_t *) data; app = application_lookup (mp->context); if (!app) return; - session_parse_handle (mp->handle, &index, &thread_index); - s = session_get_if_valid (index, thread_index); + s = session_get_from_handle_if_valid (mp->handle); /* No session or not the right session */ if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING) @@ -633,6 +607,7 @@ session_mq_worker_update_handler (void *data) session_event_t *evt; session_t *s; application_t *app; + int rv; app = application_lookup (mp->client_index); if (!app) @@ -669,7 +644,9 @@ session_mq_worker_update_handler (void *data) return; } - app_worker_own_session (app_wrk, s); + rv = app_worker_own_session (app_wrk, s); + if (rv) + session_stat_error_inc (rv, 1); /* * Send reply @@ -696,7 +673,7 @@ session_mq_worker_update_handler (void *data) session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo)) - app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX); + app_worker_rx_notify (app_wrk, s); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) app_worker_close_notify (app_wrk, s); @@ -774,6 +751,67 @@ session_mq_transport_attr_handler (void *data) svm_msg_q_add_and_unlock (app_wrk->event_queue, msg); 
} +void +session_wrk_handle_evts_main_rpc (void *args) +{ + vlib_main_t *vm = vlib_get_main (); + clib_llist_index_t ei, next_ei; + session_evt_elt_t *he, *elt; + session_worker_t *fwrk; + u32 thread_index; + + vlib_worker_thread_barrier_sync (vm); + + thread_index = pointer_to_uword (args); + fwrk = session_main_get_worker (thread_index); + + he = clib_llist_elt (fwrk->event_elts, fwrk->evts_pending_main); + ei = clib_llist_next_index (he, evt_list); + + while (ei != fwrk->evts_pending_main) + { + elt = clib_llist_elt (fwrk->event_elts, ei); + next_ei = clib_llist_next_index (elt, evt_list); + clib_llist_remove (fwrk->event_elts, evt_list, elt); + switch (elt->evt.event_type) + { + case SESSION_CTRL_EVT_LISTEN: + session_mq_listen_handler (fwrk, elt); + break; + case SESSION_CTRL_EVT_UNLISTEN: + session_mq_unlisten_handler (fwrk, elt); + break; + case SESSION_CTRL_EVT_APP_DETACH: + app_mq_detach_handler (fwrk, elt); + break; + case SESSION_CTRL_EVT_CONNECT_URI: + session_mq_connect_uri_handler (fwrk, elt); + break; + case SESSION_CTRL_EVT_ACCEPTED_REPLY: + session_mq_accepted_reply_handler (fwrk, elt); + break; + case SESSION_CTRL_EVT_CONNECT: + session_mq_connect_handler (fwrk, elt); + break; + default: + clib_warning ("unhandled %u", elt->evt.event_type); + ALWAYS_ASSERT (0); + break; + } + + /* Regrab element in case pool moved */ + elt = clib_llist_elt (fwrk->event_elts, ei); + if (!clib_llist_elt_is_linked (elt, evt_list)) + { + session_evt_ctrl_data_free (fwrk, elt); + clib_llist_put (fwrk->event_elts, elt); + } + ei = next_ei; + } + + vlib_worker_thread_barrier_release (vm); +} + vlib_node_registration_t session_queue_node; typedef struct @@ -795,21 +833,21 @@ format_session_queue_trace (u8 * s, va_list * args) return s; } -#define foreach_session_queue_error \ -_(TX, "Packets transmitted") \ -_(TIMER, "Timer events") \ -_(NO_BUFFER, "Out of buffers") +#define foreach_session_queue_error \ + _ (TX, tx, INFO, "Packets transmitted") \ + _ (TIMER, timer, INFO, 
"Timer events") \ + _ (NO_BUFFER, no_buffer, ERROR, "Out of buffers") typedef enum { -#define _(sym,str) SESSION_QUEUE_ERROR_##sym, +#define _(f, n, s, d) SESSION_QUEUE_ERROR_##f, foreach_session_queue_error #undef _ SESSION_QUEUE_N_ERROR, } session_queue_error_t; -static char *session_queue_error_strings[] = { -#define _(sym,string) string, +static vlib_error_desc_t session_error_counters[] = { +#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, foreach_session_queue_error #undef _ }; @@ -822,36 +860,134 @@ enum }; static void -session_tx_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, - u32 next_index, u32 * to_next, u16 n_segs, - session_t * s, u32 n_trace) +session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, + u32 next_index, vlib_buffer_t **bufs, u16 n_segs, + session_t *s, u32 n_trace) { + vlib_buffer_t **b = bufs; + while (n_trace && n_segs) { - vlib_buffer_t *b = vlib_get_buffer (vm, to_next[0]); - if (PREDICT_TRUE - (vlib_trace_buffer - (vm, node, next_index, b, 1 /* follow_chain */ ))) + if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0], + 1 /* follow_chain */))) { session_queue_trace_t *t = - vlib_add_trace (vm, node, b, sizeof (*t)); + vlib_add_trace (vm, node, b[0], sizeof (*t)); t->session_index = s->session_index; t->server_thread_index = s->thread_index; n_trace--; } - to_next++; + b++; n_segs--; } vlib_set_trace_count (vm, node, n_trace); } +always_inline int +session_tx_fill_dma_transfers (session_worker_t *wrk, + session_tx_context_t *ctx, vlib_buffer_t *b) +{ + vlib_main_t *vm = wrk->vm; + u32 len_to_deq; + u8 *data0 = NULL; + int n_bytes_read, len_write; + svm_fifo_seg_t data_fs[2]; + + u32 n_segs = 2; + u16 n_transfers = 0; + /* + * Start with the first buffer in chain + */ + b->error = 0; + b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->current_data = 0; + data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); + len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf); + + 
n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset, + data_fs, &n_segs, len_to_deq); + + len_write = n_bytes_read; + ASSERT (n_bytes_read == len_to_deq); + + while (n_bytes_read) + { + wrk->batch_num++; + vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data, + data_fs[n_transfers].len); + data0 += data_fs[n_transfers].len; + n_bytes_read -= data_fs[n_transfers].len; + n_transfers++; + } + return len_write; +} + +always_inline int +session_tx_fill_dma_transfers_tail (session_worker_t *wrk, + session_tx_context_t *ctx, + vlib_buffer_t *b, u32 len_to_deq, u8 *data) +{ + vlib_main_t *vm = wrk->vm; + int n_bytes_read, len_write; + svm_fifo_seg_t data_fs[2]; + u32 n_segs = 2; + u16 n_transfers = 0; + + n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset, + data_fs, &n_segs, len_to_deq); + + len_write = n_bytes_read; + + ASSERT (n_bytes_read == len_to_deq); + + while (n_bytes_read) + { + wrk->batch_num++; + vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data, + data_fs[n_transfers].len); + data += data_fs[n_transfers].len; + n_bytes_read -= data_fs[n_transfers].len; + n_transfers++; + } + + return len_write; +} + +always_inline int +session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u32 len_to_deq, u8 *data0) +{ + int n_bytes_read; + if (PREDICT_TRUE (!wrk->dma_enabled)) + n_bytes_read = + svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0); + else + n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b); + return n_bytes_read; +} + +always_inline int +session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u32 len_to_deq, u8 *data) +{ + int n_bytes_read; + if (PREDICT_TRUE (!wrk->dma_enabled)) + n_bytes_read = + svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data); + else + n_bytes_read = + session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data); + return n_bytes_read; +} + 
always_inline void -session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, - vlib_buffer_t * b, u16 * n_bufs, u8 peek_data) +session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u16 *n_bufs, u8 peek_data) { + vlib_main_t *vm = wrk->vm; vlib_buffer_t *chain_b, *prev_b; u32 chain_bi0, to_deq, left_from_seg; - u16 len_to_deq, n_bytes_read; + int len_to_deq, n_bytes_read; u8 *data, j; b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -873,8 +1009,8 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, data = vlib_buffer_get_current (chain_b); if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, - ctx->sp.tx_offset, len_to_deq, data); + n_bytes_read = + session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data); ctx->sp.tx_offset += n_bytes_read; } else @@ -931,13 +1067,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, } always_inline void -session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, - vlib_buffer_t * b, u16 * n_bufs, u8 peek_data) +session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u16 *n_bufs, u8 peek_data) { u32 len_to_deq; u8 *data0; int n_bytes_read; - /* * Start with the first buffer in chain */ @@ -950,8 +1085,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, - len_to_deq, data0); + n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0); ASSERT (n_bytes_read > 0); /* Keep track of progress locally, transport is also supposed to * increment it independently when pushing the header */ @@ -973,10 +1107,10 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, n_bytes_read = svm_fifo_peek (f, offset, deq_now, data0); ASSERT (n_bytes_read > 0); - if (ctx->s->session_state == SESSION_STATE_LISTENING) + if (transport_connection_is_cless 
(ctx->tc)) { - ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4); - ctx->tc->rmt_port = hdr->rmt_port; + clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr, + sizeof (*hdr)); } hdr->data_offset += n_bytes_read; if (hdr->data_offset == hdr->data_length) @@ -998,6 +1132,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, ASSERT (n_bytes_read > 0); } } + b->current_length = n_bytes_read; ctx->left_to_snd -= n_bytes_read; @@ -1005,7 +1140,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, * Fill in the remaining buffers in the chain, if any */ if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd)) - session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data); + session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data); } always_inline u8 @@ -1018,7 +1153,15 @@ session_tx_not_ready (session_t * s, u8 peek_data) /* Can retransmit for closed sessions but can't send new data if * session is not ready or closed */ else if (s->session_state < SESSION_STATE_READY) - return 1; + { + /* Allow accepting session to send custom packets. + * For instance, tcp want to send acks in established, but + * the app has not called accept() yet */ + if (s->session_state == SESSION_STATE_ACCEPTING && + (s->flags & SESSION_F_CUSTOM_TX)) + return 0; + return 1; + } else if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) { /* Allow closed transports to still send custom packets. 
@@ -1029,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data) return 2; } } + else + { + if (s->session_state == SESSION_STATE_TRANSPORT_DELETED) + return 2; + } return 0; } @@ -1085,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr), (u8 *) & ctx->hdr); + /* Zero length dgrams not supported */ + if (PREDICT_FALSE (ctx->hdr.data_length == 0)) + { + svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr)); + ctx->max_len_to_snd = 0; + return; + } + /* We cannot be sure apps have not enqueued incomplete dgrams */ + if (PREDICT_FALSE (ctx->max_dequeue < + ctx->hdr.data_length + sizeof (ctx->hdr))) + { + ctx->max_len_to_snd = 0; + return; + } ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset); len = ctx->hdr.data_length - ctx->hdr.data_offset; + if (ctx->hdr.gso_size) + { + ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size); + } + /* Process multiple dgrams if smaller than min (buf_space, mss). 
* This avoids handling multiple dgrams if they require buffer * chains */ @@ -1107,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx, { svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr), (u8 *) & hdr); - ASSERT (hdr.data_length > hdr.data_offset); dgram_len = hdr.data_length - hdr.data_offset; - if (len + dgram_len > ctx->max_dequeue - || first_dgram_len != dgram_len) + if (offset + sizeof (hdr) + hdr.data_length > + ctx->max_dequeue || + first_dgram_len != dgram_len) break; + /* Assert here to allow test above with zero length dgrams */ + ASSERT (hdr.data_length > hdr.data_offset); len += dgram_len; offset += sizeof (hdr) + hdr.data_length; } @@ -1180,8 +1349,30 @@ session_tx_maybe_reschedule (session_worker_t * wrk, svm_fifo_unset_event (s->tx_fifo); if (svm_fifo_max_dequeue_cons (s->tx_fifo) > ctx->sp.tx_offset) - if (svm_fifo_set_event (s->tx_fifo)) - session_evt_add_head_old (wrk, elt); + { + if (svm_fifo_set_event (s->tx_fifo)) + session_evt_add_head_old (wrk, elt); + } + else + { + transport_connection_deschedule (ctx->tc); + } +} + +always_inline void +session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index) +{ + if (PREDICT_TRUE (!wrk->dma_enabled)) + { + vec_add1 (wrk->pending_tx_buffers, bi); + vec_add1 (wrk->pending_tx_nexts, next_index); + } + else + { + session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail]; + vec_add1 (dma_transfer->pending_tx_buffers, bi); + vec_add1 (dma_transfer->pending_tx_nexts, next_index); + } } always_inline int @@ -1227,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, ctx->sp.max_burst_size = max_burst; n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp); *n_tx_packets += n_custom_tx; - if (PREDICT_FALSE - (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)) - return SESSION_TX_OK; + if (PREDICT_FALSE (ctx->s->session_state >= + SESSION_STATE_TRANSPORT_CLOSED)) + { + svm_fifo_unset_event 
(ctx->s->tx_fifo); + return SESSION_TX_OK; + } max_burst -= n_custom_tx; if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX)) { @@ -1238,6 +1432,11 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, } } + /* Connection previously descheduled because it had no data to send. + * Clear descheduled flag and reset pacer if in use */ + if (transport_connection_is_descheduled (ctx->tc)) + transport_connection_clear_descheduled (ctx->tc); + transport_connection_snd_params (ctx->tc, &ctx->sp); if (!ctx->sp.snd_space) @@ -1300,6 +1499,8 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, ctx->left_to_snd = ctx->max_len_to_snd; n_left = ctx->n_segs_per_evt; + vec_validate (ctx->transport_pending_bufs, n_left); + while (n_left >= 4) { vlib_buffer_t *b0, *b1; @@ -1318,18 +1519,15 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); - session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data); - session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data); - - ctx->transport_vft->push_header (ctx->tc, b0); - ctx->transport_vft->push_header (ctx->tc, b1); + session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data); + session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data); + ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0; + ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1; n_left -= 2; - vec_add1 (wrk->pending_tx_buffers, bi0); - vec_add1 (wrk->pending_tx_buffers, bi1); - vec_add1 (wrk->pending_tx_nexts, next_index); - vec_add1 (wrk->pending_tx_nexts, next_index); + session_tx_add_pending_buffer (wrk, bi0, next_index); + session_tx_add_pending_buffer (wrk, bi1, next_index); } while (n_left) { @@ -1345,20 +1543,20 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, bi0 = ctx->tx_buffers[--n_bufs]; b0 = vlib_get_buffer (vm, bi0); - session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data); - - /* Ask transport to push header after current_length and - * 
total_length_not_including_first_buffer are updated */ - ctx->transport_vft->push_header (ctx->tc, b0); + session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data); + ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0; n_left -= 1; - vec_add1 (wrk->pending_tx_buffers, bi0); - vec_add1 (wrk->pending_tx_nexts, next_index); + session_tx_add_pending_buffer (wrk, bi0, next_index); } + /* Ask transport to push headers */ + ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs, + ctx->n_segs_per_evt); + if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0)) - session_tx_trace_frame (vm, node, next_index, wrk->pending_tx_buffers, + session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs, ctx->n_segs_per_evt, ctx->s, n_trace); if (PREDICT_FALSE (n_bufs)) @@ -1367,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, *n_tx_packets += ctx->n_segs_per_evt; SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue, - ctx->s->tx_fifo->has_event, wrk->last_vlib_time); + ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time); ASSERT (ctx->left_to_snd == 0); @@ -1412,20 +1610,30 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk, { transport_send_params_t *sp = &wrk->ctx.sp; session_t *s = wrk->ctx.s; + clib_llist_index_t ei; u32 n_packets; - if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)) + if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) || + (s->session_state == SESSION_STATE_CONNECTING && + (s->flags & SESSION_F_HALF_OPEN)))) return 0; /* Clear custom-tx flag used to request reschedule for tx */ s->flags &= ~SESSION_F_CUSTOM_TX; + sp->flags = 0; + sp->bytes_dequeued = 0; sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets, TRANSPORT_PACER_MAX_BURST_PKTS); + /* Grab elt index since app transports can enqueue events on tx */ + ei = clib_llist_entry_index (wrk->event_elts, elt); + n_packets = transport_custom_tx 
(session_get_transport_proto (s), s, sp); *n_tx_packets += n_packets; + elt = clib_llist_elt (wrk->event_elts, ei); + if (s->flags & SESSION_F_CUSTOM_TX) { session_evt_add_old (wrk, elt); @@ -1438,8 +1646,8 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk, session_evt_add_head_old (wrk, elt); } - if (sp->max_burst_size && - svm_fifo_needs_deq_ntf (s->tx_fifo, sp->max_burst_size)) + if (sp->bytes_dequeued && + svm_fifo_needs_deq_ntf (s->tx_fifo, sp->bytes_dequeued)) session_dequeue_notify (s); return n_packets; @@ -1491,10 +1699,10 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt) session_transport_reset (s); break; case SESSION_CTRL_EVT_LISTEN: - session_mq_listen_handler (session_evt_ctrl_data (wrk, elt)); + session_mq_listen_handler (wrk, elt); break; case SESSION_CTRL_EVT_LISTEN_URI: - session_mq_listen_uri_handler (session_evt_ctrl_data (wrk, elt)); + session_mq_listen_uri_handler (wrk, elt); break; case SESSION_CTRL_EVT_UNLISTEN: session_mq_unlisten_handler (wrk, elt); @@ -1503,7 +1711,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt) session_mq_connect_handler (wrk, elt); break; case SESSION_CTRL_EVT_CONNECT_URI: - session_mq_connect_uri_handler (session_evt_ctrl_data (wrk, elt)); + session_mq_connect_uri_handler (wrk, elt); break; case SESSION_CTRL_EVT_SHUTDOWN: session_mq_shutdown_handler (session_evt_ctrl_data (wrk, elt)); @@ -1515,7 +1723,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt) session_mq_disconnected_handler (session_evt_ctrl_data (wrk, elt)); break; case SESSION_CTRL_EVT_ACCEPTED_REPLY: - session_mq_accepted_reply_handler (session_evt_ctrl_data (wrk, elt)); + session_mq_accepted_reply_handler (wrk, elt); break; case SESSION_CTRL_EVT_DISCONNECTED_REPLY: session_mq_disconnected_reply_handler (session_evt_ctrl_data (wrk, @@ -1528,7 +1736,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt) 
session_mq_worker_update_handler (session_evt_ctrl_data (wrk, elt)); break; case SESSION_CTRL_EVT_APP_DETACH: - app_mq_detach_handler (session_evt_ctrl_data (wrk, elt)); + app_mq_detach_handler (wrk, elt); break; case SESSION_CTRL_EVT_APP_WRK_RPC: session_mq_app_wrk_rpc_handler (session_evt_ctrl_data (wrk, elt)); @@ -1572,7 +1780,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, s = session_event_get_session (wrk, e); if (PREDICT_FALSE (!s)) break; - CLIB_PREFETCH (s->tx_fifo, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (s->tx_fifo, sizeof (*(s->tx_fifo)), LOAD); wrk->ctx.s = s; /* Spray packets in per session type frames, since they go to * different nodes */ @@ -1580,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, break; case SESSION_IO_EVT_RX: s = session_event_get_session (wrk, e); - if (!s) + if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) break; transport_app_rx_evt (session_get_transport_proto (s), s->connection_index, s->thread_index); @@ -1591,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, break; svm_fifo_unset_event (s->rx_fifo); app_wrk = app_worker_get (s->app_wrk_index); - app_worker_builtin_rx (app_wrk, s); + app_worker_rx_notify (app_wrk, s); break; - case SESSION_IO_EVT_BUILTIN_TX: - s = session_get_from_handle_if_valid (e->session_handle); + case SESSION_IO_EVT_TX_MAIN: + s = session_get_if_valid (e->session_index, 0 /* main thread */); + if (PREDICT_FALSE (!s)) + break; wrk->ctx.s = s; if (PREDICT_TRUE (s != 0)) - session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets); + (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets); break; default: clib_warning ("unhandled event type %d", e->event_type); } - SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk); + SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk); /* Regrab elements in case pool moved */ elt = 
clib_llist_elt (wrk->event_elts, ei); @@ -1611,14 +1821,22 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node, clib_llist_put (wrk->event_elts, elt); } -/* *INDENT-OFF* */ static const u32 session_evt_msg_sizes[] = { #define _(symc, sym) \ [SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t), foreach_session_ctrl_evt #undef _ }; -/* *INDENT-ON* */ + +always_inline void +session_update_time_subscribers (session_main_t *smm, clib_time_type_t now, + u32 thread_index) +{ + session_update_time_fn *fn; + + vec_foreach (fn, smm->update_time_fns) + (*fn) (now, thread_index); +} always_inline void session_evt_add_to_list (session_worker_t * wrk, session_event_t * evt) @@ -1652,9 +1870,9 @@ static void session_flush_pending_tx_buffers (session_worker_t * wrk, vlib_node_runtime_t * node) { - vlib_buffer_enqueue_to_next (wrk->vm, node, wrk->pending_tx_buffers, - wrk->pending_tx_nexts, - vec_len (wrk->pending_tx_nexts)); + vlib_buffer_enqueue_to_next_vec (wrk->vm, node, &wrk->pending_tx_buffers, + &wrk->pending_tx_nexts, + vec_len (wrk->pending_tx_nexts)); vec_reset_length (wrk->pending_tx_buffers); vec_reset_length (wrk->pending_tx_nexts); } @@ -1685,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk) if (wrk->state == SESSION_WRK_POLLING) { - if (clib_llist_elts (wrk->event_elts) == 4 && + if (clib_llist_elts (wrk->event_elts) == 5 && vlib_last_vectors_per_main_loop (vm) < 1) { session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT); @@ -1695,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk) } else if (wrk->state == SESSION_WRK_INTERRUPT) { - if (clib_llist_elts (wrk->event_elts) > 4 || + if (clib_llist_elts (wrk->event_elts) > 5 || vlib_last_vectors_per_main_loop (vm) > 1) { session_wrk_set_state (wrk, SESSION_WRK_POLLING); @@ -1734,10 +1952,19 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, /* * Update transport time */ - transport_update_time (wrk->last_vlib_time, thread_index); + 
session_update_time_subscribers (smm, wrk->last_vlib_time, thread_index); n_tx_packets = vec_len (wrk->pending_tx_buffers); SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk); + if (PREDICT_FALSE (wrk->dma_enabled)) + { + if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1))) + return 0; + wrk->batch = vlib_dma_batch_new (vm, wrk->config_index); + if (!wrk->batch) + return 0; + } + /* * Dequeue new internal mq events */ @@ -1807,6 +2034,20 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, }; } + if (PREDICT_FALSE (wrk->dma_enabled)) + { + if (wrk->batch_num) + { + vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail); + wrk->batch_num = 0; + wrk->trans_tail++; + if (wrk->trans_tail == wrk->trans_size) + wrk->trans_tail = 0; + } + + vlib_dma_batch_submit (vm, wrk->batch); + } + SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk); if (vec_len (wrk->pending_tx_buffers)) @@ -1823,19 +2064,16 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, return n_tx_packets; } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (session_queue_node) = -{ +VLIB_REGISTER_NODE (session_queue_node) = { .function = session_queue_node_fn, .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, .name = "session-queue", .format_trace = format_session_queue_trace, .type = VLIB_NODE_TYPE_INPUT, - .n_errors = ARRAY_LEN (session_queue_error_strings), - .error_strings = session_queue_error_strings, + .n_errors = SESSION_QUEUE_N_ERROR, + .error_counters = session_error_counters, .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ static clib_error_t * session_wrk_tfd_read_ready (clib_file_t *cf) @@ -1939,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt, return 0; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (session_queue_process_node) = { .function = session_queue_process, @@ -1947,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) = .name = "session-queue-process", .state = VLIB_NODE_STATE_DISABLED, }; -/* 
*INDENT-ON* */ static_always_inline uword session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1960,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, return session_queue_node_fn (vm, node, frame); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (session_queue_pre_input_node) = { .function = session_queue_pre_input_inline, @@ -1968,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) = .name = "session-queue-main", .state = VLIB_NODE_STATE_DISABLED, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c index 34bd6a38676..70a702cf55c 100644 --- a/src/vnet/session/session_rules_table.c +++ b/src/vnet/session/session_rules_table.c @@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt, * @param srt table where rule should be added * @param args rule arguments * - * @return 0 if success, clib_error_t error otherwise + * @return 0 if success, session_error_t error otherwise */ -int -session_rules_table_add_del (session_rules_table_t * srt, - session_rule_table_add_del_args_t * args) +session_error_t +session_rules_table_add_del (session_rules_table_t *srt, + session_rule_table_add_del_args_t *args) { u8 fib_proto = args->rmt.fp_proto, *rt; u32 ri_from_tag, ri; @@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt, ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag); if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX) - return VNET_API_ERROR_INVALID_VALUE; + return SESSION_E_INVALID; if (fib_proto == FIB_PROTOCOL_IP4) { @@ -509,11 +509,18 @@ session_rules_table_add_del (session_rules_table_t * srt, } } else - return VNET_API_ERROR_INVALID_VALUE_2; + return SESSION_E_INVALID; return 0; } void +session_rules_table_free (session_rules_table_t *srt) +{ + mma_rules_table_free_16 (&srt->session_rules_tables_16); + 
mma_rules_table_free_40 (&srt->session_rules_tables_40); +} + +void session_rules_table_init (session_rules_table_t * srt) { mma_rules_table_16_t *srt4; @@ -598,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt, srt4 = &srt->session_rules_tables_16; vlib_cli_output (vm, "IP4 rules"); - /* *INDENT-OFF* */ pool_foreach (sr4, srt4->rules) { vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4); } - /* *INDENT-ON* */ } else if (fib_proto == FIB_PROTOCOL_IP6) @@ -612,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt, srt6 = &srt->session_rules_tables_40; vlib_cli_output (vm, "IP6 rules"); - /* *INDENT-OFF* */ pool_foreach (sr6, srt6->rules) { vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6); } - /* *INDENT-ON* */ } } diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h index 8679cb8a0c7..010d50a6398 100644 --- a/src/vnet/session/session_rules_table.h +++ b/src/vnet/session/session_rules_table.h @@ -18,11 +18,11 @@ #include <vnet/vnet.h> #include <vnet/fib/fib.h> +#include <vnet/session/session_types.h> #include <vnet/session/transport.h> #include <vnet/session/mma_16.h> #include <vnet/session/mma_40.h> -/* *INDENT-OFF* */ typedef CLIB_PACKED (struct { union @@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct u64 as_u64[5]; }; }) session_mask_or_match_6_t; -/* *INDENT-ON* */ #define SESSION_RULE_TAG_MAX_LEN 64 #define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX @@ -111,11 +110,13 @@ void session_rules_table_show_rule (vlib_main_t * vm, ip46_address_t * lcl_ip, u16 lcl_port, ip46_address_t * rmt_ip, u16 rmt_port, u8 is_ip4); -int session_rules_table_add_del (session_rules_table_t * srt, - session_rule_table_add_del_args_t * args); +session_error_t +session_rules_table_add_del (session_rules_table_t *srt, + session_rule_table_add_del_args_t *args); u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri, u8 is_ip4); void 
session_rules_table_init (session_rules_table_t * srt); +void session_rules_table_free (session_rules_table_t *srt); #endif /* SRC_VNET_SESSION_SESSION_RULES_TABLE_H_ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c index d0b576fda7b..dbbe771979c 100644 --- a/src/vnet/session/session_table.c +++ b/src/vnet/session/session_table.c @@ -60,6 +60,31 @@ session_table_get (u32 table_index) _(v6,halfopen,buckets,20000) \ _(v6,halfopen,memory,(64<<20)) +void +session_table_free (session_table_t *slt, u8 fib_proto) +{ + u8 all = fib_proto > FIB_PROTOCOL_IP6 ? 1 : 0; + int i; + + for (i = 0; i < TRANSPORT_N_PROTOS; i++) + session_rules_table_free (&slt->session_rules[i]); + + vec_free (slt->session_rules); + + if (fib_proto == FIB_PROTOCOL_IP4 || all) + { + clib_bihash_free_16_8 (&slt->v4_session_hash); + clib_bihash_free_16_8 (&slt->v4_half_open_hash); + } + if (fib_proto == FIB_PROTOCOL_IP6 || all) + { + clib_bihash_free_48_8 (&slt->v6_session_hash); + clib_bihash_free_48_8 (&slt->v6_half_open_hash); + } + + pool_put (lookup_tables, slt); +} + /** * Initialize session table hash tables * @@ -160,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash, &ctx); } -/* *INDENT-ON* */ +u32 +session_table_memory_size (session_table_t *st) +{ + u64 total_size = 0; + + if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) + { + clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + c = st->v4_half_open_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + } + + if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash)) + { + clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + c = st->v6_half_open_hash.chunks; + while (c) + { + total_size += c->size; + c = c->next; + } + } + + return total_size; +} + +u8 * 
+format_session_table (u8 *s, va_list *args) +{ + session_table_t *st = va_arg (*args, session_table_t *); + + if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash)) + { + s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0); + s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0); + } + + if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash)) + { + s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0); + s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0); + } + + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h index ead3c302681..636b8d77bee 100644 --- a/src/vnet/session/session_table.h +++ b/src/vnet/session/session_table.h @@ -67,6 +67,10 @@ session_table_t *session_table_alloc (void); session_table_t *session_table_get (u32 table_index); u32 session_table_index (session_table_t * slt); void session_table_init (session_table_t * slt, u8 fib_proto); +void session_table_free (session_table_t *slt, u8 fib_proto); + +u32 session_table_memory_size (session_table_t *st); +u8 *format_session_table (u8 *s, va_list *args); /* Internal, try not to use it! */ session_table_t *_get_session_tables (); @@ -75,7 +79,6 @@ session_table_t *_get_session_tables (); pool_foreach (VAR, _get_session_tables ()) BODY #endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */ -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c new file mode 100644 index 00000000000..770e7263024 --- /dev/null +++ b/src/vnet/session/session_test.c @@ -0,0 +1,363 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. 
+ */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vppinfra/error.h> +#include <vpp/api/types.h> + +#include <vnet/ip/ip_types_api.h> + +#define __plugin_msg_base session_test_main.msg_id_base +#include <vlibapi/vat_helper_macros.h> + +#include <vlibmemory/vlib.api_enum.h> +#include <vlibmemory/vlib.api_types.h> + +/* Declare message IDs */ +#include <vnet/format_fns.h> +#include <vnet/session/session.api_enum.h> +#include <vnet/session/session.api_types.h> + +#define vl_endianfun /* define message structures */ +#include <vnet/session/session.api.h> +#undef vl_endianfun + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + u32 ping_id; + vat_main_t *vat_main; +} session_test_main_t; + +static session_test_main_t session_test_main; + +static int +api_session_rule_add_del (vat_main_t *vam) +{ + vl_api_session_rule_add_del_t *mp; + unformat_input_t *i = vam->input; + u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen, rmt_plen; + u32 appns_index = 0, scope = 0; + ip4_address_t lcl_ip4, rmt_ip4; + ip6_address_t lcl_ip6, rmt_ip6; + u8 is_ip4 = 1, conn_set = 0; + u8 is_add = 1, *tag = 0; + int ret; + fib_prefix_t lcl, rmt; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "add")) + ; + else if (unformat (i, "proto tcp")) + proto = 0; + else if (unformat (i, "proto udp")) + proto = 1; + else if (unformat (i, "appns %d", &appns_index)) + ; + else if (unformat (i, "scope %d", &scope)) + ; + else if (unformat (i, "tag %_%v%_", &tag)) + ; + else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip4_address, + &lcl_ip4, &lcl_plen, &lcl_port, unformat_ip4_address, + &rmt_ip4, &rmt_plen, &rmt_port)) + { + is_ip4 = 1; + conn_set = 1; + } + else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip6_address, + &lcl_ip6, &lcl_plen, &lcl_port, unformat_ip6_address, + &rmt_ip6, &rmt_plen, &rmt_port)) + { + is_ip4 = 0; + conn_set = 1; + } + else 
if (unformat (i, "action %d", &action)) + ; + else + break; + } + if (proto == ~0 || !conn_set || action == ~0) + { + errmsg ("transport proto, connection and action must be set"); + return -99; + } + + if (scope > 3) + { + errmsg ("scope should be 0-3"); + return -99; + } + + M (SESSION_RULE_ADD_DEL, mp); + + clib_memset (&lcl, 0, sizeof (lcl)); + clib_memset (&rmt, 0, sizeof (rmt)); + if (is_ip4) + { + ip_set (&lcl.fp_addr, &lcl_ip4, 1); + ip_set (&rmt.fp_addr, &rmt_ip4, 1); + lcl.fp_len = lcl_plen; + rmt.fp_len = rmt_plen; + } + else + { + ip_set (&lcl.fp_addr, &lcl_ip6, 0); + ip_set (&rmt.fp_addr, &rmt_ip6, 0); + lcl.fp_len = lcl_plen; + rmt.fp_len = rmt_plen; + } + + ip_prefix_encode (&lcl, &mp->lcl); + ip_prefix_encode (&rmt, &mp->rmt); + mp->lcl_port = clib_host_to_net_u16 ((u16) lcl_port); + mp->rmt_port = clib_host_to_net_u16 ((u16) rmt_port); + mp->transport_proto = + proto ? TRANSPORT_PROTO_API_UDP : TRANSPORT_PROTO_API_TCP; + mp->action_index = clib_host_to_net_u32 (action); + mp->appns_index = clib_host_to_net_u32 (appns_index); + mp->scope = scope; + mp->is_add = is_add; + if (tag) + { + clib_memcpy (mp->tag, tag, vec_len (tag)); + vec_free (tag); + } + + S (mp); + W (ret); + return ret; +} + +static void +vl_api_app_attach_reply_t_handler (vl_api_app_attach_reply_t *mp) +{ +} + +static void +vl_api_app_add_cert_key_pair_reply_t_handler ( + vl_api_app_add_cert_key_pair_reply_t *mp) +{ +} + +static int +api_app_attach (vat_main_t *vat) +{ + return -1; +} + +static int +api_application_detach (vat_main_t *vat) +{ + return -1; +} + +static int +api_app_del_cert_key_pair (vat_main_t *vat) +{ + return -1; +} + +static int +api_app_add_cert_key_pair (vat_main_t *vat) +{ + return -1; +} + +static int +api_session_rules_dump (vat_main_t *vam) +{ + vl_api_session_rules_dump_t *mp; + vl_api_control_ping_t *mp_ping; + int ret; + + if (!vam->json_output) + { + print (vam->ofp, "%=20s", "Session Rules"); + } + + M (SESSION_RULES_DUMP, mp); + /* send it... 
*/ + S (mp); + + /* Use a control ping for synchronization */ + PING (&session_test_main, mp_ping); + S (mp_ping); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static void +vl_api_session_rules_details_t_handler (vl_api_session_rules_details_t *mp) +{ + vat_main_t *vam = &vat_main; + fib_prefix_t lcl, rmt; + + ip_prefix_decode (&mp->lcl, &lcl); + ip_prefix_decode (&mp->rmt, &rmt); + + if (lcl.fp_proto == FIB_PROTOCOL_IP4) + { + print (vam->ofp, + "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s", + clib_net_to_host_u32 (mp->appns_index), mp->transport_proto, + mp->scope, format_ip4_address, &lcl.fp_addr.ip4, lcl.fp_len, + clib_net_to_host_u16 (mp->lcl_port), format_ip4_address, + &rmt.fp_addr.ip4, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port), + clib_net_to_host_u32 (mp->action_index), mp->tag); + } + else + { + print (vam->ofp, + "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s", + clib_net_to_host_u32 (mp->appns_index), mp->transport_proto, + mp->scope, format_ip6_address, &lcl.fp_addr.ip6, lcl.fp_len, + clib_net_to_host_u16 (mp->lcl_port), format_ip6_address, + &rmt.fp_addr.ip6, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port), + clib_net_to_host_u32 (mp->action_index), mp->tag); + } +} + +static void +vl_api_app_namespace_add_del_reply_t_handler ( + vl_api_app_namespace_add_del_reply_t *mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + if (retval == 0) + errmsg ("app ns index %d\n", ntohl (mp->appns_index)); + vam->result_ready = 1; + } +} + +static void +vl_api_app_namespace_add_del_v2_reply_t_handler ( + vl_api_app_namespace_add_del_v2_reply_t *vat) +{ +} + +static void +vl_api_app_worker_add_del_reply_t_handler ( + vl_api_app_worker_add_del_reply_t *vat) +{ +} + +static int +api_app_namespace_add_del_v2 (vat_main_t *vat) +{ + return -1; +} + +static int +api_session_enable_disable 
(vat_main_t *vat) +{ + return -1; +} + +static int +api_app_worker_add_del (vat_main_t *vat) +{ + return -1; +} + +static int +api_app_namespace_add_del (vat_main_t *vam) +{ + vl_api_app_namespace_add_del_t *mp; + unformat_input_t *i = vam->input; + u8 *ns_id = 0, secret_set = 0, sw_if_index_set = 0; + u32 sw_if_index, ip4_fib_id, ip6_fib_id; + u64 secret; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "id %_%v%_", &ns_id)) + ; + else if (unformat (i, "secret %lu", &secret)) + secret_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "ip4_fib_id %d", &ip4_fib_id)) + ; + else if (unformat (i, "ip6_fib_id %d", &ip6_fib_id)) + ; + else + break; + } + if (!ns_id || !secret_set || !sw_if_index_set) + { + errmsg ("namespace id, secret and sw_if_index must be set"); + return -99; + } + if (vec_len (ns_id) > 64) + { + errmsg ("namespace id too long"); + return -99; + } + M (APP_NAMESPACE_ADD_DEL, mp); + + vl_api_vec_to_api_string (ns_id, &mp->namespace_id); + mp->secret = clib_host_to_net_u64 (secret); + mp->sw_if_index = clib_host_to_net_u32 (sw_if_index); + mp->ip4_fib_id = clib_host_to_net_u32 (ip4_fib_id); + mp->ip6_fib_id = clib_host_to_net_u32 (ip6_fib_id); + vec_free (ns_id); + S (mp); + W (ret); + return ret; +} + +static void +vl_api_app_namespace_add_del_v4_reply_t_handler ( + vl_api_app_namespace_add_del_v4_reply_t *mp) +{ +} + +static int +api_app_namespace_add_del_v4 (vat_main_t *vat) +{ + return -1; +} + +static void +vl_api_app_namespace_add_del_v3_reply_t_handler ( + vl_api_app_namespace_add_del_v3_reply_t *mp) +{ +} + +static int +api_app_namespace_add_del_v3 (vat_main_t *vat) +{ + return -1; +} + +static int +api_session_sapi_enable_disable (vat_main_t *vat) +{ + return -1; +} + +#include <vnet/session/session.api_test.c> + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_types.h 
b/src/vnet/session/session_types.h index 246978e0ac3..5e650727d61 100644 --- a/src/vnet/session/session_types.h +++ b/src/vnet/session/session_types.h @@ -22,8 +22,22 @@ #define SESSION_INVALID_INDEX ((u32)~0) #define SESSION_INVALID_HANDLE ((u64)~0) #define SESSION_CTRL_MSG_MAX_SIZE 86 +#define SESSION_CTRL_MSG_TX_MAX_SIZE 160 #define SESSION_NODE_FRAME_SIZE 128 +typedef u8 session_type_t; +typedef u64 session_handle_t; + +typedef union session_handle_tu_ +{ + session_handle_t handle; + struct + { + u32 session_index; + u32 thread_index; + }; +} __attribute__ ((__transparent_union__)) session_handle_tu_t; + #define foreach_session_endpoint_fields \ foreach_transport_endpoint_cfg_fields \ _(u8, transport_proto) \ @@ -35,6 +49,23 @@ typedef struct _session_endpoint #undef _ } session_endpoint_t; +#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener") + +typedef enum session_endpoint_cfg_flags_bits_ +{ +#define _(sym, str) SESSION_ENDPT_CFG_F_BIT_##sym, + foreach_session_endpoint_cfg_flags +#undef _ +} __clib_packed session_endpoint_cfg_flags_bits_t; + +typedef enum session_endpoint_cfg_flags_ +{ +#define _(sym, str) \ + SESSION_ENDPT_CFG_F_##sym = 1 << SESSION_ENDPT_CFG_F_BIT_##sym, + foreach_session_endpoint_cfg_flags +#undef _ +} __clib_packed session_endpoint_cfg_flags_t; + typedef struct _session_endpoint_cfg { #define _(type, name) type name; @@ -45,7 +76,7 @@ typedef struct _session_endpoint_cfg u32 ns_index; u8 original_tp; u64 parent_handle; - u8 flags; + session_endpoint_cfg_flags_t flags; transport_endpt_ext_cfg_t *ext_cfg; } session_endpoint_cfg_t; @@ -107,9 +138,6 @@ session_endpoint_is_zero (session_endpoint_t * sep) return ip_is_zero (&sep->ip, sep->is_ip4); } -typedef u8 session_type_t; -typedef u64 session_handle_t; - typedef enum { SESSION_CLEANUP_TRANSPORT, @@ -126,19 +154,19 @@ typedef enum session_ft_action_ /* * Session states */ -#define foreach_session_state \ - _(CREATED, "created") \ - _(LISTENING, "listening") \ - 
_(CONNECTING, "connecting") \ - _(ACCEPTING, "accepting") \ - _(READY, "ready") \ - _(OPENED, "opened") \ - _(TRANSPORT_CLOSING, "transport-closing") \ - _(CLOSING, "closing") \ - _(APP_CLOSED, "app-closed") \ - _(TRANSPORT_CLOSED, "transport-closed") \ - _(CLOSED, "closed") \ - _(TRANSPORT_DELETED, "transport-deleted") \ +#define foreach_session_state \ + _ (CREATED, "created") \ + _ (LISTENING, "listening") \ + _ (CONNECTING, "connecting") \ + _ (ACCEPTING, "accepting") \ + _ (READY, "ready") \ + _ (OPENED, "opened") \ + _ (TRANSPORT_CLOSING, "transport-closing") \ + _ (CLOSING, "closing") \ + _ (APP_CLOSED, "app-closed") \ + _ (TRANSPORT_CLOSED, "transport-closed") \ + _ (CLOSED, "closed") \ + _ (TRANSPORT_DELETED, "transport-deleted") typedef enum { @@ -146,7 +174,7 @@ typedef enum foreach_session_state #undef _ SESSION_N_STATES, -} session_state_t; +} __clib_packed session_state_t; #define foreach_session_flag \ _ (RX_EVT, "rx-event") \ @@ -155,7 +183,9 @@ typedef enum _ (IS_MIGRATING, "migrating") \ _ (UNIDIRECTIONAL, "unidirectional") \ _ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \ - _ (HALF_OPEN, "half-open") + _ (HALF_OPEN, "half-open") \ + _ (APP_CLOSED, "app-closed") \ + _ (IS_CLESS, "connectionless") typedef enum session_flags_bits_ { @@ -178,38 +208,42 @@ typedef struct session_ svm_fifo_t *rx_fifo; svm_fifo_t *tx_fifo; + union + { + session_handle_t handle; + struct + { + /** Index in thread pool where session was allocated */ + u32 session_index; + + /** Index of the thread that allocated the session */ + u32 thread_index; + }; + }; + /** Type built from transport and network protocol types */ session_type_t session_type; /** State in session layer state machine. 
See @ref session_state_t */ - volatile u8 session_state; - - /** Index in thread pool where session was allocated */ - u32 session_index; + volatile session_state_t session_state; /** Index of the app worker that owns the session */ u32 app_wrk_index; - /** Index of the thread that allocated the session */ - u8 thread_index; - /** Session flags. See @ref session_flags_t */ - u32 flags; + session_flags_t flags; /** Index of the transport connection associated to the session */ u32 connection_index; - /** Index of application that owns the listener. Set only if a listener */ - u32 app_index; + /** App listener index in app's listener pool if a listener */ + u32 al_index; union { /** Parent listener session index if the result of an accept */ session_handle_t listener_handle; - /** App listener index in app's listener pool if a listener */ - u32 al_index; - /** Index in app worker's half-open table if a half-open */ u32 ho_index; }; @@ -282,45 +316,35 @@ session_tx_is_dgram (session_t * s) always_inline session_handle_t session_handle (session_t * s) { - return ((u64) s->thread_index << 32) | (u64) s->session_index; + return s->handle; } always_inline u32 -session_index_from_handle (session_handle_t handle) +session_index_from_handle (session_handle_tu_t handle) { - return handle & 0xFFFFFFFF; + return handle.session_index; } always_inline u32 -session_thread_from_handle (session_handle_t handle) +session_thread_from_handle (session_handle_tu_t handle) { - return handle >> 32; + return handle.thread_index; } always_inline void -session_parse_handle (session_handle_t handle, u32 * index, - u32 * thread_index) +session_parse_handle (session_handle_tu_t handle, u32 *index, + u32 *thread_index) { - *index = session_index_from_handle (handle); - *thread_index = session_thread_from_handle (handle); + *index = handle.session_index; + *thread_index = handle.thread_index; } static inline session_handle_t session_make_handle (u32 session_index, u32 data) { - return (((u64) data 
<< 32) | (u64) session_index); -} - -always_inline u32 -session_handle_index (session_handle_t ho_handle) -{ - return (ho_handle & 0xffffffff); -} - -always_inline u32 -session_handle_data (session_handle_t ho_handle) -{ - return (ho_handle >> 32); + return ((session_handle_tu_t){ .session_index = session_index, + .thread_index = data }) + .handle; } typedef enum @@ -329,7 +353,7 @@ typedef enum SESSION_IO_EVT_TX, SESSION_IO_EVT_TX_FLUSH, SESSION_IO_EVT_BUILTIN_RX, - SESSION_IO_EVT_BUILTIN_TX, + SESSION_IO_EVT_TX_MAIN, SESSION_CTRL_EVT_RPC, SESSION_CTRL_EVT_HALF_CLOSE, SESSION_CTRL_EVT_CLOSE, @@ -360,6 +384,8 @@ typedef enum SESSION_CTRL_EVT_APP_WRK_RPC, SESSION_CTRL_EVT_TRANSPORT_ATTR, SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY, + SESSION_CTRL_EVT_TRANSPORT_CLOSED, + SESSION_CTRL_EVT_HALF_CLEANUP, } session_evt_type_t; #define foreach_session_ctrl_evt \ @@ -394,7 +420,6 @@ typedef enum #define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX #define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE #define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX -#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX typedef enum { @@ -419,6 +444,7 @@ typedef struct session_handle_t session_handle; session_rpc_args_t rpc_args; u32 ctrl_data_index; + u64 as_u64[2]; struct { u8 data[0]; @@ -443,12 +469,12 @@ typedef struct session_dgram_header_ u16 rmt_port; u16 lcl_port; u8 is_ip4; + u16 gso_size; } __clib_packed session_dgram_hdr_t; #define SESSION_CONN_ID_LEN 37 -#define SESSION_CONN_HDR_LEN 45 - -STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), +#define SESSION_CONN_HDR_LEN 47 +STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10), "session conn id wrong length"); #define foreach_session_error \ @@ -466,9 +492,12 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), _ (NOLISTEN, "not listening") \ _ (NOSESSION, "session does not exist") \ _ (NOAPP, "app not attached") \ + _ (APP_ATTACHED, "app already attached") \ _ 
(PORTINUSE, "lcl port in use") \ _ (IPINUSE, "ip in use") \ _ (ALREADY_LISTENING, "ip port pair already listened on") \ + _ (ADDR_NOT_IN_USE, "address not in use") \ + _ (INVALID, "invalid value") \ _ (INVALID_RMT_IP, "invalid remote ip") \ _ (INVALID_APPWRK, "invalid app worker") \ _ (INVALID_NS, "invalid namespace") \ @@ -486,7 +515,10 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8), _ (NOEXTCFG, "no extended transport config") \ _ (NOCRYPTOENG, "no crypto engine") \ _ (NOCRYPTOCKP, "cert key pair not found ") \ - _ (LOCAL_CONNECT, "could not connect with local scope") + _ (LOCAL_CONNECT, "could not connect with local scope") \ + _ (WRONG_NS_SECRET, "wrong ns secret") \ + _ (SYSCALL, "system call error") \ + _ (TRANSPORT_NO_REG, "transport was not registered") typedef enum session_error_p_ { diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index 526f1a2da15..1c2a9261d3c 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -17,36 +17,31 @@ #include <vnet/session/session.h> #include <vnet/fib/fib.h> -typedef struct local_endpoint_ -{ - transport_endpoint_t ep; - int refcnt; -} local_endpoint_t; - /** * Per-type vector of transport protocol virtual function tables */ transport_proto_vft_t *tp_vfts; -/* - * Port allocator seed - */ -static u32 port_allocator_seed; - -/* - * Local endpoints table - */ -static transport_endpoint_table_t local_endpoints_table; +typedef struct local_endpoint_ +{ + transport_endpoint_t ep; + transport_proto_t proto; + int refcnt; +} local_endpoint_t; -/* - * Pool of local endpoints - */ -static local_endpoint_t *local_endpoints; +typedef struct transport_main_ +{ + transport_endpoint_table_t local_endpoints_table; + local_endpoint_t *local_endpoints; + u32 *lcl_endpts_freelist; + u32 port_allocator_seed; + u16 port_allocator_min_src_port; + u16 port_allocator_max_src_port; + u8 lcl_endpts_cleanup_pending; + clib_spinlock_t local_endpoints_lock; +} 
transport_main_t; -/* - * Local endpoints pool lock - */ -static clib_spinlock_t local_endpoints_lock; +static transport_main_t tp_main; u8 * format_transport_proto (u8 * s, va_list * args) @@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args) return s; } +const char *transport_flags_str[] = { +#define _(sym, str) str, + foreach_transport_connection_flag +#undef _ +}; + +u8 * +format_transport_flags (u8 *s, va_list *args) +{ + transport_connection_flags_t flags; + int i, last = -1; + + flags = va_arg (*args, transport_connection_flags_t); + + for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++) + if (flags & (1 << i)) + last = i; + + for (i = 0; i < last; i++) + { + if (flags & (1 << i)) + s = format (s, "%s, ", transport_flags_str[i]); + } + if (last >= 0) + s = format (s, "%s", transport_flags_str[last]); + + return s; +} + u8 * format_transport_connection (u8 * s, va_list * args) { @@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args) if (transport_connection_is_tx_paced (tc)) s = format (s, "%Upacer: %U\n", format_white_space, indent, format_transport_pacer, &tc->pacer, tc->thread_index); - s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent, - tc->flags); + s = format (s, "%Utransport: flags: %U\n", format_white_space, indent, + format_transport_flags, tc->flags); } return s; } @@ -124,14 +148,13 @@ u8 * format_transport_half_open_connection (u8 * s, va_list * args) { u32 transport_proto = va_arg (*args, u32); - u32 ho_index = va_arg (*args, u32); transport_proto_vft_t *tp_vft; tp_vft = transport_protocol_get_vft (transport_proto); if (!tp_vft) return s; - s = format (s, "%U", tp_vft->format_half_open, ho_index); + s = (tp_vft->format_half_open) (s, args); return s; } @@ -314,6 +337,8 @@ transport_cleanup_half_open (transport_proto_t tp, u32 conn_index) int transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep) { + if (PREDICT_FALSE (!tp_vfts[tp].connect)) + return 
SESSION_E_TRANSPORT_NO_REG; return tp_vfts[tp].connect (tep); } @@ -341,8 +366,10 @@ transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index) u32 transport_start_listen (transport_proto_t tp, u32 session_index, - transport_endpoint_t * tep) + transport_endpoint_cfg_t *tep) { + if (PREDICT_FALSE (!tp_vfts[tp].start_listen)) + return SESSION_E_TRANSPORT_NO_REG; return tp_vfts[tp].start_listen (session_index, tep); } @@ -420,67 +447,148 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index, #define PORT_MASK ((1 << 16)- 1) void -transport_endpoint_del (u32 tepi) +transport_endpoint_free (u32 tepi) { - clib_spinlock_lock_if_init (&local_endpoints_lock); - pool_put_index (local_endpoints, tepi); - clib_spinlock_unlock_if_init (&local_endpoints_lock); + transport_main_t *tm = &tp_main; + pool_put_index (tm->local_endpoints, tepi); } always_inline local_endpoint_t * -transport_endpoint_new (void) +transport_endpoint_alloc (void) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; - pool_get_zero (local_endpoints, lep); + + ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); + + pool_get_aligned_safe (tm->local_endpoints, lep, 0); return lep; } +static void +transport_cleanup_freelist (void) +{ + transport_main_t *tm = &tp_main; + local_endpoint_t *lep; + u32 *lep_indexp; + + clib_spinlock_lock (&tm->local_endpoints_lock); + + vec_foreach (lep_indexp, tm->lcl_endpts_freelist) + { + lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp); + + /* Port re-shared after attempt to cleanup */ + if (lep->refcnt > 0) + continue; + + transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto, + &lep->ep); + transport_endpoint_free (*lep_indexp); + } + + vec_reset_length (tm->lcl_endpts_freelist); + + tm->lcl_endpts_cleanup_pending = 0; + + clib_spinlock_unlock (&tm->local_endpoints_lock); +} + void -transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port) +transport_program_endpoint_cleanup (u32 lepi) +{ 
+ transport_main_t *tm = &tp_main; + u8 flush_fl = 0; + + /* All workers can free connections. Synchronize access to freelist */ + clib_spinlock_lock (&tm->local_endpoints_lock); + + vec_add1 (tm->lcl_endpts_freelist, lepi); + + /* Avoid accumulating lots of endpoints for cleanup */ + if (!tm->lcl_endpts_cleanup_pending && + vec_len (tm->lcl_endpts_freelist) > 32) + { + tm->lcl_endpts_cleanup_pending = 1; + flush_fl = 1; + } + + clib_spinlock_unlock (&tm->local_endpoints_lock); + + if (flush_fl) + session_send_rpc_evt_to_thread_force (transport_cl_thread (), + transport_cleanup_freelist, 0); +} + +int +transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 lepi; - /* Cleanup local endpoint if this was an active connect */ - lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip, - clib_net_to_host_u16 (port)); - if (lepi != ENDPOINT_INVALID_INDEX) + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, + port); + if (lepi == ENDPOINT_INVALID_INDEX) + return -1; + + /* First worker may be cleaning up ports so avoid touching free bitmap */ + lep = &tm->local_endpoints[lepi]; + ASSERT (lep->refcnt >= 1); + + /* Local endpoint no longer in use, program cleanup */ + if (!clib_atomic_sub_fetch (&lep->refcnt, 1)) { - lep = pool_elt_at_index (local_endpoints, lepi); - if (!clib_atomic_sub_fetch (&lep->refcnt, 1)) - { - transport_endpoint_table_del (&local_endpoints_table, proto, - &lep->ep); - transport_endpoint_del (lepi); - } + transport_program_endpoint_cleanup (lepi); + return 0; } + + /* Not an error, just an indication that endpoint was not cleaned up */ + return -1; } -static void -transport_endpoint_mark_used (u8 proto, ip46_address_t * ip, u16 port) +static int +transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; - clib_spinlock_lock_if_init 
(&local_endpoints_lock); - lep = transport_endpoint_new (); + u32 tei; + + ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); + + tei = + transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); + if (tei != ENDPOINT_INVALID_INDEX) + return SESSION_E_PORTINUSE; + + /* Pool reallocs with worker barrier */ + lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; + lep->proto = proto; lep->refcnt = 1; - transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep, - lep - local_endpoints); - clib_spinlock_unlock_if_init (&local_endpoints_lock); + + transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, + lep - tm->local_endpoints); + + return 0; } void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) { + transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 lepi; - lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip, - clib_net_to_host_u16 (port)); + /* Active opens should call this only from a control thread, which are also + * used to allocate and free ports. So, pool has only one writer and + * potentially many readers. Listeners are allocated with barrier */ + lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip, + port); if (lepi != ENDPOINT_INVALID_INDEX) { - lep = pool_elt_at_index (local_endpoints, lepi); + lep = pool_elt_at_index (tm->local_endpoints, lepi); clib_atomic_add_fetch (&lep->refcnt, 1); } } @@ -488,18 +596,22 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port) /** * Allocate local port and add if successful add entry to local endpoint * table to mark the pair as used. 
+ * + * @return port in net order or -1 if port cannot be allocated */ int -transport_alloc_local_port (u8 proto, ip46_address_t * ip) +transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, + transport_endpoint_cfg_t *rmt) { - u16 min = 1024, max = 65535; /* XXX configurable ? */ + transport_main_t *tm = &tp_main; + u16 min = tm->port_allocator_min_src_port; + u16 max = tm->port_allocator_max_src_port; int tries, limit; - u32 tei; limit = max - min; - /* Only support active opens from thread 0 */ - ASSERT (vlib_get_thread_index () == 0); + /* Only support active opens from one of ctrl threads */ + ASSERT (vlib_get_thread_index () <= transport_cl_thread ()); /* Search for first free slot */ for (tries = 0; tries < limit; tries++) @@ -509,19 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip) /* Find a port in the specified range */ while (1) { - port = random_u32 (&port_allocator_seed) & PORT_MASK; + port = random_u32 (&tm->port_allocator_seed) & PORT_MASK; if (PREDICT_TRUE (port >= min && port < max)) - break; + { + port = clib_host_to_net_u16 (port); + break; + } } - /* Look it up. 
If not found, we're done */ - tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip, - port); - if (tei == ENDPOINT_INVALID_INDEX) - { - transport_endpoint_mark_used (proto, ip, port); - return port; - } + if (!transport_endpoint_mark_used (proto, lcl_addr, port)) + return port; + + /* IP:port pair already in use, check if 6-tuple available */ + if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port, + rmt->port, proto, rmt->is_ip4)) + continue; + + /* 6-tuple is available so increment lcl endpoint refcount */ + transport_share_local_endpoint (proto, lcl_addr, port); + + return port; } return -1; } @@ -549,14 +668,14 @@ transport_get_interface_ip (u32 sw_if_index, u8 is_ip4, ip46_address_t * addr) } static session_error_t -transport_find_local_ip_for_remote (u32 sw_if_index, - transport_endpoint_t * rmt, - ip46_address_t * lcl_addr) +transport_find_local_ip_for_remote (u32 *sw_if_index, + transport_endpoint_t *rmt, + ip46_address_t *lcl_addr) { fib_node_index_t fei; fib_prefix_t prefix; - if (sw_if_index == ENDPOINT_INVALID_INDEX) + if (*sw_if_index == ENDPOINT_INVALID_INDEX) { /* Find a FIB path to the destination */ clib_memcpy_fast (&prefix.fp_addr, &rmt->ip, sizeof (rmt->ip)); @@ -570,13 +689,13 @@ transport_find_local_ip_for_remote (u32 sw_if_index, if (fei == FIB_NODE_INDEX_INVALID) return SESSION_E_NOROUTE; - sw_if_index = fib_entry_get_resolving_interface (fei); - if (sw_if_index == ENDPOINT_INVALID_INDEX) + *sw_if_index = fib_entry_get_resolving_interface (fei); + if (*sw_if_index == ENDPOINT_INVALID_INDEX) return SESSION_E_NOINTF; } clib_memset (lcl_addr, 0, sizeof (*lcl_addr)); - return transport_get_interface_ip (sw_if_index, rmt->is_ip4, lcl_addr); + return transport_get_interface_ip (*sw_if_index, rmt->is_ip4, lcl_addr); } int @@ -584,16 +703,16 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { transport_endpoint_t *rmt = (transport_endpoint_t *) 
rmt_cfg; + transport_main_t *tm = &tp_main; session_error_t error; int port; - u32 tei; /* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { - error = transport_find_local_ip_for_remote (rmt_cfg->peer.sw_if_index, + error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; @@ -605,26 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, sizeof (rmt_cfg->peer.ip)); } + /* Cleanup freelist if need be */ + if (vec_len (tm->lcl_endpts_freelist)) + transport_cleanup_freelist (); + /* * Allocate source port */ if (rmt_cfg->peer.port == 0) { - port = transport_alloc_local_port (proto, lcl_addr); + port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { - port = clib_net_to_host_u16 (rmt_cfg->peer.port); - *lcl_port = port; - tei = transport_endpoint_lookup (&local_endpoints_table, proto, - lcl_addr, port); - if (tei != ENDPOINT_INVALID_INDEX) + *lcl_port = rmt_cfg->peer.port; + + if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port)) + return 0; + + /* IP:port pair already in use, check if 6-tuple available */ + if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, + rmt_cfg->peer.port, rmt->port, proto, + rmt->is_ip4)) return SESSION_E_PORTINUSE; - transport_endpoint_mark_used (proto, lcl_addr, port); + /* 6-tuple is available so increment lcl endpoint refcount */ + transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port); + + return 0; } return 0; @@ -660,15 +790,15 @@ static inline u32 spacer_max_burst (spacer_t * pacer, clib_us_time_t time_now) { u64 n_periods = (time_now - pacer->last_update); - u64 inc; + i64 inc; if ((inc = (f32) n_periods * pacer->tokens_per_period) > 10) { pacer->last_update = time_now; - pacer->bucket = clib_min (pacer->bucket + inc, pacer->max_burst); + pacer->bucket = clib_min (pacer->bucket + inc, (i64) 
pacer->max_burst); } - return pacer->bucket > 0 ? pacer->max_burst : 0; + return pacer->bucket >= 0 ? pacer->max_burst : 0; } static inline void @@ -790,7 +920,7 @@ void transport_connection_reschedule (transport_connection_t * tc) { tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED; - transport_connection_tx_pacer_reset_bucket (tc, TRANSPORT_PACER_MIN_BURST); + transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */); if (transport_max_tx_dequeue (tc)) sesssion_reschedule_tx (tc); else @@ -830,6 +960,9 @@ transport_enable_disable (vlib_main_t * vm, u8 is_en) { if (vft->enable) (vft->enable) (vm, is_en); + + if (vft->update_time) + session_register_update_time_fn (vft->update_time, is_en); } } @@ -838,6 +971,7 @@ transport_init (void) { vlib_thread_main_t *vtm = vlib_get_thread_main (); session_main_t *smm = vnet_get_session_main (); + transport_main_t *tm = &tp_main; u32 num_threads; if (smm->local_endpoints_table_buckets == 0) @@ -846,15 +980,18 @@ transport_init (void) smm->local_endpoints_table_memory = 512 << 20; /* Initialize [port-allocator] random number seed */ - port_allocator_seed = (u32) clib_cpu_time_now (); + tm->port_allocator_seed = (u32) clib_cpu_time_now (); + tm->port_allocator_min_src_port = smm->port_allocator_min_src_port; + tm->port_allocator_max_src_port = smm->port_allocator_max_src_port; - clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table", + clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoints table", smm->local_endpoints_table_buckets, smm->local_endpoints_table_memory); + clib_spinlock_init (&tm->local_endpoints_lock); + num_threads = 1 /* main thread */ + vtm->n_threads; if (num_threads > 1) { - clib_spinlock_init (&local_endpoints_lock); /* Main not polled if there are workers */ smm->transport_cl_thread = 1; } diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 447552c539e..e6ba1ecbc5f 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -57,6 
+57,7 @@ typedef struct transport_send_params_ struct { u32 max_burst_size; + u32 bytes_dequeued; }; }; transport_snd_flags_t flags; @@ -65,13 +66,12 @@ typedef struct transport_send_params_ /* * Transport protocol virtual function table */ -/* *INDENT-OFF* */ typedef struct _transport_proto_vft { /* * Setup */ - u32 (*start_listen) (u32 session_index, transport_endpoint_t * lcl); + u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl); u32 (*stop_listen) (u32 conn_index); int (*connect) (transport_endpoint_cfg_t * rmt); void (*half_close) (u32 conn_index, u32 thread_index); @@ -85,7 +85,8 @@ typedef struct _transport_proto_vft * Transmission */ - u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); + u32 (*push_header) (transport_connection_t *tconn, vlib_buffer_t **b, + u32 n_bufs); int (*send_params) (transport_connection_t * tconn, transport_send_params_t *sp); void (*update_time) (f64 time_now, u8 thread_index); @@ -123,16 +124,13 @@ typedef struct _transport_proto_vft */ transport_options_t transport_options; } transport_proto_vft_t; -/* *INDENT-ON* */ extern transport_proto_vft_t *tp_vfts; -#define transport_proto_foreach(VAR, BODY) \ -do { \ - for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \ - if (tp_vfts[VAR].push_header != 0) \ - do { BODY; } while (0); \ -} while (0) +#define transport_proto_foreach(VAR, VAR_ALLOW_BM) \ + for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \ + if (tp_vfts[VAR].push_header != 0) \ + if (VAR_ALLOW_BM & (1 << VAR)) int transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep); void transport_half_close (transport_proto_t tp, u32 conn_index, @@ -140,7 +138,7 @@ void transport_half_close (transport_proto_t tp, u32 conn_index, void transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index); void transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index); u32 transport_start_listen (transport_proto_t tp, u32 session_index, - transport_endpoint_t * tep); + 
transport_endpoint_cfg_t *tep); u32 transport_stop_listen (transport_proto_t tp, u32 conn_index); void transport_cleanup (transport_proto_t tp, u32 conn_index, u8 thread_index); @@ -246,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft, transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp); void transport_update_time (clib_time_type_t time_now, u8 thread_index); -int transport_alloc_local_port (u8 proto, ip46_address_t * ip); -int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt, - ip46_address_t * lcl_addr, - u16 * lcl_port); +int transport_alloc_local_port (u8 proto, ip46_address_t *ip, + transport_endpoint_cfg_t *rmt); +int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt, + ip46_address_t *lcl_addr, u16 *lcl_port); void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port); -void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port); +int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, + u16 port); void transport_enable_disable (vlib_main_t * vm, u8 is_en); void transport_init (void); @@ -329,6 +328,19 @@ transport_connection_is_tx_paced (transport_connection_t * tc) return (tc->flags & TRANSPORT_CONNECTION_F_IS_TX_PACED); } +/** + * Clear descheduled flag and update pacer if needed + * + * To add session to scheduler use @ref transport_connection_reschedule + */ +always_inline void +transport_connection_clear_descheduled (transport_connection_t *tc) +{ + tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED; + if (transport_connection_is_tx_paced (tc)) + transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */); +} + u8 *format_transport_pacer (u8 * s, va_list * args); /** diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h index 9ea1f2102b4..b3469fa9fdb 100644 --- a/src/vnet/session/transport_types.h +++ b/src/vnet/session/transport_types.h @@ -21,10 +21,8 @@ #include 
<vnet/tcp/tcp_debug.h> #include <vppinfra/bihash_24_8.h> - #define TRANSPORT_MAX_HDRS_LEN 140 /* Max number of bytes for headers */ - typedef enum transport_dequeue_type_ { TRANSPORT_TX_PEEK, /**< reliable transport protos */ @@ -42,24 +40,35 @@ typedef enum transport_service_type_ TRANSPORT_N_SERVICES } transport_service_type_t; +/* + * IS_TX_PACED : Connection sending is paced + * NO_LOOKUP: Don't register connection in lookup. Does not apply to local + * apps and transports using the network layer (udp/tcp) + * DESCHED: Connection descheduled by the session layer + * CLESS: Connection is "connection less". Some important implications of that + * are that connections are not pinned to workers and listeners will + * have fifos associated to them + */ +#define foreach_transport_connection_flag \ + _ (IS_TX_PACED, "tx_paced") \ + _ (NO_LOOKUP, "no_lookup") \ + _ (DESCHED, "descheduled") \ + _ (CLESS, "connectionless") + +typedef enum transport_connection_flags_bits_ +{ +#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym, + foreach_transport_connection_flag +#undef _ + TRANSPORT_CONNECTION_N_FLAGS +} transport_connection_flags_bits_t; + typedef enum transport_connection_flags_ { - TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0, - /** - * Don't register connection in lookup. Does not apply to local apps - * and transports using the network layer (udp/tcp) - */ - TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1, - /** - * Connection descheduled by the session layer. - */ - TRANSPORT_CONNECTION_F_DESCHED = 1 << 2, - /** - * Connection is "connection less". 
Some important implications of that - * are that connections are not pinned to workers and listeners will - * have fifos associated to them - */ - TRANSPORT_CONNECTION_F_CLESS = 1 << 3, +#define _(sym, str) \ + TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym, + foreach_transport_connection_flag +#undef _ } transport_connection_flags_t; typedef struct _spacer @@ -106,6 +115,7 @@ typedef struct _transport_connection u32 c_index; /**< Connection index in transport pool */ u32 thread_index; /**< Worker-thread index */ u8 flags; /**< Transport specific flags */ + u8 dscp; /**< Differentiated Services Code Point */ /*fib_node_index_t rmt_fei; dpo_id_t rmt_dpo; */ @@ -114,7 +124,7 @@ typedef struct _transport_connection #if TRANSPORT_DEBUG elog_track_t elog_track; /**< Event logging */ - u32 cc_stat_tstamp; /**< CC stats timestamp */ + f64 cc_stat_tstamp; /**< CC stats timestamp */ #endif /** @@ -146,6 +156,7 @@ typedef struct _transport_connection #define c_stats connection.stats #define c_pacer connection.pacer #define c_flags connection.flags +#define c_dscp connection.dscp #define s_ho_handle pacer.bytes_per_sec } transport_connection_t; @@ -164,7 +175,8 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128, _ (TLS, "tls", "J") \ _ (QUIC, "quic", "Q") \ _ (DTLS, "dtls", "D") \ - _ (SRTP, "srtp", "R") + _ (SRTP, "srtp", "R") \ + _ (HTTP, "http", "H") typedef enum _transport_proto { @@ -175,6 +187,7 @@ typedef enum _transport_proto u8 *format_transport_proto (u8 * s, va_list * args); u8 *format_transport_proto_short (u8 * s, va_list * args); +u8 *format_transport_flags (u8 *s, va_list *args); u8 *format_transport_connection (u8 * s, va_list * args); u8 *format_transport_listen_connection (u8 * s, va_list * args); u8 *format_transport_half_open_connection (u8 * s, va_list * args); @@ -209,6 +222,7 @@ typedef enum transport_endpt_cfg_flags_ _ (u32, next_node_index) \ _ (u32, next_node_opaque) \ _ (u16, mss) \ + _ (u8, dscp) \ _ (u8, 
transport_flags) \ /* clang-format on */ |