 MAINTAINERS                              |  43
 Makefile                                 |  19
 extras/hs-test/infra/suite_ldp.go        |   5
 extras/hs-test/infra/suite_vpp_proxy.go  |   1
 extras/hs-test/infra/utils.go            |  32
 extras/hs-test/ldp_test.go               |   8
 extras/hs-test/proxy_test.go             |   2
 src/plugins/acl/public_inlines.h         |   4
 src/plugins/dma_intel/dsa.c              |   2
 src/plugins/http/CMakeLists.txt          |   6
 src/plugins/http/http2/hpack.c           |   1
 src/plugins/http/http2/hpack.h           |   1
 src/plugins/http/http2/http2.c           | 411
 src/vcl/vcl_private.c                    |  49
 src/vcl/vcl_private.h                    |   6
 src/vcl/vppcom.c                         |   3
 src/vlib/main.c                          | 315
 src/vlib/main.h                          |   5
 src/vlib/node.c                          |  13
 src/vlib/node.h                          |  98
 src/vlib/node_cli.c                      |   3
 src/vlib/node_funcs.h                    |  77
 src/vlib/threads.c                       | 222
 src/vlib/time.h                          |   3
 src/vlib/unix/input.c                    |  20
 test/asf/test_session_sdl.py             |   5
 test/test_pg_stream.py                   |  13
 27 files changed, 946 insertions(+), 421 deletions(-)
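The vlib changes in the diff below move the timing wheel from vlib_node_main_t to a per-thread vlib_main_t, add a VLIB_NODE_TYPE_SCHED node type dispatched from that wheel, and introduce a vlib_node_schedule() helper that arms a VLIB_TW_EVENT_T_SCHED_NODE timer. The sketch that follows is an illustration inferred from the signatures in this diff, not part of the commit: the node name, its function body, and the assumption that the node re-arms itself (and is initially armed from an init function) are hypothetical.

```c
#include <vlib/vlib.h>

/* Hypothetical periodic node: dispatched once each time its timer fires
 * (frame is NULL, dispatch_reason is VLIB_NODE_DISPATCH_REASON_SCHED),
 * then re-armed for 100 ms later with the new vlib_node_schedule()
 * helper. dt is in seconds and is rounded to the wheel tick of
 * 1 / VLIB_TW_TICKS_PER_SECOND (10 us). */
static uword
example_sched_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
		       vlib_frame_t *frame)
{
  ASSERT (node->dispatch_reason == VLIB_NODE_DISPATCH_REASON_SCHED);

  /* ... periodic work ... */

  /* The wheel entry is one-shot, so the node schedules its next run. */
  vlib_node_schedule (vm, node->node_index, 100e-3);
  return 0;
}

VLIB_REGISTER_NODE (example_sched_node) = {
  .function = example_sched_node_fn,
  .name = "example-sched",
  .type = VLIB_NODE_TYPE_SCHED,
};
```

Whether the first timer must be armed explicitly (e.g. by calling vlib_node_schedule() from an init function after the wheel is created) is an assumption of this sketch; the diff only shows how armed SCHED timers are collected in sched_node_pending and dispatched from the main/worker loop.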
diff --git a/MAINTAINERS b/MAINTAINERS index 4ad31098826..124a197a73e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -198,7 +198,6 @@ F: src/examples/srv6-sample-localsid/ VNET IPSec I: ipsec M: Neale Ranns <neale@graphiant.com> -M: Radu Nicolau <radu.nicolau@intel.com> M: Fan Zhang <fanzhang.oss@gmail.com> F: src/vnet/ipsec/ @@ -231,7 +230,7 @@ F: src/plugins/crypto_openssl/ Plugin - Crypto - ipsecmb I: crypto-ipsecmb M: Neale Ranns <neale@graphiant.com> -M: Fan Zhang <fanzhang.oss@gmail.com> +M: Fan Zhang <fanzhang.oss@gmail.com> F: src/plugins/crypto_ipsecmb/ Plugin - Crypto - sw_scheduler @@ -396,8 +395,8 @@ F: src/plugins/acl/ Plugin - NAT I: nat M: Ole Troan <ot@cisco.com> -M: Filip Varga <fivarga@cisco.com> -M: Klement Sekera <klement.sekera@gmail.com> +M: Filip Varga <fivarga@cisco.com> +M: Klement Sekera <klement.sekera@gmail.com> F: src/plugins/nat/ Plugin - PNAT Static match and rewrite engine @@ -439,7 +438,7 @@ F: src/plugins/dpdk/ Plugin - DPDK Crypto I: dpdk-cryptodev -M: Radu Nicolau <radu.nicolau@intel.com> +M: Kai Ji <kai.ji@intel.com> M: Fan Zhang <fanzhang.oss@gmail.com> F: src/plugins/dpdk/cryptodev/ @@ -573,7 +572,7 @@ Plugin - Time-based MAC filter I: mactime Y: src/plugins/mactime/FEATURE.yaml M: Dave Barach <vpp@barachs.net> -F: src/plugins/mactime/ +F: src/plugins/mactime/ Plugin - Network Delay Simulator I: nsim @@ -593,14 +592,14 @@ M: Florin Coras <fcoras@cisco.com> F: src/plugins/unittest/ Plugin - IDPF Device driver -I: idpf -M: Ting Xu <ting.xu@intel.com> -F: src/plugins/idpf/ +I: idpf +M: Jeff Shaw <jeffrey.b.shaw@intel.com> +F: src/plugins/idpf/ Plugin - Intel DMA engines -I: dma_intel -M: Marvin Liu <yong.liu@intel.com> -F: src/plugins/dma_intel +I: dma_intel +M: Jeff Shaw <jeffrey.b.shaw@intel.com> +F: src/plugins/dma_intel/ Test Infrastructure I: tests @@ -647,7 +646,7 @@ Binary API Compiler for Python I: vppapigen M: Ole Troan <otroan@employees.org> F: src/tools/vppapigen/ -F: extras/scripts/crcchecker.py +F: extras/scripts/crcchecker.py API trace tool I: vppapitrace @@ -718,7 +717,7 @@ F: src/vpp-api/python Plugin - Cisco Discovery Protocol I: cdp M: vpp-dev Mailing List <vpp-dev@fd.io> -C: Unmaintained +C: Unmaintained F: src/plugins/cdp/ Plugin - Source VRF Select @@ -734,13 +733,13 @@ F: src/plugins/ct6 Plugin - Steal The NIC I: stn M: vpp-dev Mailing List <vpp-dev@fd.io> -C: Unmaintained +C: Unmaintained F: src/plugins/stn Plugin - IOAM I: ioam M: vpp-dev Mailing List <vpp-dev@fd.io> -C: Unmaintained +C: Unmaintained F: src/plugins/ioam Plugin - Awkward chained buffer geometry tool @@ -833,14 +832,14 @@ M: Florin Coras <fcoras@cisco.com> F: src/plugins/prom Plugin - BPF Trace Filter -I: bpf_trace_filter -M: Mohammed Hawari <mohammed@hawari.fr> -F: src/plugins/bpf_trace_filter +I: bpf_trace_filter +M: Mohammed Hawari <mohammed@hawari.fr> +F: src/plugins/bpf_trace_filter Plugin - NPTv6 -I: npt66 -M: Ole Troan <otroan@employees.org> -F: src/plugins/npt66 +I: npt66 +M: Ole Troan <otroan@employees.org> +F: src/plugins/npt66 Plugin - Trace node I: tracenode @@ -87,12 +87,12 @@ endif # +libganglia1-dev if building the gmond plugin -DEB_DEPENDS = curl build-essential autoconf automake ccache -DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-python -DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config -DEB_DEPENDS += clang gcovr lcov chrpath autoconf libnuma-dev +DEB_DEPENDS = curl build-essential ccache +DEB_DEPENDS += debhelper git dh-python +DEB_DEPENDS += git-review exuberant-ctags cscope +DEB_DEPENDS += clang gcovr lcov 
chrpath DEB_DEPENDS += python3-all python3-setuptools check -DEB_DEPENDS += libffi-dev python3-ply libunwind-dev +DEB_DEPENDS += python3-ply libunwind-dev DEB_DEPENDS += cmake ninja-build python3-jsonschema python3-yaml DEB_DEPENDS += python3-venv # ensurepip DEB_DEPENDS += python3-dev python3-pip @@ -102,7 +102,6 @@ DEB_DEPENDS += python3-virtualenv DEB_DEPENDS += libssl-dev DEB_DEPENDS += libelf-dev libpcap-dev # for libxdp (af_xdp) DEB_DEPENDS += iperf3 # for 'make test TEST=vcl' -DEB_DEPENDS += nasm DEB_DEPENDS += iperf ethtool # for 'make test TEST=vm_vpp_interfaces' DEB_DEPENDS += libpcap-dev DEB_DEPENDS += tshark @@ -124,42 +123,34 @@ else ifeq ($(OS_VERSION_ID),22.04) # overwrite clang-format version to run `make checkstyle` successfully # TODO: remove once ubuntu 20.04 is deprecated and extras/scripts/checkstyle.sh is upgraded to 15 export CLANG_FORMAT_VER=15 - LIBFFI=libffi7 DEB_DEPENDS += enchant-2 # for docs else ifeq ($(OS_VERSION_ID),20.04) DEB_DEPENDS += python3-virtualenv DEB_DEPENDS += libssl-dev DEB_DEPENDS += clang clang-format-11 - LIBFFI=libffi7 DEB_DEPENDS += enchant-2 # for docs else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-10) DEB_DEPENDS += virtualenv else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-11) DEB_DEPENDS += virtualenv DEB_DEPENDS += clang clang-format-11 - LIBFFI=libffi7 else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-12) DEB_DEPENDS += virtualenv DEB_DEPENDS += clang-14 clang-format-15 # for extras/scripts/checkstyle.sh # TODO: remove once ubuntu 20.04 is deprecated and extras/scripts/checkstyle.sh is upgraded to -15 export CLANG_FORMAT_VER=15 - LIBFFI=libffi8 else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-13) DEB_DEPENDS += virtualenv DEB_DEPENDS += clang-19 clang-format-19 # for extras/scripts/checkstyle.sh # TODO: remove once ubuntu 20.04 is deprecated and extras/scripts/checkstyle.sh is upgraded to -15 export CLANG_FORMAT_VER=15 - LIBFFI=libffi8 else DEB_DEPENDS += clang-11 clang-format-11 - LIBFFI=libffi7 DEB_DEPENDS += enchant-2 # for docs endif -DEB_DEPENDS += $(LIBFFI) - RPM_DEPENDS = glibc-static RPM_DEPENDS += apr-devel RPM_DEPENDS += numactl-devel diff --git a/extras/hs-test/infra/suite_ldp.go b/extras/hs-test/infra/suite_ldp.go index 408fea31325..53fe10086d0 100644 --- a/extras/hs-test/infra/suite_ldp.go +++ b/extras/hs-test/infra/suite_ldp.go @@ -97,6 +97,11 @@ func (s *LdpSuite) SetupTest() { } func (s *LdpSuite) TearDownTest() { + if CurrentSpecReport().Failed() { + s.CollectIperfLogs(s.Containers.ServerVpp) + s.CollectRedisServerLogs(s.Containers.ServerVpp) + } + for _, container := range s.StartedContainers { delete(container.EnvVars, "LD_PRELOAD") delete(container.EnvVars, "VCL_CONFIG") diff --git a/extras/hs-test/infra/suite_vpp_proxy.go b/extras/hs-test/infra/suite_vpp_proxy.go index 252d01eac9a..137bbb679d8 100644 --- a/extras/hs-test/infra/suite_vpp_proxy.go +++ b/extras/hs-test/infra/suite_vpp_proxy.go @@ -93,6 +93,7 @@ func (s *VppProxySuite) TearDownTest() { s.Log(vpp.Vppctl("show session verbose 2")) s.Log(vpp.Vppctl("show error")) s.CollectNginxLogs(s.Containers.NginxServerTransient) + s.CollectIperfLogs(s.Containers.IperfS) } s.HstSuite.TearDownTest() } diff --git a/extras/hs-test/infra/utils.go b/extras/hs-test/infra/utils.go index bd603f863fc..2f4328b4a74 100644 --- a/extras/hs-test/infra/utils.go +++ b/extras/hs-test/infra/utils.go @@ -18,6 +18,8 @@ import ( const networkTopologyDir string = "topo-network/" const containerTopologyDir string = "topo-containers/" const HttpCapsuleTypeDatagram = uint64(0) +const iperfLogFileName = 
"iperf.log" +const redisLogFileName = "redis-server.log" type Stanza struct { content string @@ -223,6 +225,36 @@ func (s *HstSuite) CollectEnvoyLogs(envoyContainer *Container) { } } +func (s *HstSuite) IperfLogFileName(serverContainer *Container) string { + return serverContainer.GetContainerWorkDir() + "/" + serverContainer.Name + "-" + iperfLogFileName +} + +func (s *HstSuite) CollectIperfLogs(serverContainer *Container) { + targetDir := serverContainer.Suite.getLogDirPath() + source := serverContainer.GetHostWorkDir() + "/" + serverContainer.Name + "-" + iperfLogFileName + cmd := exec.Command("cp", "-t", targetDir, source) + s.Log(cmd.String()) + err := cmd.Run() + if err != nil { + s.Log(fmt.Sprint(err)) + } +} + +func (s *HstSuite) RedisServerLogFileName(serverContainer *Container) string { + return serverContainer.GetContainerWorkDir() + "/" + serverContainer.Name + "-" + redisLogFileName +} + +func (s *HstSuite) CollectRedisServerLogs(serverContainer *Container) { + targetDir := serverContainer.Suite.getLogDirPath() + source := serverContainer.GetHostWorkDir() + "/" + serverContainer.Name + "-" + redisLogFileName + cmd := exec.Command("cp", "-t", targetDir, source) + s.Log(cmd.String()) + err := cmd.Run() + if err != nil { + s.Log(fmt.Sprint(err)) + } +} + func (s *HstSuite) StartIperfServerApp(running chan error, done chan struct{}, env []string) { cmd := exec.Command("iperf3", "-4", "-s", "-p", s.GetPortFromPpid()) if env != nil { diff --git a/extras/hs-test/ldp_test.go b/extras/hs-test/ldp_test.go index 3120f1c5813..6ce0943d6c6 100644 --- a/extras/hs-test/ldp_test.go +++ b/extras/hs-test/ldp_test.go @@ -54,7 +54,7 @@ func ldPreloadIperfVpp(s *LdpSuite, useUdp bool) { go func() { defer GinkgoRecover() - cmd := "iperf3 -4 -s -p " + s.GetPortFromPpid() + cmd := "iperf3 -4 -s -p " + s.GetPortFromPpid() + " --logfile " + s.IperfLogFileName(s.Containers.ServerVpp) s.StartServerApp(s.Containers.ServerVpp, "iperf3", cmd, srvCh, stopServerCh) }() @@ -90,7 +90,11 @@ func RedisBenchmarkTest(s *LdpSuite) { go func() { defer GinkgoRecover() - cmd := "redis-server --daemonize yes --protected-mode no --bind " + serverVethAddress + // Avoid redis warning during startup + s.Containers.ServerVpp.Exec(false, "sysctl vm.overcommit_memory=1") + // Note: --save "" disables snapshotting which during upgrade to ubuntu 24.04 was + // observed to corrupt vcl memory / heap. Needs more debugging. 
+ cmd := "redis-server --daemonize yes --protected-mode no --save \"\" --bind " + serverVethAddress + " --loglevel notice --logfile " + s.RedisServerLogFileName(s.Containers.ServerVpp) s.StartServerApp(s.Containers.ServerVpp, "redis-server", cmd, runningSrv, doneSrv) }() diff --git a/extras/hs-test/proxy_test.go b/extras/hs-test/proxy_test.go index 7a8d9d9b6fb..b6b4df5d0cd 100644 --- a/extras/hs-test/proxy_test.go +++ b/extras/hs-test/proxy_test.go @@ -93,7 +93,7 @@ func vppProxyIperfMTTest(s *VppProxySuite, proto string) { go func() { defer GinkgoRecover() - cmd := fmt.Sprintf("iperf3 -4 -s -B %s -p %s", s.ServerAddr(), fmt.Sprint(s.ServerPort())) + cmd := fmt.Sprintf("iperf3 -4 -s -B %s -p %s --logfile %s", s.ServerAddr(), fmt.Sprint(s.ServerPort()), s.IperfLogFileName(s.Containers.IperfS)) s.StartServerApp(s.Containers.IperfS, "iperf3", cmd, srvCh, stopServerCh) }() diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h index eb9f0de920f..80edfd674d3 100644 --- a/src/plugins/acl/public_inlines.h +++ b/src/plugins/acl/public_inlines.h @@ -268,8 +268,8 @@ fa_acl_match_ip6_addr (ip6_address_t * addr1, ip6_address_t * addr2, } if (prefixlen % 8) { - u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); - u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); + u8 b1 = *((u8 *) addr1 + prefixlen / 8); + u8 b2 = *((u8 *) addr2 + prefixlen / 8); u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); return (b1 & mask0) == b2; } diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c index 473f2efa93e..20a90e34b0e 100644 --- a/src/plugins/dma_intel/dsa.c +++ b/src/plugins/dma_intel/dsa.c @@ -103,7 +103,6 @@ intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b, clib_memcpy_fast (desc->dst, desc->src, desc->size); } b->status = INTEL_DSA_STATUS_CPU_SUCCESS; - ch->submitted++; return; } @@ -407,6 +406,7 @@ intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, /* fallback to software if exception happened */ intel_dsa_batch_fallback (vm, b, ch); glitch = 1 & b->barrier_before_last; + t->pending_batches[n++] = b; } else { diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt index 58cb4c000e3..ca2c0a9dc05 100644 --- a/src/plugins/http/CMakeLists.txt +++ b/src/plugins/http/CMakeLists.txt @@ -11,9 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+option(VPP_ENABLE_HTTP_2 "Build http plugin with HTTP/2 enabled" OFF) +if(VPP_ENABLE_HTTP_2) + add_compile_definitions(HTTP_2_ENABLE=1) +endif() + add_vpp_plugin(http SOURCES http2/hpack.c + http2/http2.c http2/frame.c http.c http_buffer.c diff --git a/src/plugins/http/http2/hpack.c b/src/plugins/http/http2/hpack.c index 6dcf5f6c19b..24fc3202dc5 100644 --- a/src/plugins/http/http2/hpack.c +++ b/src/plugins/http/http2/hpack.c @@ -4,7 +4,6 @@ #include <vppinfra/error.h> #include <vppinfra/ring.h> -#include <http/http.h> #include <http/http2/hpack.h> #include <http/http2/huffman_table.h> #include <http/http_status_codes.h> diff --git a/src/plugins/http/http2/hpack.h b/src/plugins/http/http2/hpack.h index 2a2936b7611..9f3e62e65ce 100644 --- a/src/plugins/http/http2/hpack.h +++ b/src/plugins/http/http2/hpack.h @@ -7,6 +7,7 @@ #include <vppinfra/types.h> #include <http/http2/http2.h> +#include <http/http.h> #define HPACK_INVALID_INT CLIB_UWORD_MAX #if uword_bits == 64 diff --git a/src/plugins/http/http2/http2.c b/src/plugins/http/http2/http2.c new file mode 100644 index 00000000000..035620c5184 --- /dev/null +++ b/src/plugins/http/http2/http2.c @@ -0,0 +1,411 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <http/http2/hpack.h> +#include <http/http_private.h> + +#ifndef HTTP_2_ENABLE +#define HTTP_2_ENABLE 0 +#endif + +typedef struct http2_req_ +{ + http_req_t base; + u32 stream_id; + u64 peer_window; +} http2_req_t; + +typedef struct http2_conn_ctx_ +{ + http2_conn_settings_t peer_settings; + hpack_dynamic_table_t decoder_dynamic_table; + u64 peer_window; + uword *req_by_stream_id; +} http2_conn_ctx_t; + +typedef struct http2_main_ +{ + http2_conn_ctx_t **conn_pool; + http2_req_t **req_pool; + http2_conn_settings_t settings; +} http2_main_t; + +static http2_main_t http2_main; + +http2_conn_ctx_t * +http2_conn_ctx_alloc_w_thread (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + + pool_get_aligned_safe (h2m->conn_pool[hc->c_thread_index], h2c, + CLIB_CACHE_LINE_BYTES); + clib_memset (h2c, 0, sizeof (*h2c)); + h2c->peer_settings = http2_default_conn_settings; + h2c->peer_window = h2c->peer_settings.initial_window_size; + h2c->req_by_stream_id = hash_create (0, sizeof (uword)); + hc->opaque = + uword_to_pointer (h2c - h2m->conn_pool[hc->c_thread_index], void *); + return h2c; +} + +static inline http2_conn_ctx_t * +http2_conn_ctx_get_w_thread (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + u32 h2c_index = pointer_to_uword (hc->opaque); + return pool_elt_at_index (h2m->conn_pool[hc->c_thread_index], h2c_index); +} + +static inline void +http2_conn_ctx_free (http_conn_t *hc) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + + h2c = http2_conn_ctx_get_w_thread (hc); + hpack_dynamic_table_free (&h2c->decoder_dynamic_table); + hash_free (h2c->req_by_stream_id); + if (CLIB_DEBUG) + memset (h2c, 0xba, sizeof (*h2c)); + pool_put (h2m->conn_pool[hc->c_thread_index], h2c); +} + +http2_req_t * +http2_conn_alloc_req (http_conn_t *hc, u32 stream_id) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + http2_req_t *req; + u32 req_index; + http_req_handle_t hr_handle; + + pool_get_aligned_safe (h2m->req_pool[hc->c_thread_index], req, + CLIB_CACHE_LINE_BYTES); + clib_memset (req, 0, sizeof (*req)); + req->base.hr_pa_session_handle = SESSION_INVALID_HANDLE; + req_index = req - h2m->req_pool[hc->c_thread_index]; + hr_handle.version = HTTP_VERSION_2; + hr_handle.req_index = req_index; + 
req->base.hr_req_handle = hr_handle.as_u32; + req->base.hr_hc_index = hc->hc_hc_index; + req->base.c_thread_index = hc->c_thread_index; + req->stream_id = stream_id; + h2c = http2_conn_ctx_get_w_thread (hc); + req->peer_window = h2c->peer_settings.initial_window_size; + hash_set (h2c->req_by_stream_id, stream_id, req_index); + return req; +} + +static inline void +http2_conn_free_req (http2_conn_ctx_t *h2c, http2_req_t *req, u32 thread_index) +{ + http2_main_t *h2m = &http2_main; + + vec_free (req->base.headers); + vec_free (req->base.target); + http_buffer_free (&req->base.tx_buf); + hash_unset (h2c->req_by_stream_id, req->stream_id); + if (CLIB_DEBUG) + memset (req, 0xba, sizeof (*req)); + pool_put (h2m->req_pool[thread_index], req); +} + +http2_req_t * +http2_conn_get_req (http_conn_t *hc, u32 stream_id) +{ + http2_main_t *h2m = &http2_main; + http2_conn_ctx_t *h2c; + uword *p; + + h2c = http2_conn_ctx_get_w_thread (hc); + p = hash_get (h2c->req_by_stream_id, stream_id); + if (p) + { + return pool_elt_at_index (h2m->req_pool[hc->c_thread_index], p[0]); + } + else + { + HTTP_DBG (1, "hc [%u]%x streamId %u not found", hc->c_thread_index, + hc->hc_hc_index, stream_id); + return 0; + } +} + +always_inline http2_req_t * +http2_req_get (u32 req_index, u32 thread_index) +{ + http2_main_t *h2m = &http2_main; + + return pool_elt_at_index (h2m->req_pool[thread_index], req_index); +} + +/*****************/ +/* http core VFT */ +/*****************/ + +static u32 +http2_hc_index_get_by_req_index (u32 req_index, u32 thread_index) +{ + http2_req_t *req; + + req = http2_req_get (req_index, thread_index); + return req->base.hr_hc_index; +} + +static transport_connection_t * +http2_req_get_connection (u32 req_index, u32 thread_index) +{ + http2_req_t *req; + req = http2_req_get (req_index, thread_index); + return &(req->base.connection); +} + +static u8 * +format_http2_req (u8 *s, va_list *args) +{ + http2_req_t *req = va_arg (*args, http2_req_t *); + http_conn_t *hc = va_arg (*args, http_conn_t *); + session_t *ts; + + ts = session_get_from_handle (hc->hc_tc_session_handle); + s = format (s, "[%d:%d][H2] stream_id %u app_wrk %u hc_index %u ts %d:%d", + req->base.c_thread_index, req->base.c_s_index, req->stream_id, + req->base.hr_pa_wrk_index, req->base.hr_hc_index, + ts->thread_index, ts->session_index); + + return s; +} + +static u8 * +http2_format_req (u8 *s, va_list *args) +{ + u32 req_index = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + http_conn_t *hc = va_arg (*args, http_conn_t *); + u32 verbose = va_arg (*args, u32); + http2_req_t *req; + + req = http2_req_get (req_index, thread_index); + + s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http2_req, req, hc); + if (verbose) + { + s = + format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc); + if (verbose > 1) + s = format (s, "\n"); + } + + return s; +} + +static void +http2_app_tx_callback (http_conn_t *hc, u32 req_index, + transport_send_params_t *sp) +{ + /* TODO: run state machine */ +} + +static void +http2_app_rx_evt_callback (http_conn_t *hc, u32 req_index, u32 thread_index) +{ + /* TODO: continue tunnel RX */ +} + +static void +http2_app_close_callback (http_conn_t *hc, u32 req_index, u32 thread_index) +{ + /* TODO: confirm close or wait until all app data drained */ +} + +static void +http2_app_reset_callback (http_conn_t *hc, u32 req_index, u32 thread_index) +{ + /* TODO: send RST_STREAM frame */ +} + +static int +http2_transport_connected_callback (http_conn_t *hc) +{ + /* TODO */ + return -1; +} 
+ +static void +http2_transport_rx_callback (http_conn_t *hc) +{ + /* TODO: run state machine or handle control frames on stream 0 */ +} + +static void +http2_transport_close_callback (http_conn_t *hc) +{ + u32 req_index, stream_id; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + session_transport_closing_notify (&req->base.connection); + })); +} + +static void +http2_transport_reset_callback (http_conn_t *hc) +{ + u32 req_index, stream_id; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) + return; + + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({ + req = http2_req_get (req_index, hc->c_thread_index); + session_transport_reset_notify (&req->base.connection); + })); +} + +static void +http2_transport_conn_reschedule_callback (http_conn_t *hc) +{ + /* TODO */ +} + +static void +http2_conn_cleanup_callback (http_conn_t *hc) +{ + u32 req_index, stream_id, *req_index_p, *req_indices = 0; + http2_req_t *req; + http2_conn_ctx_t *h2c; + + h2c = http2_conn_ctx_get_w_thread (hc); + hash_foreach (stream_id, req_index, h2c->req_by_stream_id, + ({ vec_add1 (req_indices, req_index); })); + + vec_foreach (req_index_p, req_indices) + { + req = http2_req_get (*req_index_p, hc->c_thread_index); + session_transport_delete_notify (&req->base.connection); + http2_conn_free_req (h2c, req, hc->c_thread_index); + } + + vec_free (req_indices); + http2_conn_ctx_free (hc); +} + +static void +http2_enable_callback (void) +{ + http2_main_t *h2m = &http2_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + vec_validate (h2m->conn_pool, num_threads - 1); + vec_validate (h2m->req_pool, num_threads - 1); +} + +static int +http2_update_settings (http_settings_t type, u32 value) +{ + http2_main_t *h2m = &http2_main; + + switch (type) + { +#define _(v, label, member, min, max, default_value, err_code) \ + case HTTP2_SETTINGS_##label: \ + if (!(value >= min && value <= max)) \ + return -1; \ + h2m->settings.member = value; \ + return 0; + foreach_http2_settings +#undef _ + default : return -1; + } +} + +static uword +http2_unformat_config_callback (unformat_input_t *input) +{ + u32 value; + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "initial-window-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_INITIAL_WINDOW_SIZE, + value)) + return 0; + } + else if (unformat (input, "max-frame-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_MAX_FRAME_SIZE, value)) + return 0; + } + else if (unformat (input, "max-header-list-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_MAX_HEADER_LIST_SIZE, + value)) + return 0; + } + else if (unformat (input, "header-table-size %u", &value)) + { + if (http2_update_settings (HTTP2_SETTINGS_HEADER_TABLE_SIZE, value)) + return 0; + } + else + return 0; + } + return 1; +} + +const static http_engine_vft_t http2_engine = { + .name = "http2", + .hc_index_get_by_req_index = http2_hc_index_get_by_req_index, + .req_get_connection = http2_req_get_connection, + .format_req = http2_format_req, + .app_tx_callback = http2_app_tx_callback, + 
.app_rx_evt_callback = http2_app_rx_evt_callback, + .app_close_callback = http2_app_close_callback, + .app_reset_callback = http2_app_reset_callback, + .transport_connected_callback = http2_transport_connected_callback, + .transport_rx_callback = http2_transport_rx_callback, + .transport_close_callback = http2_transport_close_callback, + .transport_reset_callback = http2_transport_reset_callback, + .transport_conn_reschedule_callback = + http2_transport_conn_reschedule_callback, + .conn_cleanup_callback = http2_conn_cleanup_callback, + .enable_callback = http2_enable_callback, + .unformat_cfg_callback = http2_unformat_config_callback, +}; + +clib_error_t * +http2_init (vlib_main_t *vm) +{ + http2_main_t *h2m = &http2_main; + + clib_warning ("http/2 enabled"); + h2m->settings = http2_default_conn_settings; + http_register_engine (&http2_engine, HTTP_VERSION_2); + + return 0; +} + +#if HTTP_2_ENABLE > 0 +VLIB_INIT_FUNCTION (http2_init) = { + .runs_after = VLIB_INITS ("http_transport_init"), +}; +#endif diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index 6892688da5a..1adc8cb6666 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -189,6 +189,53 @@ vcl_worker_cleanup_cb (void *arg) } void +vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk) +{ + /* Generate mq epfd events using pipes to hopefully force + * calls into epoll_wait which retries attaching to vpp */ + if (!wrk->detached_pipefds[0]) + { + if (pipe (wrk->detached_pipefds)) + { + VDBG (0, "failed to add mq eventfd to mq epoll fd"); + exit (1); + } + } + + struct epoll_event evt = {}; + evt.events = EPOLLIN; + evt.data.u32 = wrk->detached_pipefds[0]; + if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, wrk->detached_pipefds[0], + &evt) < 0) + { + VDBG (0, "failed to add mq eventfd to mq epoll fd"); + exit (1); + } + + int sig = 1, __clib_unused rv; + rv = write (wrk->detached_pipefds[1], &sig, 1); +} + +void +vcl_worker_detached_signal_mq (vcl_worker_t *wrk) +{ + int buf, __clib_unused rv; + rv = read (wrk->detached_pipefds[0], &buf, 1); + rv = write (wrk->detached_pipefds[1], &buf, 1); +} + +void +vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk) +{ + if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_DEL, wrk->detached_pipefds[0], 0) < + 0) + { + VDBG (0, "failed to del mq eventfd to mq epoll fd"); + exit (1); + } +} + +void vcl_worker_detach_sessions (vcl_worker_t *wrk) { session_event_t *e; @@ -239,6 +286,8 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk) vec_free (seg_indices); hash_free (seg_indices_map); + + vcl_worker_detached_start_signal_mq (wrk); } void diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index c92bb58169d..72f0a8327f0 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -325,6 +325,9 @@ typedef struct vcl_worker_ /* functions to be called pre/post wait if vcl managed by vls */ vcl_worker_wait_mq_fn pre_wait_fn; vcl_worker_wait_mq_fn post_wait_fn; + + /* mq_epfd signal pipes when wrk detached from vpp */ + int detached_pipefds[2]; } vcl_worker_t; STATIC_ASSERT (sizeof (session_disconnected_msg_t) <= 16, @@ -799,6 +802,9 @@ void vcl_worker_detach_sessions (vcl_worker_t *wrk); void vcl_worker_set_wait_mq_fns (vcl_worker_wait_mq_fn pre_wait, vcl_worker_wait_mq_fn post_wait); +void vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk); +void vcl_worker_detached_signal_mq (vcl_worker_t *wrk); +void vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk); /* * VCL Binary API */ diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 1e9c915ce39..8bc5f42d4a6 100644 --- 
a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -1404,6 +1404,8 @@ vcl_api_retry_attach (vcl_worker_t *wrk) { vcl_session_t *s; + vcl_worker_detached_signal_mq (wrk); + clib_spinlock_lock (&vcm->workers_lock); if (vcl_is_first_reattach_to_execute ()) { @@ -1412,6 +1414,7 @@ vcl_api_retry_attach (vcl_worker_t *wrk) clib_spinlock_unlock (&vcm->workers_lock); return; } + vcl_worker_detached_stop_signal_mq (wrk); vcl_set_reattach_counter (); clib_spinlock_unlock (&vcm->workers_lock); } diff --git a/src/vlib/main.c b/src/vlib/main.c index 731e788d30d..c4b1de41069 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -871,16 +871,16 @@ add_trajectory_trace (vlib_buffer_t * b, u32 node_index) } static_always_inline u64 -dispatch_node (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_node_type_t type, - vlib_node_state_t dispatch_state, - vlib_frame_t * frame, u64 last_time_stamp) +dispatch_node (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_node_type_t type, vlib_frame_t *frame, + vlib_node_dispatch_reason_t dispatch_reason, + u64 last_time_stamp) { uword n, v; u64 t; vlib_node_main_t *nm = &vm->node_main; vlib_next_frame_t *nf; + vlib_node_type_atts_t attr = node_type_attrs[type]; if (CLIB_DEBUG > 0) { @@ -888,15 +888,14 @@ dispatch_node (vlib_main_t * vm, ASSERT (n->type == type); } - /* Only non-internal nodes may be disabled. */ - if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state) + if (attr.can_be_disabled == 0 && node->state == VLIB_NODE_STATE_DISABLED) { - ASSERT (type != VLIB_NODE_TYPE_INTERNAL); + ASSERT (0); /* disabled node should not be dispatched */ return last_time_stamp; } - if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT) - && dispatch_state != VLIB_NODE_STATE_INTERRUPT) + if (attr.decrement_main_loop_per_calls_if_polling && + node->state == VLIB_NODE_STATE_POLLING) { u32 c = node->input_main_loops_per_call; /* Only call node when count reaches zero. */ @@ -938,18 +937,13 @@ dispatch_node (vlib_main_t * vm, vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); add_trajectory_trace (b, node->node_index); } - if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) - n = node->function (vm, node, frame); - else - n = vm->dispatch_wrapper_fn (vm, node, frame); } + + node->dispatch_reason = dispatch_reason; + if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) + n = node->function (vm, node, frame); else - { - if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0)) - n = node->function (vm, node, frame); - else - n = vm->dispatch_wrapper_fn (vm, node, frame); - } + n = vm->dispatch_wrapper_fn (vm, node, frame); t = clib_cpu_time_now (); @@ -968,7 +962,8 @@ dispatch_node (vlib_main_t * vm, /* When in adaptive mode and vector rate crosses threshold switch to polling mode and vice versa. 
*/ - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)) + if (PREDICT_FALSE (attr.supports_adaptive_mode && + node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)) { ELOG_TYPE_DECLARE (e) = { @@ -985,8 +980,8 @@ dispatch_node (vlib_main_t * vm, u32 node_name, vector_length, is_polling; } *ed; - if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT - && v >= nm->polling_threshold_vector_length) && + if ((node->state == VLIB_NODE_STATE_INTERRUPT && + v >= nm->polling_threshold_vector_length) && !(node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) { @@ -1012,8 +1007,8 @@ dispatch_node (vlib_main_t * vm, ed->is_polling = 1; } } - else if (dispatch_state == VLIB_NODE_STATE_POLLING - && v <= nm->interrupt_threshold_vector_length) + else if (node->state == VLIB_NODE_STATE_POLLING && + v <= nm->interrupt_threshold_vector_length) { vlib_node_t *n = vlib_get_node (vm, node->node_index); if (node->flags & @@ -1103,10 +1098,9 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0; nf->flags &= ~VLIB_FRAME_TRACE; - last_time_stamp = dispatch_node (vm, n, - VLIB_NODE_TYPE_INTERNAL, - VLIB_NODE_STATE_POLLING, - f, last_time_stamp); + last_time_stamp = + dispatch_node (vm, n, VLIB_NODE_TYPE_INTERNAL, f, + VLIB_NODE_DISPATCH_REASON_PENDING_FRAME, last_time_stamp); /* Internal node vector-rate accounting, for summary stats */ vm->internal_node_vectors += f->n_vectors; vm->internal_node_calls++; @@ -1260,6 +1254,20 @@ vlib_process_resume (vlib_main_t * vm, vlib_process_t * p) return r; } +static void +process_timer_start (vlib_main_t *vm, vlib_process_t *p, u32 runtime_index) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + vlib_tw_event_t e = { .type = VLIB_TW_EVENT_T_PROCESS_NODE, + .index = runtime_index }; + + if (p->resume_clock_interval == 0) + return; + + p->stop_timer_handle = TW (tw_timer_start) (tw, e.as_u32, 0 /* timer_id */, + p->resume_clock_interval); +} + static u64 dispatch_process (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp) @@ -1309,15 +1317,7 @@ dispatch_process (vlib_main_t * vm, p->n_suspends += 1; p->suspended_process_frame_index = pf - nm->suspended_process_frames; - if (p->resume_clock_interval) - { - TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) nm->timing_wheel; - p->stop_timer_handle = - TW (tw_timer_start) (tw, - vlib_timing_wheel_data_set_suspended_process ( - node->runtime_index) /* [sic] pool idex */, - 0 /* timer_id */, p->resume_clock_interval); - } + process_timer_start (vm, p, node->runtime_index); } else p->state = VLIB_PROCESS_STATE_NOT_STARTED; @@ -1419,15 +1419,7 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r, /* Suspend it again. 
*/ n_vectors = 0; p->n_suspends += 1; - if (p->resume_clock_interval) - { - p->stop_timer_handle = - TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index) /* [sic] pool idex */ , - 0 /* timer_id */ , - p->resume_clock_interval); - } + process_timer_start (vm, p, node->runtime_index); } else { @@ -1452,6 +1444,61 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r, return t; } +static void +process_expired_timer_cb (u32 *expired_timer_handles) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_main_t *nm = &vm->node_main; + u32 *handle; + + vec_foreach (handle, expired_timer_handles) + { + vlib_tw_event_t e = { .as_u32 = *handle }; + vlib_process_restore_t restore = {}; + + if (e.type == VLIB_TW_EVENT_T_TIMED_EVENT) + { + restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT; + restore.timed_event_data_pool_index = e.index; + vec_add1 (nm->process_restore_current, restore); + } + else if (e.type == VLIB_TW_EVENT_T_PROCESS_NODE) + { + vlib_process_t *p = vec_elt (nm->processes, e.index); + p->stop_timer_handle = ~0; + restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK; + restore.runtime_index = e.index; + vec_add1 (nm->process_restore_current, restore); + } + else if (e.type == VLIB_TW_EVENT_T_SCHED_NODE) + { + vec_add1 (nm->sched_node_pending, e.index); + } + else + ASSERT (0); + } +} + +static void +vlib_tw_init (vlib_main_t *vm) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + tw = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), + CLIB_CACHE_LINE_BYTES); + /* Create the process timing wheel */ + TW (tw_timer_wheel_init) + (tw, process_expired_timer_cb /* callback */, 1 / VLIB_TW_TICKS_PER_SECOND, + ~0 /* max expirations per call */); + vm->timing_wheel = tw; +} + +static void +vlib_tw_expire_timers (vlib_main_t *vm) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + TW (tw_timer_expire_timers) (tw, vlib_time_now (vm)); +} + static_always_inline void vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { @@ -1548,58 +1595,59 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm, cpu_time_now); - /* Process pre-input nodes. 
*/ cpu_time_now = clib_cpu_time_now (); - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); - - if (clib_interrupt_is_any_pending (nm->pre_input_node_interrupts)) - { - int int_num = -1; - while ((int_num = clib_interrupt_get_next_and_clear ( - nm->pre_input_node_interrupts, int_num)) != -1) + for (vlib_node_type_t nt = 0; nt < VLIB_N_NODE_TYPE; nt++) + { + if (node_type_attrs[nt].can_be_polled) + vec_foreach (n, nm->nodes_by_type[nt]) + if (n->state == VLIB_NODE_STATE_POLLING) + cpu_time_now = dispatch_node ( + vm, n, nt, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_POLL, cpu_time_now); + + if (node_type_attrs[nt].may_receive_interrupts && + nm->node_interrupts[nt] && + clib_interrupt_is_any_pending (nm->node_interrupts[nt])) { - vlib_node_runtime_t *n; - n = vec_elt_at_index ( - nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], int_num); - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_INTERRUPT, - /* frame */ 0, cpu_time_now); + int int_num = -1; + + while ((int_num = clib_interrupt_get_next_and_clear ( + nm->node_interrupts[nt], int_num)) != -1) + { + vlib_node_runtime_t *n; + n = vec_elt_at_index (nm->nodes_by_type[nt], int_num); + cpu_time_now = dispatch_node ( + vm, n, nt, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_INTERRUPT, + cpu_time_now); + } } } - /* Next process input nodes. */ - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); - - if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) - vm->queue_signal_callback (vm); - - if (clib_interrupt_is_any_pending (nm->input_node_interrupts)) + /* Process sched nodes. */ + if (vec_len (nm->sched_node_pending)) { - int int_num = -1; - - while ((int_num = clib_interrupt_get_next_and_clear ( - nm->input_node_interrupts, int_num)) != -1) + vec_foreach_index (i, nm->sched_node_pending) { - vlib_node_runtime_t *n; - n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - int_num); - cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, - VLIB_NODE_STATE_INTERRUPT, - /* frame */ 0, cpu_time_now); + vlib_node_t *n = vlib_get_node (vm, nm->sched_node_pending[i]); + if (n->type == VLIB_NODE_TYPE_SCHED) + { + vlib_node_runtime_t *nr = + vlib_node_get_runtime (vm, n->index); + nr->stop_timer_handle_plus_1 = 0; + cpu_time_now = dispatch_node ( + vm, nr, VLIB_NODE_TYPE_SCHED, + /* frame */ 0, VLIB_NODE_DISPATCH_REASON_SCHED, + cpu_time_now); + } } + vec_reset_length (nm->sched_node_pending); } + if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) + vm->queue_signal_callback (vm); + /* Input nodes may have added work to the pending vector. Process pending vector until there is nothing left. All pending vectors will be processed from input -> output. 
*/ @@ -1632,8 +1680,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_FALSE (vm->elog_trace_graph_dispatch)) ed = ELOG_DATA (&vlib_global_main.elog_main, es); - TW (tw_timer_expire_timers) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm)); + vlib_tw_expire_timers (vm); ASSERT (nm->process_restore_current != 0); @@ -1664,7 +1711,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) p->stop_timer_handle = ~0; void *data; data = vlib_process_signal_event_helper ( - nm, n, p, te->event_type_index, te->n_data_elts, + vm, nm, n, p, te->event_type_index, te->n_data_elts, te->n_data_elt_bytes); if (te->n_data_bytes < sizeof (te->inline_event_data)) clib_memcpy_fast (data, te->inline_event_data, @@ -1691,6 +1738,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) nm->process_restore_next); } } + else + vlib_tw_expire_timers (vm); + vlib_increment_main_loop_counter (vm); /* Record time stamp in case there are no enabled nodes and above calls do not update time stamp. */ @@ -1725,18 +1775,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) } } -static void -vlib_main_loop (vlib_main_t * vm) -{ - vlib_main_or_worker_loop (vm, /* is_main */ 1); -} - -void -vlib_worker_loop (vlib_main_t * vm) -{ - vlib_main_or_worker_loop (vm, /* is_main */ 0); -} - vlib_global_main_t vlib_global_main; void @@ -1862,34 +1900,6 @@ vl_api_get_elog_trace_api_messages (void) return 0; } -static void -process_expired_timer_cb (u32 *expired_timer_handles) -{ - vlib_main_t *vm = vlib_get_main (); - vlib_node_main_t *nm = &vm->node_main; - u32 *handle; - - vec_foreach (handle, expired_timer_handles) - { - u32 index = vlib_timing_wheel_data_get_index (*handle); - vlib_process_restore_t restore = {}; - - if (vlib_timing_wheel_data_is_timed_event (*handle)) - { - restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT; - restore.timed_event_data_pool_index = index; - } - else - { - vlib_process_t *p = vec_elt (nm->processes, index); - p->stop_timer_handle = ~0; - restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK; - restore.runtime_index = index; - } - vec_add1 (nm->process_restore_current, restore); - } -} - /* Main function. 
*/ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) @@ -1988,20 +1998,13 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) if ((error = vlib_call_all_init_functions (vm))) goto done; - nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), - CLIB_CACHE_LINE_BYTES); + vlib_tw_init (vm); vec_validate (nm->process_restore_current, 10); vec_validate (nm->process_restore_next, 10); vec_set_len (nm->process_restore_current, 0); vec_set_len (nm->process_restore_next, 0); - /* Create the process timing wheel */ - TW (tw_timer_wheel_init) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, - process_expired_timer_cb /* callback */, 10e-6 /* timer period 10us */, - ~0 /* max expirations per call */); - vec_validate (vm->pending_rpc_requests, 0); vec_set_len (vm->pending_rpc_requests, 0); vec_validate (vm->processing_rpc_requests, 0); @@ -2052,7 +2055,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } - vlib_main_loop (vm); + vlib_main_or_worker_loop (vm, /* is_main */ 1); done: /* Stop worker threads, barrier will not be released */ @@ -2072,6 +2075,38 @@ done: return vm->main_loop_exit_status; } +static void +vlib_worker_thread_fn (void *arg) +{ + vlib_global_main_t *vgm = vlib_get_global_main (); + vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; + vlib_main_t *vm = vlib_get_main (); + clib_error_t *e; + + ASSERT (vm->thread_index == vlib_get_thread_index ()); + + vlib_worker_thread_init (w); + clib_time_init (&vm->clib_time); + clib_mem_set_heap (w->thread_mheap); + vlib_tw_init (vm); + + vm->worker_init_functions_called = hash_create (0, 0); + + e = vlib_call_init_exit_functions_no_sort ( + vm, &vgm->worker_init_function_registrations, 1 /* call_once */, + 0 /* is_global */); + if (e) + clib_error_report (e); + + vlib_main_or_worker_loop (vm, /* is_main */ 0); +} + +VLIB_REGISTER_THREAD (worker_thread_reg, static) = { + .name = "workers", + .short_name = "wk", + .function = vlib_worker_thread_fn, +}; + vlib_main_t * vlib_get_main_not_inline (void) { diff --git a/src/vlib/main.h b/src/vlib/main.h index 94b8c4fa954..907346beda2 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -258,6 +258,9 @@ typedef struct vlib_main_t u32 buffer_alloc_success_seed; f64 buffer_alloc_success_rate; + /* Timing wheel for scheduling time-based node dispatch. */ + void *timing_wheel; + #ifdef CLIB_SANITIZE_ADDR /* address sanitizer stack save */ void *asan_stack_save; @@ -319,8 +322,6 @@ typedef struct vlib_global_main_t /* Global main structure. 
*/ extern vlib_global_main_t vlib_global_main; -void vlib_worker_loop (vlib_main_t * vm); - always_inline f64 vlib_time_now (vlib_main_t * vm) { diff --git a/src/vlib/node.c b/src/vlib/node.c index c0572f3cf83..edeb6dc70a7 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -518,7 +518,7 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt, vlib_node_runtime_t *rt; u32 i; - if (n->type == VLIB_NODE_TYPE_PROCESS) + if (node_type_attrs[n->type].is_process) { vlib_process_t *p; uword log2_n_stack_bytes; @@ -559,15 +559,14 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt, { vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1, /* align */ CLIB_CACHE_LINE_BYTES); - if (n->type == VLIB_NODE_TYPE_INPUT) - clib_interrupt_resize (&nm->input_node_interrupts, - vec_len (nm->nodes_by_type[n->type])); - else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) - clib_interrupt_resize (&nm->pre_input_node_interrupts, - vec_len (nm->nodes_by_type[n->type])); + n->runtime_index = rt - nm->nodes_by_type[n->type]; } + if (node_type_attrs[n->type].may_receive_interrupts) + clib_interrupt_resize (&nm->node_interrupts[n->type], + vec_len (nm->nodes_by_type[n->type])); + if (n->type == VLIB_NODE_TYPE_INPUT) nm->input_node_counts_by_state[n->state] += 1; diff --git a/src/vlib/node.h b/src/vlib/node.h index bb6d8f818a8..22d5a4843d7 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -83,9 +83,46 @@ typedef enum /* "Process" nodes which can be suspended and later resumed. */ VLIB_NODE_TYPE_PROCESS, + /* Nodes to by called by per-thread timing wheel. */ + VLIB_NODE_TYPE_SCHED, + VLIB_N_NODE_TYPE, } vlib_node_type_t; +typedef struct +{ + u8 can_be_disabled : 1; + u8 may_receive_interrupts : 1; + u8 decrement_main_loop_per_calls_if_polling : 1; + u8 supports_adaptive_mode : 1; + u8 can_be_polled : 1; + u8 is_process : 1; +} vlib_node_type_atts_t; + +static const vlib_node_type_atts_t node_type_attrs[VLIB_N_NODE_TYPE] ={ + [VLIB_NODE_TYPE_PRE_INPUT] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + .decrement_main_loop_per_calls_if_polling = 1, + .can_be_polled = 1, + }, + [VLIB_NODE_TYPE_INPUT] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + .decrement_main_loop_per_calls_if_polling = 1, + .supports_adaptive_mode = 1, + .can_be_polled = 1, + }, + [VLIB_NODE_TYPE_PROCESS] = { + .can_be_disabled = 1, + .is_process = 1, + }, + [VLIB_NODE_TYPE_SCHED] = { + .can_be_disabled = 1, + .may_receive_interrupts = 1, + }, +}; + typedef struct _vlib_node_fn_registration { vlib_node_function_t *function; @@ -245,7 +282,16 @@ typedef enum foreach_vlib_node_state #undef _ VLIB_N_NODE_STATE, -} vlib_node_state_t; +} __clib_packed vlib_node_state_t; + +typedef enum +{ + VLIB_NODE_DISPATCH_REASON_UNKNOWN = 0, + VLIB_NODE_DISPATCH_REASON_PENDING_FRAME, + VLIB_NODE_DISPATCH_REASON_POLL, + VLIB_NODE_DISPATCH_REASON_INTERRUPT, + VLIB_NODE_DISPATCH_REASON_SCHED, +} __clib_packed vlib_node_dispatch_reason_t; typedef struct vlib_node_t { @@ -498,7 +544,10 @@ typedef struct vlib_node_runtime_t u16 flags; /**< Copy of main node flags. */ - u16 state; /**< Input node state. */ + vlib_node_state_t state; /**< Input node state. */ + + vlib_node_dispatch_reason_t + dispatch_reason; /**< Reason for running this node. */ u16 n_next_nodes; @@ -507,6 +556,9 @@ typedef struct vlib_node_runtime_t last time this node ran. Set to zero before first run of this node. */ + u32 stop_timer_handle_plus_1; /**< Timing wheel stop handle for + SCHED node incremented by 1, + 0 = no timer running. 
*/ CLIB_ALIGN_MARK (runtime_data_pad, 8); @@ -679,29 +731,22 @@ typedef struct } vlib_signal_timed_event_data_t; -always_inline uword -vlib_timing_wheel_data_is_timed_event (u32 d) -{ - return d & 1; -} - -always_inline u32 -vlib_timing_wheel_data_set_suspended_process (u32 i) -{ - return 0 + 2 * i; -} - -always_inline u32 -vlib_timing_wheel_data_set_timed_event (u32 i) +typedef enum { - return 1 + 2 * i; -} + VLIB_TW_EVENT_T_PROCESS_NODE = 1, + VLIB_TW_EVENT_T_TIMED_EVENT = 2, + VLIB_TW_EVENT_T_SCHED_NODE = 3, +} vlib_tw_event_type_t; -always_inline uword -vlib_timing_wheel_data_get_index (u32 d) +typedef union { - return d / 2; -} + struct + { + u32 type : 2; /* vlib_tw_event_type_t */ + u32 index : 30; + }; + u32 as_u32; +} vlib_tw_event_t; typedef struct { @@ -727,8 +772,7 @@ typedef struct vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE]; /* Node runtime indices for input nodes with pending interrupts. */ - void *input_node_interrupts; - void *pre_input_node_interrupts; + void *node_interrupts[VLIB_N_NODE_TYPE]; /* Input nodes are switched from/to interrupt to/from polling mode when average vector length goes above/below polling/interrupt @@ -742,14 +786,14 @@ typedef struct /* Vector of internal node's frames waiting to be called. */ vlib_pending_frame_t *pending_frames; - /* Timing wheel for scheduling time-based node dispatch. */ - void *timing_wheel; - vlib_signal_timed_event_data_t *signal_timed_event_data_pool; /* Vector of process nodes waiting for restore */ vlib_process_restore_t *process_restore_current; + /* Vector of sched nodes waiting to be calleed */ + u32 *sched_node_pending; + /* Vector of process nodes waiting for restore in next greaph scheduler run */ vlib_process_restore_t *process_restore_next; diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 16e904e8433..375b17bd7ae 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -734,6 +734,9 @@ show_node (vlib_main_t * vm, unformat_input_t * input, case VLIB_NODE_TYPE_PROCESS: type_str = "process"; break; + case VLIB_NODE_TYPE_SCHED: + type_str = "sched"; + break; default: type_str = "unknown"; } diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index ffa17ba7bb1..91fedaa4c9c 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -50,6 +50,8 @@ #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> #include <vppinfra/interrupt.h> +#define VLIB_TW_TICKS_PER_SECOND 1e5 /* 10 us */ + #ifdef CLIB_SANITIZE_ADDR #include <sanitizer/asan_interface.h> #endif @@ -249,17 +251,9 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index) { vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vec_elt (nm->nodes, node_index); - void *interrupts = 0; + void *interrupts = nm->node_interrupts[n->type]; - if (n->type == VLIB_NODE_TYPE_INPUT) - interrupts = nm->input_node_interrupts; - else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) - interrupts = nm->pre_input_node_interrupts; - else - { - ASSERT (0); - return; - } + ASSERT (interrupts); if (vm != vlib_get_main ()) clib_interrupt_set_atomic (interrupts, n->runtime_index); @@ -267,6 +261,25 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index) clib_interrupt_set (interrupts, n->runtime_index); } +always_inline void +vlib_node_schedule (vlib_main_t *vm, u32 node_index, f64 dt) +{ + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; + u64 ticks; + + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index); + vlib_tw_event_t e = { + .type = VLIB_TW_EVENT_T_SCHED_NODE, + .index = node_index, + }; + + dt = 
flt_round_nearest (dt * VLIB_TW_TICKS_PER_SECOND); + ticks = clib_max ((u64) dt, 1); + + rt->stop_timer_handle_plus_1 = + 1 + TW (tw_timer_start) (tw, e.as_u32, 0 /* timer_id */, ticks); +} + always_inline vlib_process_t * vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node) { @@ -570,14 +583,14 @@ vlib_get_current_process_node_index (vlib_main_t * vm) return process->node_runtime.node_index; } -/** Returns TRUE if a process suspend time is less than 10us +/** Returns TRUE if a process suspend time is less than vlib timer wheel tick @param dt - remaining poll time in seconds - @returns 1 if dt < 10e-6, 0 otherwise + @returns 1 if dt < 1/VLIB_TW_TICKS_PER_SECOND, 0 otherwise */ always_inline uword vlib_process_suspend_time_is_zero (f64 dt) { - return dt < 10e-6; + return dt < (1 / VLIB_TW_TICKS_PER_SECOND); } /** Suspend a vlib cooperative multi-tasking thread for a period of time @@ -601,7 +614,7 @@ vlib_process_suspend (vlib_main_t * vm, f64 dt) if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { /* expiration time in 10us ticks */ - p->resume_clock_interval = dt * 1e5; + p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND; vlib_process_start_switch_stack (vm, 0); clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ -912,7 +925,7 @@ vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt) r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { - p->resume_clock_interval = dt * 1e5; + p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND; vlib_process_start_switch_stack (vm, 0); clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ -963,12 +976,11 @@ vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index, } always_inline void * -vlib_process_signal_event_helper (vlib_node_main_t * nm, - vlib_node_t * n, - vlib_process_t * p, - uword t, +vlib_process_signal_event_helper (vlib_main_t *vm, vlib_node_main_t *nm, + vlib_node_t *n, vlib_process_t *p, uword t, uword n_data_elts, uword n_data_elt_bytes) { + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; uword add_to_pending = 0, delete_from_wheel = 0; u8 *data_to_be_written_by_caller; vec_attr_t va = { .elt_sz = n_data_elt_bytes }; @@ -1016,8 +1028,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, break; } - if (TW (tw_timer_handle_is_free) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - p->stop_timer_handle)) + if (TW (tw_timer_handle_is_free) (tw, p->stop_timer_handle)) delete_from_wheel = 0; /* Never add current process to pending vector since current process is @@ -1036,8 +1047,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, if (delete_from_wheel) { - TW (tw_timer_stop) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, p->stop_timer_handle); + TW (tw_timer_stop) (tw, p->stop_timer_handle); p->stop_timer_handle = ~0; } @@ -1069,7 +1079,7 @@ vlib_process_signal_event_data (vlib_main_t * vm, else t = h[0]; - return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, + return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts, n_data_elt_bytes); } @@ -1080,6 +1090,7 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, uword type_opaque, uword n_data_elts, uword n_data_elt_bytes) { + TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) vm->timing_wheel; vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vlib_get_node (vm, node_index); vlib_process_t *p = vec_elt (nm->processes, n->runtime_index); @@ -1097,7 +1108,7 @@ 
vlib_process_signal_event_at_time (vlib_main_t * vm, t = h[0]; if (vlib_process_suspend_time_is_zero (dt)) - return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, + return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts, n_data_elt_bytes); else { @@ -1118,11 +1129,13 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, te->event_type_index = t; p->stop_timer_handle = - TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - vlib_timing_wheel_data_set_timed_event - (te - nm->signal_timed_event_data_pool), - 0 /* timer_id */ , - (vlib_time_now (vm) + dt) * 1e5); + TW (tw_timer_start) (tw, + (vlib_tw_event_t){ + .type = VLIB_TW_EVENT_T_TIMED_EVENT, + .index = te - nm->signal_timed_event_data_pool, + } + .as_u32, + 0 /* timer_id */, dt * VLIB_TW_TICKS_PER_SECOND); /* Inline data big enough to hold event? */ if (te->n_data_bytes < sizeof (te->inline_event_data)) @@ -1146,8 +1159,8 @@ vlib_process_signal_one_time_event_data (vlib_main_t * vm, vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vlib_get_node (vm, node_index); vlib_process_t *p = vec_elt (nm->processes, n->runtime_index); - return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts, - n_data_elt_bytes); + return vlib_process_signal_event_helper (vm, nm, n, p, type_index, + n_data_elts, n_data_elt_bytes); } always_inline void diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 9dce1b8c8bd..0a5779c2d2e 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -705,52 +705,29 @@ start_workers (vlib_main_t * vm) vec_add1 (nm_clone->nodes, n); n++; } - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], - CLIB_CACHE_LINE_BYTES); - vec_foreach (rt, - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - CLIB_CACHE_LINE_BYTES); - clib_interrupt_init ( - &nm_clone->input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); - clib_interrupt_init ( - &nm_clone->pre_input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], - CLIB_CACHE_LINE_BYTES); - vec_foreach (rt, - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy initial runtime_data from node */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + + foreach_int (nt, VLIB_NODE_TYPE_INTERNAL, + VLIB_NODE_TYPE_PRE_INPUT, VLIB_NODE_TYPE_INPUT, + VLIB_NODE_TYPE_SCHED) + { + u32 n_nodes = vec_len (nm_clone->nodes_by_type[nt]); + nm_clone->nodes_by_type[nt] = vec_dup_aligned ( + 
nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES); + + if (node_type_attrs[nt].may_receive_interrupts) + clib_interrupt_init (&nm_clone->node_interrupts[nt], + n_nodes); + + vec_foreach (rt, nm_clone->nodes_by_type[nt]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + /* copy initial runtime_data from node */ + if (n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy (rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, + n->runtime_data_bytes)); + } + } nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); @@ -1016,101 +993,53 @@ vlib_worker_thread_node_refork (void) vec_free (old_nodes_clone); + /* re-clone nodes */ - /* re-clone internal nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], - CLIB_CACHE_LINE_BYTES); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } - - for (j = 0; j < vec_len (old_rt); j++) + foreach_int (nt, VLIB_NODE_TYPE_INTERNAL, VLIB_NODE_TYPE_PRE_INPUT, + VLIB_NODE_TYPE_INPUT, VLIB_NODE_TYPE_SCHED) { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); - } + old_rt = nm_clone->nodes_by_type[nt]; + u32 n_nodes = vec_len (nm->nodes_by_type[nt]); - vec_free (old_rt); - - /* re-clone input nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], - CLIB_CACHE_LINE_BYTES); - clib_interrupt_resize ( - &nm_clone->input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); - clib_interrupt_resize ( - &nm_clone->pre_input_node_interrupts, - vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); - - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + nm_clone->nodes_by_type[nt] = + vec_dup_aligned (nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES); - for (j = 0; j < vec_len (old_rt); j++) - { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); - } + if (nm_clone->node_interrupts[nt]) + clib_interrupt_resize (&nm_clone->node_interrupts[nt], n_nodes); - for (j = vec_len (old_rt); - j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); j++) - { - rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; - nm_clone->input_node_counts_by_state[rt->state] += 1; - } - - vec_free (old_rt); + vec_foreach (rt, nm_clone->nodes_by_type[nt]) + { + vlib_node_t *n = vlib_get_node (vm, rt->node_index); + /* copy runtime_data, will be overwritten later for existing rt */ + if 
(n->runtime_data && n->runtime_data_bytes > 0) + clib_memcpy_fast ( + rt->runtime_data, n->runtime_data, + clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes)); + } - /* re-clone pre-input nodes */ - old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]; - nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] = - vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], - CLIB_CACHE_LINE_BYTES); + for (j = 0; j < vec_len (old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + rt->flags = old_rt[j].flags; + clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, + VLIB_NODE_RUNTIME_DATA_SIZE); + } - vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - { - vlib_node_t *n = vlib_get_node (vm, rt->node_index); - /* copy runtime_data, will be overwritten later for existing rt */ - if (n->runtime_data && n->runtime_data_bytes > 0) - clib_memcpy_fast (rt->runtime_data, n->runtime_data, - clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, - n->runtime_data_bytes)); - } + if (nt == VLIB_NODE_TYPE_INPUT) + { + for (j = vec_len (old_rt); + j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); + j++) + { + rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; + nm_clone->input_node_counts_by_state[rt->state] += 1; + } + } - for (j = 0; j < vec_len (old_rt); j++) - { - rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); - rt->state = old_rt[j].state; - rt->flags = old_rt[j].flags; - clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data, - VLIB_NODE_RUNTIME_DATA_SIZE); + vec_free (old_rt); } - vec_free (old_rt); - vec_free (nm_clone->processes); nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); @@ -1630,37 +1559,6 @@ vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm) clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock); } -void -vlib_worker_thread_fn (void *arg) -{ - vlib_global_main_t *vgm = vlib_get_global_main (); - vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_main_t *vm = vlib_get_main (); - clib_error_t *e; - - ASSERT (vm->thread_index == vlib_get_thread_index ()); - - vlib_worker_thread_init (w); - clib_time_init (&vm->clib_time); - clib_mem_set_heap (w->thread_mheap); - - vm->worker_init_functions_called = hash_create (0, 0); - - e = vlib_call_init_exit_functions_no_sort ( - vm, &vgm->worker_init_function_registrations, 1 /* call_once */, - 0 /* is_global */); - if (e) - clib_error_report (e); - - vlib_worker_loop (vm); -} - -VLIB_REGISTER_THREAD (worker_thread_reg, static) = { - .name = "workers", - .short_name = "wk", - .function = vlib_worker_thread_fn, -}; - extern clib_march_fn_registration *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations; extern clib_march_fn_registration diff --git a/src/vlib/time.h b/src/vlib/time.h index 61873bb2ef3..a9dc5395661 100644 --- a/src/vlib/time.h +++ b/src/vlib/time.h @@ -12,8 +12,7 @@ static inline f64 vlib_time_get_next_timer (vlib_main_t *vm) { - vlib_node_main_t *nm = &vm->node_main; - TWT (tw_timer_wheel) *wheel = nm->timing_wheel; + TWT (tw_timer_wheel) *wheel = vm->timing_wheel; return TW (tw_timer_first_expires_in_ticks) (wheel) * wheel->timer_interval; } diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index e96cd902466..302d1eb1194 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -130,6 +130,17 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) } } +static int +is_int_pending (vlib_node_main_t *nm) +{ + + for (int nt = 
0; nt < VLIB_N_NODE_TYPE; nt++) + if (nm->node_interrupts[nt] && + clib_interrupt_is_any_pending (nm->node_interrupts[nt])) + return 1; + return 0; +} + static_always_inline uword linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, u32 thread_index) @@ -174,8 +185,8 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) { - ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) - ((TWT (tw_timer_wheel) *) nm->timing_wheel); + ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) ( + (TWT (tw_timer_wheel) *) vm->timing_wheel); /* Nothing on the fast wheel, sleep 10ms */ if (ticks_until_expiration == TW_SLOTS_PER_RING) @@ -250,10 +261,7 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (nanosleep (&ts, &tsrem) < 0) ts = tsrem; if (*vlib_worker_threads->wait_at_barrier || - clib_interrupt_is_any_pending ( - nm->input_node_interrupts) || - clib_interrupt_is_any_pending ( - nm->pre_input_node_interrupts)) + is_int_pending (nm)) goto done; } } diff --git a/test/asf/test_session_sdl.py b/test/asf/test_session_sdl.py index 53301f7bd6c..d2e30daa9f2 100644 --- a/test/asf/test_session_sdl.py +++ b/test/asf/test_session_sdl.py @@ -5,6 +5,7 @@ import unittest from framework import VppTestCase from asfframework import VppTestRunner, tag_fixme_vpp_workers from ipaddress import IPv4Network, IPv6Network +from config import config from vpp_ip_route import ( VppIpRoute, @@ -25,6 +26,10 @@ class TestSessionSDL(VppTestCase): @classmethod def setUpClass(cls): + # increase vapi timeout, to avoid + # failures reported on test-cov + if config.gcov: + cls.vapi_response_timeout = 20 super(TestSessionSDL, cls).setUpClass() @classmethod diff --git a/test/test_pg_stream.py b/test/test_pg_stream.py index 471c85c43f0..915a5aa73d9 100644 --- a/test/test_pg_stream.py +++ b/test/test_pg_stream.py @@ -11,6 +11,7 @@ from scapy.layers.inet6 import IPv6 from framework import VppTestCase from asfframework import VppTestRunner +from config import config class TestPgStream(VppTestCase): @@ -19,6 +20,18 @@ class TestPgStream(VppTestCase): def __init__(self, *args): VppTestCase.__init__(self, *args) + @classmethod + def setUpClass(cls): + # increase vapi timeout, to avoid + # failures reported on test-cov + if config.gcov: + cls.vapi_response_timeout = 20 + super(TestPgStream, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestPgStream, cls).tearDownClass() + def setUp(self): super(TestPgStream, self).setUp() |
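
A note on the vlib timer-wheel changes above: the process suspend/resume paths stop hard-coding 10 us ticks (dt * 1e5, dt < 10e-6) and instead derive tick counts from VLIB_TW_TICKS_PER_SECOND, rounding to the nearest tick and, on the timer-start path at the top of the node_funcs.h hunk, clamping to at least one tick. The standalone sketch below restates that conversion outside of vlib; EXAMPLE_TW_TICKS_PER_SECOND and the example_* helpers are hypothetical stand-ins, and 100000.0 is only an assumed value chosen to match the literals this change replaces — the real constant lives in the vlib headers and may differ.

/* Standalone sketch of the seconds-to-ticks conversion applied above.
 * EXAMPLE_TW_TICKS_PER_SECOND stands in for VLIB_TW_TICKS_PER_SECOND;
 * 100000.0 (10 us ticks) is an assumption matching the replaced literals. */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_TW_TICKS_PER_SECOND 100000.0

/* Mirrors vlib_process_suspend_time_is_zero (): anything shorter than one
 * wheel tick is treated as "no suspend". */
static int
example_suspend_time_is_zero (double dt)
{
  return dt < (1 / EXAMPLE_TW_TICKS_PER_SECOND);
}

/* Converts a suspend interval in seconds to wheel ticks, rounding to the
 * nearest tick and never returning less than 1, as the clib_max (..., 1)
 * in the diff does. */
static uint64_t
example_suspend_ticks (double dt)
{
  double rounded = round (dt * EXAMPLE_TW_TICKS_PER_SECOND);
  uint64_t ticks = (uint64_t) rounded;
  return ticks > 1 ? ticks : 1;
}

int
main (void)
{
  /* 5 us is below one 10 us tick -> considered zero. */
  printf ("5e-6 s: zero=%d\n", example_suspend_time_is_zero (5e-6));
  /* 2.5 ms -> 250 ticks at 10 us per tick. */
  printf ("2.5e-3 s -> %llu ticks\n",
          (unsigned long long) example_suspend_ticks (2.5e-3));
  return 0;
}

Deriving the interval math from a single constant keeps vlib_process_suspend (), vlib_process_signal_event_at_time () and the epoll sleep calculation in unix/input.c in agreement should the wheel resolution ever change.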
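
A second note: the event-signaling path switches from vlib_timing_wheel_data_set_timed_event () to a vlib_tw_event_t value packed into the 32-bit user handle passed to tw_timer_start () (the .as_u32 member). Only the .type, .index and .as_u32 names appear in this diff; the field widths and ordering below are assumptions, and example_tw_event_t / EXAMPLE_TW_EVENT_T_* are hypothetical names used purely to illustrate the tag-plus-index-in-one-u32 pattern.

/* Hypothetical illustration of packing an event tag and an object index
 * into the single u32 a timer wheel stores per timer.  The real
 * vlib_tw_event_t is defined in the vlib headers and may differ. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef enum
{
  EXAMPLE_TW_EVENT_T_PROCESS_NODE,
  EXAMPLE_TW_EVENT_T_TIMED_EVENT,
} example_tw_event_type_t;

typedef union
{
  struct
  {
    uint32_t type : 2;	 /* which kind of object the index refers to */
    uint32_t index : 30; /* pool/vector index of that object */
  };
  uint32_t as_u32; /* the value handed to / returned by the wheel */
} example_tw_event_t;

int
main (void)
{
  example_tw_event_t e = { .type = EXAMPLE_TW_EVENT_T_TIMED_EVENT,
			   .index = 42 };
  uint32_t handle = e.as_u32; /* what tw_timer_start () would store */

  /* On expiry, the stored handle is unpacked the same way. */
  example_tw_event_t out = { .as_u32 = handle };
  assert (out.type == EXAMPLE_TW_EVENT_T_TIMED_EVENT && out.index == 42);
  printf ("type=%u index=%u\n", (unsigned) out.type, (unsigned) out.index);
  return 0;
}

Keeping the tag and index in one u32 lets the wheel continue to store a single word per timer while the expiry code can still tell process-suspend timers apart from one-off timed events.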