diff options
163 files changed, 10629 insertions, 3727 deletions
diff --git a/.gitignore b/.gitignore index 8d5b84605c4..9d2ecd8f6b3 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ /build-root/*.tar.xz /build-root/*.changes /build-root/rpmbuild/ -/build-root/test-coverage/ +/build-root/test-coverage*/ /build-root/test/ /build-config.mk /build/external/*.tar.gz @@ -23,6 +23,11 @@ MACHINE=$(shell uname -m) SUDO?=sudo -E DPDK_CONFIG?=no-pci +# we prefer clang by default +ifeq ($(CC),cc) + CC=clang +endif + ifeq ($(strip $(SHELL)),) $(error "bash not found, VPP requires bash to build") endif @@ -85,7 +90,7 @@ endif DEB_DEPENDS = curl build-essential autoconf automake ccache DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-python DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config -DEB_DEPENDS += gcovr lcov chrpath autoconf libnuma-dev +DEB_DEPENDS += clang gcovr lcov chrpath autoconf libnuma-dev DEB_DEPENDS += python3-all python3-setuptools check DEB_DEPENDS += libffi-dev python3-ply libunwind-dev DEB_DEPENDS += cmake ninja-build python3-jsonschema python3-yaml @@ -166,6 +171,7 @@ RPM_DEPENDS += xmlto RPM_DEPENDS += elfutils-libelf-devel libpcap-devel RPM_DEPENDS += libnl3-devel libmnl-devel RPM_DEPENDS += nasm +RPM_DEPENDS += socat ifeq ($(OS_ID),fedora) RPM_DEPENDS += dnf-utils @@ -296,7 +302,7 @@ help: @echo " build - build debug binaries" @echo " build-release - build release binaries" @echo " build-coverity - build coverity artifacts" - @echo " build-vpp-gcov - build gcov vpp only" + @echo " build-gcov - build gcov vpp only" @echo " rebuild - wipe and build debug binaries" @echo " rebuild-release - wipe and build release binaries" @echo " run - run debug binary" @@ -316,6 +322,7 @@ help: @echo " install-ext-dep[s] - install external development dependencies" @echo " install-opt-deps - install optional dependencies" @echo " ctags - (re)generate ctags database" + @echo " etags - (re)generate etags database" @echo " gtags - (re)generate gtags database" @echo " cscope - (re)generate cscope 
database" @echo " compdb - (re)generate compile_commands.json" @@ -451,7 +458,7 @@ endif install-deps: install-dep define make - @$(MAKE) -C $(BR) PLATFORM=$(PLATFORM) TAG=$(1) $(2) + @$(MAKE) -C $(BR) CC=$(CC) PLATFORM=$(PLATFORM) TAG=$(1) $(2) endef $(BR)/scripts/.version: @@ -506,9 +513,10 @@ rebuild: wipe build build-release: $(BR)/.deps.ok $(call make,$(PLATFORM),$(addsuffix -install,$(TARGETS))) -.PHONY: build-vpp-gcov -build-vpp-gcov: - $(call test,vpp_gcov) +.PHONY: build-gcov +build-gcov: $(BR)/.deps.ok + $(eval CC=gcc) + $(call make,vpp_gcov,$(addsuffix -install,$(TARGETS))) .PHONY: wipe-release wipe-release: test-wipe $(BR)/.deps.ok @@ -539,16 +547,10 @@ endef .PHONY: test test: -ifeq ($(CC),cc) - $(eval CC=clang) -endif $(call test,vpp,test) .PHONY: test-debug test-debug: -ifeq ($(CC),cc) - $(eval CC=clang) -endif $(call test,vpp_debug,test) .PHONY: test-cov @@ -558,9 +560,13 @@ test-cov: $(call test,vpp_gcov,cov) .PHONY: test-cov-hs -test-cov-hs: - @$(MAKE) -C extras/hs-test build-cov - @$(MAKE) -C extras/hs-test test-cov +test-cov-hs: build-gcov + @$(MAKE) CC=$(CC) -C extras/hs-test test-cov \ + VPP_BUILD_DIR=$(BR)/build-vpp_gcov-native/vpp + +.PHONY: test-cov-post-standalone +test-cov-post-standalone: + $(MAKE) CC=$(CC) -C test cov-post VPP_BUILD_DIR=$(BR)/build-vpp_gcov-native/vpp .PHONY: test-cov-both test-cov-both: @@ -790,14 +796,14 @@ pkg-srpm: dist .PHONY: install-ext-deps install-ext-deps: - $(MAKE) -C build/external install-$(PKG) + $(MAKE) CC=$(CC) -C build/external install-$(PKG) .PHONY: install-ext-dep install-ext-dep: install-ext-deps .PHONY: install-opt-deps install-opt-deps: - $(MAKE) -C build/optional install-$(PKG) + $(MAKE) CC=$(CC) -C build/optional install-$(PKG) .PHONY: json-api-files json-api-files: @@ -820,6 +826,11 @@ ctags: ctags.files @ctags --totals --tag-relative=yes -L $< @rm $< +.PHONY: etags +etags: ctags.files + @ctags -e --totals -L $< + @rm $< + .PHONY: gtags gtags: ctags @gtags --gtagslabel=ctags @@ -923,13 +934,13 
@@ docs: .PHONY: pkg-verify pkg-verify: install-dep $(BR)/.deps.ok install-ext-deps $(call banner,"Building for PLATFORM=vpp") - @$(MAKE) -C build-root PLATFORM=vpp TAG=vpp wipe-all install-packages + @$(MAKE) CC=$(CC) -C build-root PLATFORM=vpp TAG=vpp wipe-all install-packages $(call banner,"Building sample-plugin") - @$(MAKE) -C build-root PLATFORM=vpp TAG=vpp sample-plugin-install + @$(MAKE) CC=$(CC) -C build-root PLATFORM=vpp TAG=vpp sample-plugin-install $(call banner,"Building libmemif") - @$(MAKE) -C build-root PLATFORM=vpp TAG=vpp libmemif-install + @$(MAKE) CC=gcc -C build-root PLATFORM=vpp TAG=vpp libmemif-install $(call banner,"Building $(PKG) packages") - @$(MAKE) pkg-$(PKG) + @$(MAKE) CC=$(CC) pkg-$(PKG) # Note: 'make verify' target is not used by ci-management scripts MAKE_VERIFY_GATE_OS ?= ubuntu-22.04 @@ -939,7 +950,7 @@ ifeq ($(OS_ID)-$(OS_VERSION_ID),$(MAKE_VERIFY_GATE_OS)) $(call banner,"Testing vppapigen") @src/tools/vppapigen/test_vppapigen.py $(call banner,"Running tests") - @$(MAKE) COMPRESS_FAILED_TEST_LOGS=yes RETRIES=3 test + @$(MAKE) CC=$(CC) COMPRESS_FAILED_TEST_LOGS=yes RETRIES=3 test else $(call banner,"Skipping tests. Tests under 'make verify' supported on $(MAKE_VERIFY_GATE_OS)") endif diff --git a/build-root/Makefile b/build-root/Makefile index b62a671d58f..b529acb5b12 100644 --- a/build-root/Makefile +++ b/build-root/Makefile @@ -667,7 +667,7 @@ MAKE_PARALLEL_FLAGS ?= $(if $($(PACKAGE)_make_parallel_fails),,-j $(MAKE_PARALLE # Make command shorthand for packages & tools. 
PACKAGE_MAKE = \ - $(MAKE) \ + $(MAKE) CC=$(CC) \ -C $(PACKAGE_BUILD_DIR) \ $($(PACKAGE)_make_args) \ $(MAKE_PARALLEL_FLAGS) diff --git a/build/external/packages/xdp-tools.mk b/build/external/packages/xdp-tools.mk index 08d94e42466..57f5e0ae83a 100644 --- a/build/external/packages/xdp-tools.mk +++ b/build/external/packages/xdp-tools.mk @@ -25,7 +25,7 @@ define xdp-tools_config_cmds endef define xdp-tools_build_cmds - @cd ${xdp-tools_src_dir} && $(MAKE) V=1 BUILD_STATIC_ONLY=y > $(xdp-tools_build_log) + @cd ${xdp-tools_src_dir} && $(MAKE) CC=gcc V=1 BUILD_STATIC_ONLY=y > $(xdp-tools_build_log) endef define xdp-tools_install_cmds diff --git a/build/external/patches/dpdk_24.11.1/0001-net-ice-fix-how-ice-driver-handles-flows.patch b/build/external/patches/dpdk_24.11.1/0001-net-ice-fix-how-ice-driver-handles-flows.patch new file mode 100644 index 00000000000..2808b142557 --- /dev/null +++ b/build/external/patches/dpdk_24.11.1/0001-net-ice-fix-how-ice-driver-handles-flows.patch @@ -0,0 +1,64 @@ +From b33b5d621d340d033cd1d181b0ab0c09dc9bd787 Mon Sep 17 00:00:00 2001 +From: Vladimir Medvedkin <vladimir.medvedkin@intel.com> +Date: Thu, 30 Jan 2025 15:50:11 +0000 +Subject: [PATCH] net/ice: fix how ice driver handles flows + +Currently ICE PMD uses group attribute to select the appropriate HW engine +to offload the flow. This behavior violates the rte_flow API, existing +documentation/examples, and reveals hardware specific details. + +This patch eliminates the use of the group attribute and runs each engine +parser in the order they work in the HW pipeline. 
+ +Fixes: 9c5f0070fa3f ("net/ice: map group to pipeline stage") +Cc: qi.z.zhang@intel.com +Cc: stable@dpdk.org + +Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com> +--- + drivers/net/ice/ice_generic_flow.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/ice/ice_generic_flow.c b/drivers/net/ice/ice_generic_flow.c +index 50d760004f..666c8f3cb9 100644 +--- a/drivers/net/ice/ice_generic_flow.c ++++ b/drivers/net/ice/ice_generic_flow.c +@@ -2295,21 +2295,22 @@ ice_flow_process_filter(struct rte_eth_dev *dev, + return 0; + } + +- parser = get_flow_parser(attr->group); +- if (parser == NULL) { +- rte_flow_error_set(error, EINVAL, +- RTE_FLOW_ERROR_TYPE_ATTR, +- NULL, "NULL attribute."); +- return -rte_errno; ++ for (int i = 0; i < 3; i++) { ++ parser = get_flow_parser(i); ++ if (parser == NULL) { ++ rte_flow_error_set(error, EINVAL, ++ RTE_FLOW_ERROR_TYPE_ATTR, ++ NULL, "NULL attribute."); ++ return -rte_errno; ++ } ++ if (ice_parse_engine(ad, flow, parser, attr->priority, ++ pattern, actions, error)) { ++ *engine = parser->engine; ++ return 0; ++ } + } + +- if (ice_parse_engine(ad, flow, parser, attr->priority, +- pattern, actions, error)) { +- *engine = parser->engine; +- return 0; +- } else { +- return -rte_errno; +- } ++ return -rte_errno; + } + + static int +-- +2.43.0 + diff --git a/docs/_images/SNAT_Topology.jpg b/docs/_images/NAT_Topology.jpg Binary files differindex 39e3af9ff80..39e3af9ff80 100644 --- a/docs/_images/SNAT_Topology.jpg +++ b/docs/_images/NAT_Topology.jpg diff --git a/docs/aboutvpp/releasenotes/index.rst b/docs/aboutvpp/releasenotes/index.rst index 7e4f7b93f31..8fcb91db560 100644 --- a/docs/aboutvpp/releasenotes/index.rst +++ b/docs/aboutvpp/releasenotes/index.rst @@ -6,6 +6,7 @@ Release notes .. 
toctree:: :maxdepth: 2 + v25.02 v24.10 v24.06 v24.02 diff --git a/docs/aboutvpp/releasenotes/v25.02.rst b/docs/aboutvpp/releasenotes/v25.02.rst new file mode 100644 index 00000000000..e3e3a151ac7 --- /dev/null +++ b/docs/aboutvpp/releasenotes/v25.02.rst @@ -0,0 +1,623 @@ +Release notes for VPP 25.02 +=========================== + +More than 269 commits since the previous release, including 100 fixes. + +Features +-------- + +- Plugins + + - Crypto - ipsecmb + + - Bump to ipsecmb v2.0 (`de020ab47 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=de020ab47>`_) + + - DPDK + + - Update rdma-core to 55.0 (`eaf1059c8 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=eaf1059c8>`_) + - Bump to DPDK 24.11.1 (`82f9167e4 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=82f9167e4>`_) + + - Host Stack Applications + + - Added request repeating to http client (`d74e440f2 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d74e440f2>`_) + - Proxying UDP in HTTP/1.1 (`c19cca931 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=c19cca931>`_) + + - Marvell Armada device driver + + - Initial DSA support (`99eed5ec6 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=99eed5ec6>`_) + + - Marvell Octeon device driver + + - Enable h/w vlan tagging support (`e07c5fe46 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=e07c5fe46>`_) + - Add crypto framework (`6937c0b2d <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6937c0b2d>`_) + - Add support for hmac\_md5 and chachapoly (`3ac40b94c <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=3ac40b94c>`_) + - Update octeon roc version (`d023a7e26 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d023a7e26>`_) + - Add direct mode changes in crypto datapath (`d3df84523 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=d3df84523>`_) + + - Snort plugin + + - API functions for plugin (`e3ad5aa68 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=e3ad5aa68>`_) + +- Python binding for the VPP API + + - Vpp\_papi asyncio support 
(`0ad98a8c9 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=0ad98a8c9>`_) + +- VNET + + - Bidirectional Forwarding Detection (BFD) + + - Add support for multihop (`17a918133 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=17a918133>`_) + + - Crypto Infra + + - Add async algo macros for ctr sha2 (`9b58a8ec0 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=9b58a8ec0>`_) + + - New Device Drivers Infra + + - Introduce consistent\_qp feature (`825691419 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=825691419>`_) + - Secondary interfaces support (`61e287b9f <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=61e287b9f>`_) + + - Segment Routing (IPv6 and MPLS) + + - Adding support to SRv6 uA behavior (`3462950ae <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=3462950ae>`_) + + - Session Layer + + - Add support for vcl transport attributes (`28955ceb8 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=28955ceb8>`_) + - Add auto sdl (`6d4dbd4f2 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6d4dbd4f2>`_) + + - TLS and TLS engine plugins + + - Add async processing support (`b8af24b26 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=b8af24b26>`_) + + +Known issues +------------ + +For the full list of issues please refer to fd.io `JIRA <https://jira.fd.io>`_. + +Fixed issues +------------ + +For the full list of fixed issues please refer to: +- fd.io `JIRA <https://jira.fd.io>`_ +- git `commit log <https://git.fd.io/vpp/log/?h=master>`_ + + +API changes +----------- + +Description of results: + +- *Definition changed*: indicates that the API file was modified between releases. +- *Only in image*: indicates the API is new for this release. +- *Only in file*: indicates the API has been removed in this release. 
+ +============================================================= ================== +Message Name Result +============================================================= ================== +af_xdp_create only in file +af_xdp_create_reply only in file +af_xdp_create_v2 only in file +af_xdp_create_v2_reply only in file +auto_sdl_config only in image +auto_sdl_config_reply only in image +bfd_udp_enable_multihop only in image +bfd_udp_enable_multihop_reply only in image +dev_create_port_if definition changed +http_static_enable only in file +http_static_enable_reply only in file +http_static_enable_v3 only in image +http_static_enable_v3_reply only in image +ikev2_get_sleep_interval only in image +ikev2_get_sleep_interval_reply only in image +ikev2_plugin_set_sleep_interval only in image +ikev2_plugin_set_sleep_interval_reply only in image +pg_delete_interface only in image +pg_delete_interface_reply only in image +session_rules_v2_details only in image +session_rules_v2_dump only in image +session_sdl_add_del_v2 only in image +session_sdl_add_del_v2_reply only in image +session_sdl_v2_details only in image +session_sdl_v2_dump only in image +session_sdl_v3_details only in image +session_sdl_v3_dump only in image +sflow_enable_disable only in image +sflow_enable_disable_reply only in image +sflow_header_bytes_get only in image +sflow_header_bytes_get_reply only in image +sflow_header_bytes_set only in image +sflow_header_bytes_set_reply only in image +sflow_interface_details only in image +sflow_interface_dump only in image +sflow_polling_interval_get only in image +sflow_polling_interval_get_reply only in image +sflow_polling_interval_set only in image +sflow_polling_interval_set_reply only in image +sflow_sampling_rate_get only in image +sflow_sampling_rate_get_reply only in image +sflow_sampling_rate_set only in image +sflow_sampling_rate_set_reply only in image +snort_client_details only in image +snort_client_disconnect only in image +snort_client_disconnect_reply 
only in image +snort_client_get only in image +snort_client_get_reply only in image +snort_input_mode_get only in image +snort_input_mode_get_reply only in image +snort_input_mode_set only in image +snort_input_mode_set_reply only in image +snort_instance_create only in image +snort_instance_create_reply only in image +snort_instance_delete only in image +snort_instance_delete_reply only in image +snort_instance_details only in image +snort_instance_disconnect only in image +snort_instance_disconnect_reply only in image +snort_instance_get only in image +snort_instance_get_reply only in image +snort_interface_attach only in image +snort_interface_attach_reply only in image +snort_interface_detach only in image +snort_interface_detach_reply only in image +snort_interface_details only in image +snort_interface_get only in image +snort_interface_get_reply only in image +sw_interface_ip4_enable_disable only in image +sw_interface_ip4_enable_disable_reply only in image +============================================================= ================== + +Found 70 api message signature differences + + +Newly deprecated API messages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These messages are still there in the API, but can and probably +will disappear in the next release. + +- http_static_enable_v2 +- http_static_enable_v2_reply +- http_static_enable_v3 +- http_static_enable_v3_reply +- session_rules_details +- session_rules_dump +- session_sdl_add_del +- session_sdl_add_del_reply +- session_sdl_details +- session_sdl_dump +- session_sdl_v2_details +- session_sdl_v2_dump + +In-progress API messages +~~~~~~~~~~~~~~~~~~~~~~~~ + +These messages are provided for testing and experimentation only. +They are *not* subject to any compatibility process, +and therefore can arbitrarily change or disappear at *any* moment. +Also they may have less than satisfactory testing, making +them unsuitable for other use than the technology preview. 
+If you are intending to use these messages in production projects, +please collaborate with the feature maintainer on their productization. + +- abf_itf_attach_add_del +- abf_itf_attach_add_del_reply +- abf_itf_attach_details +- abf_itf_attach_dump +- abf_plugin_get_version +- abf_plugin_get_version_reply +- abf_policy_add_del +- abf_policy_add_del_reply +- abf_policy_details +- abf_policy_dump +- acl_plugin_use_hash_lookup_get +- acl_plugin_use_hash_lookup_get_reply +- acl_plugin_use_hash_lookup_set +- acl_plugin_use_hash_lookup_set_reply +- bpf_trace_filter_set +- bpf_trace_filter_set_reply +- bpf_trace_filter_set_v2 +- bpf_trace_filter_set_v2_reply +- cnat_get_snat_addresses +- cnat_get_snat_addresses_reply +- cnat_session_details +- cnat_session_dump +- cnat_session_purge +- cnat_session_purge_reply +- cnat_set_snat_addresses +- cnat_set_snat_addresses_reply +- cnat_set_snat_policy +- cnat_set_snat_policy_reply +- cnat_snat_policy_add_del_exclude_pfx +- cnat_snat_policy_add_del_exclude_pfx_reply +- cnat_snat_policy_add_del_if +- cnat_snat_policy_add_del_if_reply +- cnat_translation_del +- cnat_translation_del_reply +- cnat_translation_details +- cnat_translation_dump +- cnat_translation_update +- cnat_translation_update_reply +- det44_get_timeouts_reply +- det44_set_timeouts +- det44_set_timeouts_reply +- dev_attach +- dev_attach_reply +- dev_create_port_if +- dev_create_port_if_reply +- dev_detach +- dev_detach_reply +- dev_remove_port_if +- dev_remove_port_if_reply +- flowprobe_get_params +- flowprobe_get_params_reply +- flowprobe_interface_add_del +- flowprobe_interface_add_del_reply +- flowprobe_interface_details +- flowprobe_interface_dump +- flowprobe_set_params +- flowprobe_set_params_reply +- gbp_bridge_domain_add +- gbp_bridge_domain_add_reply +- gbp_bridge_domain_del +- gbp_bridge_domain_del_reply +- gbp_bridge_domain_details +- gbp_bridge_domain_dump +- gbp_bridge_domain_dump_reply +- gbp_contract_add_del +- gbp_contract_add_del_reply +- 
gbp_contract_details +- gbp_contract_dump +- gbp_endpoint_add +- gbp_endpoint_add_reply +- gbp_endpoint_del +- gbp_endpoint_del_reply +- gbp_endpoint_details +- gbp_endpoint_dump +- gbp_endpoint_group_add +- gbp_endpoint_group_add_reply +- gbp_endpoint_group_del +- gbp_endpoint_group_del_reply +- gbp_endpoint_group_details +- gbp_endpoint_group_dump +- gbp_ext_itf_add_del +- gbp_ext_itf_add_del_reply +- gbp_ext_itf_details +- gbp_ext_itf_dump +- gbp_recirc_add_del +- gbp_recirc_add_del_reply +- gbp_recirc_details +- gbp_recirc_dump +- gbp_route_domain_add +- gbp_route_domain_add_reply +- gbp_route_domain_del +- gbp_route_domain_del_reply +- gbp_route_domain_details +- gbp_route_domain_dump +- gbp_route_domain_dump_reply +- gbp_subnet_add_del +- gbp_subnet_add_del_reply +- gbp_subnet_details +- gbp_subnet_dump +- gbp_vxlan_tunnel_add +- gbp_vxlan_tunnel_add_reply +- gbp_vxlan_tunnel_del +- gbp_vxlan_tunnel_del_reply +- gbp_vxlan_tunnel_details +- gbp_vxlan_tunnel_dump +- gtpu_add_del_forward +- gtpu_add_del_forward_reply +- gtpu_add_del_tunnel_v2 +- gtpu_add_del_tunnel_v2_reply +- gtpu_get_transfer_counts +- gtpu_get_transfer_counts_reply +- gtpu_tunnel_v2_details +- gtpu_tunnel_v2_dump +- ikev2_child_sa_v2_details +- ikev2_child_sa_v2_dump +- ikev2_initiate_del_child_sa +- ikev2_initiate_del_child_sa_reply +- ikev2_initiate_del_ike_sa +- ikev2_initiate_del_ike_sa_reply +- ikev2_initiate_rekey_child_sa +- ikev2_initiate_rekey_child_sa_reply +- ikev2_initiate_sa_init +- ikev2_initiate_sa_init_reply +- ikev2_nonce_get +- ikev2_nonce_get_reply +- ikev2_profile_add_del +- ikev2_profile_add_del_reply +- ikev2_profile_details +- ikev2_profile_disable_natt +- ikev2_profile_disable_natt_reply +- ikev2_profile_dump +- ikev2_profile_set_auth +- ikev2_profile_set_auth_reply +- ikev2_profile_set_id +- ikev2_profile_set_id_reply +- ikev2_profile_set_ipsec_udp_port +- ikev2_profile_set_ipsec_udp_port_reply +- ikev2_profile_set_liveness +- ikev2_profile_set_liveness_reply +- 
ikev2_profile_set_ts +- ikev2_profile_set_ts_reply +- ikev2_profile_set_udp_encap +- ikev2_profile_set_udp_encap_reply +- ikev2_sa_v3_details +- ikev2_sa_v3_dump +- ikev2_set_esp_transforms +- ikev2_set_esp_transforms_reply +- ikev2_set_ike_transforms +- ikev2_set_ike_transforms_reply +- ikev2_set_local_key +- ikev2_set_local_key_reply +- ikev2_set_responder +- ikev2_set_responder_hostname +- ikev2_set_responder_hostname_reply +- ikev2_set_responder_reply +- ikev2_set_sa_lifetime +- ikev2_set_sa_lifetime_reply +- ikev2_set_tunnel_interface +- ikev2_set_tunnel_interface_reply +- ikev2_traffic_selector_details +- ikev2_traffic_selector_dump +- ip_neighbor_config_get +- ip_neighbor_config_get_reply +- ip_route_add_del_v2 +- ip_route_add_del_v2_reply +- ip_route_lookup_v2 +- ip_route_lookup_v2_reply +- ip_route_v2_details +- ip_route_v2_dump +- ip_session_redirect_add +- ip_session_redirect_add_reply +- ip_session_redirect_add_v2 +- ip_session_redirect_add_v2_reply +- ip_session_redirect_del +- ip_session_redirect_del_reply +- l2_emulation +- l2_emulation_reply +- lcp_default_ns_get_reply +- lcp_default_ns_set +- lcp_default_ns_set_reply +- lcp_itf_pair_add_del_v2 +- lcp_itf_pair_add_del_v2_reply +- lcp_itf_pair_add_del_v3 +- lcp_itf_pair_add_del_v3_reply +- lcp_itf_pair_details +- lldp_details +- mdata_enable_disable +- mdata_enable_disable_reply +- nat44_ed_vrf_tables_v2_details +- nat44_ed_vrf_tables_v2_dump +- nat44_ei_add_del_address_range +- nat44_ei_add_del_address_range_reply +- nat44_ei_add_del_static_mapping +- nat44_ei_add_del_static_mapping_reply +- nat44_ei_address_details +- nat44_ei_address_dump +- nat44_ei_del_session +- nat44_ei_del_session_reply +- nat44_ei_del_user +- nat44_ei_del_user_reply +- nat44_ei_forwarding_enable_disable +- nat44_ei_forwarding_enable_disable_reply +- nat44_ei_ha_flush +- nat44_ei_ha_flush_reply +- nat44_ei_ha_resync +- nat44_ei_ha_resync_completed_event +- nat44_ei_ha_resync_reply +- nat44_ei_ha_set_failover +- 
nat44_ei_ha_set_failover_reply +- nat44_ei_ha_set_listener +- nat44_ei_ha_set_listener_reply +- nat44_ei_interface_add_del_feature +- nat44_ei_interface_add_del_feature_reply +- nat44_ei_interface_details +- nat44_ei_interface_dump +- nat44_ei_ipfix_enable_disable +- nat44_ei_ipfix_enable_disable_reply +- nat44_ei_plugin_enable_disable +- nat44_ei_plugin_enable_disable_reply +- nat44_ei_set_addr_and_port_alloc_alg +- nat44_ei_set_addr_and_port_alloc_alg_reply +- nat44_ei_set_fq_options +- nat44_ei_set_fq_options_reply +- nat44_ei_set_mss_clamping +- nat44_ei_set_mss_clamping_reply +- nat44_ei_set_timeouts +- nat44_ei_set_timeouts_reply +- nat44_ei_set_workers +- nat44_ei_set_workers_reply +- nat44_ei_show_fq_options +- nat44_ei_show_fq_options_reply +- nat44_ei_show_running_config +- nat44_ei_show_running_config_reply +- nat44_ei_static_mapping_details +- nat44_ei_static_mapping_dump +- nat44_ei_user_details +- nat44_ei_user_dump +- nat44_ei_user_session_details +- nat44_ei_user_session_dump +- nat44_ei_user_session_v2_details +- nat44_ei_user_session_v2_dump +- nat44_ei_worker_details +- nat44_ei_worker_dump +- nat64_plugin_enable_disable +- nat64_plugin_enable_disable_reply +- npt66_binding_add_del +- npt66_binding_add_del_reply +- oddbuf_enable_disable +- oddbuf_enable_disable_reply +- pg_interface_enable_disable_coalesce +- pg_interface_enable_disable_coalesce_reply +- ping_finished_event +- pnat_binding_add +- pnat_binding_add_reply +- pnat_binding_add_v2 +- pnat_binding_add_v2_reply +- pnat_binding_attach +- pnat_binding_attach_reply +- pnat_binding_del +- pnat_binding_del_reply +- pnat_binding_detach +- pnat_binding_detach_reply +- pnat_bindings_details +- pnat_bindings_get +- pnat_bindings_get_reply +- pnat_interfaces_details +- pnat_interfaces_get +- pnat_interfaces_get_reply +- pvti_interface_create +- pvti_interface_create_reply +- pvti_interface_delete +- pvti_interface_delete_reply +- pvti_interface_details +- pvti_interface_dump +- 
sample_macswap_enable_disable +- sample_macswap_enable_disable_reply +- set_ip_flow_hash_v3 +- set_ip_flow_hash_v3_reply +- sflow_enable_disable +- sflow_enable_disable_reply +- sflow_header_bytes_get +- sflow_header_bytes_get_reply +- sflow_header_bytes_set +- sflow_header_bytes_set_reply +- sflow_interface_details +- sflow_interface_dump +- sflow_polling_interval_get +- sflow_polling_interval_get_reply +- sflow_polling_interval_set +- sflow_polling_interval_set_reply +- sflow_sampling_rate_get +- sflow_sampling_rate_get_reply +- sflow_sampling_rate_set +- sflow_sampling_rate_set_reply +- sr_localsids_with_packet_stats_details +- sr_localsids_with_packet_stats_dump +- sr_mobile_localsid_add_del +- sr_mobile_localsid_add_del_reply +- sr_mobile_policy_add +- sr_mobile_policy_add_reply +- sr_policies_with_sl_index_details +- sr_policies_with_sl_index_dump +- sr_policy_add_v2 +- sr_policy_add_v2_reply +- sr_policy_mod_v2 +- sr_policy_mod_v2_reply +- sw_interface_ip6nd_ra_details +- sw_interface_ip6nd_ra_dump +- sw_interface_set_vxlan_gbp_bypass +- sw_interface_set_vxlan_gbp_bypass_reply +- test_addresses +- test_addresses2 +- test_addresses2_reply +- test_addresses3 +- test_addresses3_reply +- test_addresses_reply +- test_empty +- test_empty_reply +- test_enum +- test_enum_reply +- test_interface +- test_interface_reply +- test_prefix +- test_prefix_reply +- test_string +- test_string2 +- test_string2_reply +- test_string_reply +- test_vla +- test_vla2 +- test_vla2_reply +- test_vla3 +- test_vla3_reply +- test_vla4 +- test_vla4_reply +- test_vla5 +- test_vla5_reply +- test_vla_reply +- trace_capture_packets +- trace_capture_packets_reply +- trace_clear_cache +- trace_clear_cache_reply +- trace_clear_capture +- trace_clear_capture_reply +- trace_details +- trace_dump +- trace_dump_reply +- trace_filter_function_details +- trace_filter_function_dump +- trace_set_filter_function +- trace_set_filter_function_reply +- trace_set_filters +- trace_set_filters_reply +- 
trace_v2_details +- trace_v2_dump +- tracenode_enable_disable +- tracenode_enable_disable_reply +- vxlan_gbp_tunnel_add_del +- vxlan_gbp_tunnel_add_del_reply +- vxlan_gbp_tunnel_details +- vxlan_gbp_tunnel_dump +- want_ping_finished_events +- want_ping_finished_events_reply + +Patches that changed API definitions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +``src/plugins/snort/snort.api`` + +* `e3ad5aa68 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=e3ad5aa68>`_ snort: API functions for plugin + +``src/plugins/auto_sdl/auto_sdl.api`` + +* `6d4dbd4f2 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6d4dbd4f2>`_ session: add auto sdl + +``src/plugins/http_static/http_static.api`` + +* `a4597a74a <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=a4597a74a>`_ http_static: api add keepalive-timeout + +``src/plugins/af_xdp/af_xdp.api`` + +* `0dd47bcf2 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=0dd47bcf2>`_ af_xdp: api cleanup + +``src/plugins/ikev2/ikev2.api`` + +* `379a454aa <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=379a454aa>`_ tests: reduce sleep interval in ikev2 sa rekey test + +``src/plugins/sflow/sflow.api`` + +* `e40f8a90b <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=e40f8a90b>`_ sflow: initial checkin + +``src/vnet/dev/dev.api`` + +* `825691419 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=825691419>`_ dev: introduce consistent_qp feature + +``src/vnet/session/session.api`` + +* `e0c4e6e32 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=e0c4e6e32>`_ session: session table holding free appns index +* `6f173171b <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=6f173171b>`_ session: fix SDL to use remote instead local + +``src/vnet/pg/pg.api`` + +* `0b1bd9df3 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=0b1bd9df3>`_ pg: add support to delete pg interface + +``src/vnet/ip/ip.api`` + +* `18eedde9f <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=18eedde9f>`_ ip: add enable ip4 api + +``src/vnet/bfd/bfd.api`` + 
+* `17a918133 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=17a918133>`_ bfd: add support for multihop + +``src/vnet/devices/tap/tapv2.api`` + +* `91e63915e <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=91e63915e>`_ tap: update tap_flags + +``src/vnet/srv6/sr_types.api`` + +* `c68c97083 <https://gerrit.fd.io/r/gitweb?p=vpp.git;a=commit;h=c68c97083>`_ sr : enable SRv6 uSID in the SRv6 API diff --git a/docs/developer/extras/vcl_ldpreload.rst b/docs/developer/extras/vcl_ldpreload.rst index 3740db2ffd2..6064a0ce9e8 120000 --- a/docs/developer/extras/vcl_ldpreload.rst +++ b/docs/developer/extras/vcl_ldpreload.rst @@ -1 +1 @@ -../../../extras/vcl-ldpreload/README.rst
\ No newline at end of file +../../../extras/scripts/host-stack/vcl-ldpreload/README.rst
\ No newline at end of file diff --git a/docs/gettingstarted/progressivevpp/index.rst b/docs/gettingstarted/progressivevpp/index.rst index efe31b12db5..e35daf6ab70 100644 --- a/docs/gettingstarted/progressivevpp/index.rst +++ b/docs/gettingstarted/progressivevpp/index.rst @@ -23,3 +23,4 @@ will discuss basic operations, and the state of a running FD.io VPP on a system. twovppinstances.rst routing.rst switching.rst + nat.rst diff --git a/docs/gettingstarted/progressivevpp/nat.rst b/docs/gettingstarted/progressivevpp/nat.rst new file mode 100644 index 00000000000..52f2a767493 --- /dev/null +++ b/docs/gettingstarted/progressivevpp/nat.rst @@ -0,0 +1,159 @@ +.. _nat: + +.. toctree:: + +Network Address Translation +=========================== + +Skills to be Learned +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Abusing network namespaces for fun and profit +#. Configuring nat address +#. Configuring nat inside and outside interfaces + +FD.io VPP commands learned in this exercise +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. `nat44 add interface address + <https://fd.io/docs/vpp/master/cli-reference/clis/clicmd_src_plugins_nat_nat44-ed.html#nat44-add-interface-address>`__ +#. `set interface nat44 + <https://fd.io/docs/vpp/master/cli-reference/clis/clicmd_src_plugins_nat_nat44-ed.html#set-interface-nat44>`__ + +Topology +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. figure:: /_images/NAT_Topology.jpg + :alt: NAT Topology + + NAT Topology + +Initial state +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Unlike previous exercises, for this one you want to start tabula rasa. + +Note: You will lose all your existing config in your FD.io VPP instances! + +To clear existing config from previous exercises run: + +..
code-block:: console + + ps -ef | grep vpp | awk '{print $2}'| xargs sudo kill + $ sudo ip link del dev vpp1host + $ sudo ip link del dev vpp1vpp2 + +Install vpp-plugins +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +NAT is supported by a plugin, so the respective package needs to be installed + +.. code-block:: console + + $ sudo apt-get install vpp-plugin-core + +Create FD.io VPP instance +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Create one FD.io VPP instance named vpp1. + +Confirm nat44 plugin is present: + +.. code-block:: console + + # vppctl -s /run/vpp/cli-vpp1.sock show plugins | egrep nat44 + 57. nat44_ei_plugin.so 24.02-rc0~124-g2ab902f28 IPv4 Endpoint-Independent NAT (NAT44 EI) + +Please note that earlier versions of VPP and this document referred to the +``snat`` plugin, which `was renamed <https://www.mail-archive.com/vpp-dev@lists.fd.io/msg03299.html>`__. + +Create veth interfaces +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Create a veth interface with one end named ``vpp1outside`` and the other + named ``vpp1outsidehost`` +#. Assign IP address 10.10.1.1/24 to ``vpp1outsidehost`` +#. Create a veth interface with one end named ``vpp1inside`` and the other + named ``vpp1insidehost`` +#. Assign IP address 10.10.2.1/24 to ``vpp1insidehost`` + +Because we'd like to be able to route \*via\* our vpp instance to an +interface on the same host, we are going to put ``vpp1insidehost`` into a +network namespace + +Create a new network namespace 'inside' + +.. code-block:: console + + $ sudo ip netns add inside + +Move interface ``vpp1insidehost`` into the 'inside' namespace: + +.. code-block:: console + + $ sudo ip link set dev vpp1insidehost up netns inside + +Assign an ip address to ``vpp1insidehost`` + +.. code-block:: console + + $ sudo ip netns exec inside ip addr add 10.10.2.1/24 dev vpp1insidehost + +Create a route inside the ``netns``: + +..
code-block:: console + + $ sudo ip netns exec inside ip route add 10.10.1.0/24 via 10.10.2.2 + +Configure vpp outside interface +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Create a vpp host interface connected to vpp1outside +#. Assign ip address 10.10.1.2/24 +#. Create a vpp host interface connected to vpp1inside +#. Assign ip address 10.10.2.2/24 + +Configure nat44 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Enable the nat44 plugin + +.. code-block:: console + + vpp# nat44 plugin enable + +Configure nat44 to use the address of host-vpp1outside + +.. code-block:: console + + vpp# nat44 add interface address host-vpp1outside + +Configure nat44 inside and outside interfaces + +.. code-block:: console + + vpp# set interface nat44 in host-vpp1inside out host-vpp1outside + +Prepare to Observe NAT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Observing NAT in this configuration is interesting. To do so, vagrant +ssh a second time into your VM and run: + +.. code-block:: console + + $ sudo tcpdump -s 0 -i vpp1outsidehost + +Also enable tracing on vpp1 + +Ping via NAT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: console + + $ sudo ip netns exec inside ping -c 3 10.10.1.1 + +Confirm NAT +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Examine the ``tcpdump`` output and vpp1 trace to confirm NAT occurred. diff --git a/extras/hs-test/Makefile b/extras/hs-test/Makefile index 9fbb21c9ccb..5d0940102e8 100644 --- a/extras/hs-test/Makefile +++ b/extras/hs-test/Makefile @@ -57,6 +57,10 @@ ifeq ($(ARCH),) ARCH=$(shell dpkg --print-architecture) endif +ifeq ($(NO_COLOR),) +VERBOSE=false +endif + FORCE_BUILD?=true .PHONY: help @@ -84,12 +88,14 @@ help: @echo " UNCONFIGURE=[true|false] - unconfigure selected test" @echo " DEBUG=[true|false] - attach VPP to GDB" @echo " TEST=[name1,name2...] - specific test(s) to run" + @echo " SKIP=[name1,name2...] 
- specific test(s) to skip" @echo " CPUS=[n-cpus] - number of cpus to allocate to VPP and containers" @echo " VPPSRC=[path-to-vpp-src] - path to vpp source files (for gdb)" @echo " PARALLEL=[n-cpus] - number of test processes to spawn to run in parallel" @echo " REPEAT=[n] - repeat tests up to N times or until a failure occurs" @echo " CPU0=[true|false] - use cpu0" @echo " DRYRUN=[true|false] - set up containers but don't run tests" + @echo " NO_COLOR=[true|false] - disables colorful Docker and Ginkgo output" .PHONY: list-tests list-tests: @@ -106,13 +112,13 @@ build-vpp-debug: .PHONY: build-vpp-gcov build-vpp-gcov: - @$(MAKE) -C ../.. build-vpp-gcov + @$(MAKE) -C ../.. build-gcov .build.ok: build @touch .build.ok -.build.cov.ok: build-vpp-gcov - @touch .build.cov.ok +.build.cov.ok: build-cov + @touch .build.ok .build_debug.ok: build-debug @touch .build.ok @@ -123,7 +129,7 @@ test: .deps.ok .build.ok @bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST) --cpus=$(CPUS) \ --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) --cpu0=$(CPU0) \ - --dryrun=$(DRYRUN); \ + --dryrun=$(DRYRUN) --skip=$(SKIP) --no_color=$(NO_COLOR); \ ./script/compress.sh $$? .PHONY: test-debug @@ -132,16 +138,21 @@ test-debug: .deps.ok .build_debug.ok @bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST) --cpus=$(CPUS) \ --vppsrc=$(VPPSRC) --parallel=$(PARALLEL) --repeat=$(REPEAT) --debug_build=true \ - --cpu0=$(CPU0) --dryrun=$(DRYRUN); \ + --cpu0=$(CPU0) --dryrun=$(DRYRUN) --skip=$(SKIP) --no_color=$(NO_COLOR); \ ./script/compress.sh $$? 
+.PHONY: wipe-lcov +wipe-lcov: + @lcov --zerocounters --directory $(WS_ROOT)/build-root/build-vpp_gcov-native/vpp + .PHONY: test-cov -test-leak: FORCE_BUILD=false -test-cov: .deps.ok .build.cov.ok - @bash ./hs_test.sh --persist=$(PERSIST) --verbose=$(VERBOSE) \ +test-cov: FORCE_BUILD=false +test-cov: .deps.ok .build.cov.ok wipe-lcov + @bash ./hs_test.sh --coverage=true --persist=$(PERSIST) --verbose=$(VERBOSE) \ --unconfigure=$(UNCONFIGURE) --debug=$(DEBUG) --test=$(TEST-HS) --cpus=$(CPUS) \ - --vppsrc=$(VPPSRC) --cpu0=$(CPU0) --dryrun=$(DRYRUN); \ + --vppsrc=$(VPPSRC) --cpu0=$(CPU0) --dryrun=$(DRYRUN) --skip=$(SKIP) --no_color=$(NO_COLOR); \ ./script/compress.sh $$? + $(MAKE) -C ../.. test-cov-post-standalone HS_TEST=1 .PHONY: test-leak test-leak: .deps.ok .build_debug.ok @@ -170,13 +181,13 @@ build-debug: .deps.ok build-vpp-debug build-go @touch .build.ok .deps.ok: - @sudo $(MAKE) install-deps + @$(MAKE) install-deps .PHONY: install-deps install-deps: @rm -f .deps.ok - @apt-get update \ - && apt-get install -y apt-transport-https ca-certificates curl software-properties-common \ + @sudo -E apt-get update + @sudo -E apt-get install -y apt-transport-https ca-certificates curl software-properties-common \ bridge-utils gpg @if [ ! 
-f /usr/share/keyrings/docker-archive-keyring.gpg ] ; then \ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg; \ @@ -184,7 +195,7 @@ install-deps: | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null ; \ apt-get update; \ fi - @apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + @sudo -E apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin @touch .deps.ok .goimports.ok: diff --git a/extras/hs-test/hs_test.sh b/extras/hs-test/hs_test.sh index 4a9378c60b4..e67eb7d20ef 100644 --- a/extras/hs-test/hs_test.sh +++ b/extras/hs-test/hs_test.sh @@ -6,13 +6,16 @@ args= focused_test=0 persist_set=0 dryrun_set=0 +coverage_set=0 unconfigure_set=0 debug_set=0 leak_check_set=0 debug_build= ginkgo_args= tc_names=() +skip_names=() dryrun= +no_color= for i in "$@" do @@ -24,6 +27,13 @@ case "${i}" in persist_set=1 fi ;; + --coverage=*) + coverage="${i#*=}" + if [ "$coverage" = "true" ]; then + args="$args -coverage" + coverage_set=1 + fi + ;; --debug=*) debug="${i#*=}" if [ "$debug" = "true" ]; then @@ -65,6 +75,10 @@ case "${i}" in args="$args -verbose" fi ;; + --skip=*) + skip_list="${i#*=}" + IFS=',' read -r -a skip_names <<< "$skip_list" + ;; --parallel=*) ginkgo_args="$ginkgo_args -procs=${i#*=}" ;; @@ -91,6 +105,12 @@ case "${i}" in leak_check_set=1 fi ;; + --no_color=*) + no_color="${i#*=}" + if [ "$no_color" = "true" ]; then + ginkgo_args="$ginkgo_args --no-color" + fi + ;; esac done @@ -103,6 +123,10 @@ for name in "${tc_names[@]}"; do ginkgo_args="$ginkgo_args --focus $name" done +for skip in "${skip_names[@]}"; do + ginkgo_args="$ginkgo_args --skip $skip" +done + if [ $focused_test -eq 0 ] && { [ $persist_set -eq 1 ] || [ $dryrun_set -eq 1 ]; }; then echo -e "\e[1;31mpersist/dryrun flag is not supported while running all tests!\e[1;0m" exit 2 diff --git a/extras/hs-test/http_test.go 
b/extras/hs-test/http_test.go index 68934550b69..ad43f9d032e 100644 --- a/extras/hs-test/http_test.go +++ b/extras/hs-test/http_test.go @@ -7,7 +7,6 @@ import ( "math/rand" "net" "net/http" - "net/http/httptest" "net/http/httptrace" "os" "strconv" @@ -23,7 +22,7 @@ import ( ) func init() { - RegisterVethTests(HttpCliTest, HttpCliConnectErrorTest) + RegisterVethTests(HttpCliTest, HttpCliConnectErrorTest, HttpCliTlsTest) RegisterSoloVethTests(HttpClientGetMemLeakTest) RegisterNoTopoTests(HeaderServerTest, HttpPersistentConnectionTest, HttpPipeliningTest, HttpStaticMovedTest, HttpStaticNotFoundTest, HttpCliMethodNotAllowedTest, HttpAbsoluteFormUriTest, @@ -31,15 +30,15 @@ func init() { HttpInvalidRequestLineTest, HttpMethodNotImplementedTest, HttpInvalidHeadersTest, HttpContentLengthTest, HttpStaticBuildInUrlGetIfListTest, HttpStaticBuildInUrlGetVersionTest, HttpStaticMacTimeTest, HttpStaticBuildInUrlGetVersionVerboseTest, HttpVersionNotSupportedTest, - HttpInvalidContentLengthTest, HttpInvalidTargetSyntaxTest, HttpStaticPathTraversalTest, HttpUriDecodeTest, + HttpInvalidContentLengthTest, HttpInvalidTargetSyntaxTest, HttpStaticPathSanitizationTest, HttpUriDecodeTest, HttpHeadersTest, HttpStaticFileHandlerTest, HttpStaticFileHandlerDefaultMaxAgeTest, HttpClientTest, HttpClientErrRespTest, HttpClientPostFormTest, HttpClientGet128kbResponseTest, HttpClientGetResponseBodyTest, HttpClientGetNoResponseBodyTest, HttpClientPostFileTest, HttpClientPostFilePtrTest, HttpUnitTest, HttpRequestLineTest, HttpClientGetTimeout, HttpStaticFileHandlerWrkTest, HttpStaticUrlHandlerWrkTest, HttpConnTimeoutTest, - HttpClientGetRepeat, HttpClientPostRepeat, HttpIgnoreH2UpgradeTest, HttpInvalidAuthorityFormUriTest, HttpHeaderErrorConnectionDropTest) + HttpClientGetRepeatTest, HttpClientPostRepeatTest, HttpIgnoreH2UpgradeTest, HttpInvalidAuthorityFormUriTest, HttpHeaderErrorConnectionDropTest) RegisterNoTopoSoloTests(HttpStaticPromTest, HttpGetTpsTest, HttpGetTpsInterruptModeTest, 
PromConcurrentConnectionsTest, PromMemLeakTest, HttpClientPostMemLeakTest, HttpInvalidClientRequestMemLeakTest, HttpPostTpsTest, HttpPostTpsInterruptModeTest, - PromConsecutiveConnectionsTest, HttpGetTpsTlsTest, HttpPostTpsTlsTest) + PromConsecutiveConnectionsTest, HttpGetTpsTlsTest, HttpPostTpsTlsTest, HttpClientGetRepeatMTTest, HttpClientPtrGetRepeatMTTest) } const wwwRootPath = "/tmp/www_root" @@ -242,6 +241,25 @@ func HttpCliTest(s *VethsSuite) { s.AssertContains(o, "</html>", "</html> not found in the result!") } +func HttpCliTlsTest(s *VethsSuite) { + uri := "tls://" + s.Interfaces.Server.Ip4AddressString() + "/443" + + s.Containers.ServerVpp.VppInstance.Vppctl("http cli server uri " + uri) + + o := s.Containers.ClientVpp.VppInstance.Vppctl("http cli client" + + " uri " + uri + " query /show/version") + s.Log(o) + s.AssertContains(o, "<html>", "<html> not found in the result!") + s.AssertContains(o, "</html>", "</html> not found in the result!") + + /* second request to test postponed ho-cleanup */ + o = s.Containers.ClientVpp.VppInstance.Vppctl("http cli client" + + " uri " + uri + " query /show/version") + s.Log(o) + s.AssertContains(o, "<html>", "<html> not found in the result!") + s.AssertContains(o, "</html>", "</html> not found in the result!") +} + func HttpCliConnectErrorTest(s *VethsSuite) { uri := "http://" + s.Interfaces.Server.Ip4AddressString() + "/80" @@ -319,9 +337,9 @@ func HttpClientPostFormTest(s *NoTopoSuite) { server.Start() defer server.Close() - uri := "http://" + serverAddress + "/80" + uri := "http://" + serverAddress + "/test" vpp := s.Containers.Vpp.VppInstance - o := vpp.Vppctl("http client post verbose header Hello:World uri " + uri + " target /test data " + body) + o := vpp.Vppctl("http client post verbose header Hello:World uri " + uri + " data " + body) s.Log(o) s.AssertContains(o, "200 OK") @@ -347,7 +365,6 @@ func HttpClientGetNoResponseBodyTest(s *NoTopoSuite) { func httpClientGet(s *NoTopoSuite, response string, size int) { 
serverAddress := s.HostAddr() vpp := s.Containers.Vpp.VppInstance - server := ghttp.NewUnstartedServer() l, err := net.Listen("tcp", serverAddress+":80") s.AssertNil(err, fmt.Sprint(err)) @@ -355,7 +372,7 @@ func httpClientGet(s *NoTopoSuite, response string, size int) { server.AppendHandlers( ghttp.CombineHandlers( s.LogHttpReq(false), - ghttp.VerifyRequest("GET", "/test"), + ghttp.VerifyRequest("GET", "/"), ghttp.VerifyHeaderKV("Hello", "World"), ghttp.VerifyHeaderKV("Test-H2", "Test-K2"), ghttp.RespondWith(http.StatusOK, string(response), http.Header{"Content-Length": {strconv.Itoa(size)}}), @@ -363,8 +380,8 @@ func httpClientGet(s *NoTopoSuite, response string, size int) { server.Start() defer server.Close() - uri := "http://" + serverAddress + "/80" - cmd := "http client use-ptr verbose header Hello:World header Test-H2:Test-K2 save-to response.txt uri " + uri + " target /test" + uri := "http://" + serverAddress + cmd := "http client use-ptr verbose header Hello:World header Test-H2:Test-K2 save-to response.txt uri " + uri o := vpp.Vppctl(cmd) outputLen := len(o) @@ -382,35 +399,37 @@ func httpClientGet(s *NoTopoSuite, response string, size int) { s.AssertContains(file_contents, response) } -func startSimpleServer(s *NoTopoSuite, replyCount *int, serverAddress string) (server *httptest.Server) { - var err error - server = httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprintf(w, "Hello") - *replyCount++ - })) - server.Listener, err = net.Listen("tcp", serverAddress+":80") - s.AssertNil(err, "Error while creating listener.") - - server.Start() +func HttpClientGetRepeatMTTest(s *NoTopoSuite) { + httpClientRepeat(s, "", "sessions 2") +} - return server +func HttpClientPtrGetRepeatMTTest(s *NoTopoSuite) { + httpClientRepeat(s, "", "use-ptr sessions 2") } -func HttpClientGetRepeat(s *NoTopoSuite) { - httpClientRepeat(s, "") +func HttpClientGetRepeatTest(s *NoTopoSuite) { + httpClientRepeat(s, "", "") } -func 
HttpClientPostRepeat(s *NoTopoSuite) { - httpClientRepeat(s, "post") +func HttpClientPostRepeatTest(s *NoTopoSuite) { + httpClientRepeat(s, "post", "") } -func httpClientRepeat(s *NoTopoSuite, requestMethod string) { - replyCount := 0 +func httpClientRepeat(s *NoTopoSuite, requestMethod string, clientArgs string) { vpp := s.Containers.Vpp.VppInstance - serverAddress := s.HostAddr() + logPath := s.Containers.NginxServer.GetContainerWorkDir() + "/" + s.Containers.NginxServer.Name + "-access.log" + serverAddress := s.Interfaces.Tap.Ip4AddressString() + replyCountInt := 0 repeatAmount := 10000 - server := startSimpleServer(s, &replyCount, serverAddress) - defer server.Close() + durationInSec := 10 + var err error + + // recreate interfaces with RX-queues + s.AssertNil(vpp.DeleteTap(s.Interfaces.Tap)) + s.AssertNil(vpp.CreateTap(s.Interfaces.Tap, 2, 2)) + + s.CreateNginxServer() + s.AssertNil(s.Containers.NginxServer.Start()) if requestMethod == "post" { fileName := "/tmp/test_file.txt" @@ -419,41 +438,45 @@ func httpClientRepeat(s *NoTopoSuite, requestMethod string) { requestMethod += " file /tmp/test_file.txt" } - uri := "http://" + serverAddress + "/80" - cmd := fmt.Sprintf("http client %s use-ptr duration 10 header Hello:World uri %s target /index.html", - requestMethod, uri) + uri := "http://" + serverAddress + ":" + s.GetPortFromPpid() + "/index" + cmd := fmt.Sprintf("http client %s %s duration %d header Hello:World uri %s", + requestMethod, clientArgs, durationInSec, uri) - s.Log("Duration 10s") + s.Log("Duration %ds", durationInSec) o := vpp.Vppctl(cmd) - outputLen := len(o) - if outputLen > 500 { - s.Log(o[:500]) - s.Log("* HST Framework: output limited to 500 chars to avoid flooding the console. 
Output length: " + fmt.Sprint(outputLen)) - } else { - s.Log(o) + s.Log(o) + + replyCount := s.Containers.NginxServer.Exec(false, "awk 'END { print NR }' "+logPath) + if replyCount != "" { + replyCountInt, err = strconv.Atoi(replyCount[:len(replyCount)-1]) + s.AssertNil(err) } - s.Log("Server response count: %d", replyCount) + // empty the log file + s.Containers.NginxServer.Exec(false, "truncate -s 0 "+logPath) + + s.Log("Server response count: %d", replyCountInt) s.AssertNotNil(o) s.AssertNotContains(o, "error") - s.AssertGreaterThan(replyCount, 15000) + s.AssertGreaterThan(replyCountInt, 15000) - cmd = fmt.Sprintf("http client %s use-ptr repeat %d header Hello:World uri %s target /index.html", - requestMethod, repeatAmount, uri) + replyCount = "" + cmd = fmt.Sprintf("http client %s %s repeat %d header Hello:World uri %s", + requestMethod, clientArgs, repeatAmount, uri) - replyCount = 0 + s.AssertNil(err, fmt.Sprint(err)) s.Log("Repeat %d", repeatAmount) o = vpp.Vppctl(cmd) - outputLen = len(o) - if outputLen > 500 { - s.Log(o[:500]) - s.Log("* HST Framework: output limited to 500 chars to avoid flooding the console. 
Output length: " + fmt.Sprint(outputLen)) - } else { - s.Log(o) + s.Log(o) + + replyCount = s.Containers.NginxServer.Exec(false, "awk 'END { print NR }' "+logPath) + if replyCount != "" { + replyCountInt, err = strconv.Atoi(replyCount[:len(replyCount)-1]) + s.AssertNil(err) } - s.Log("Server response count: %d", replyCount) + s.Log("Server response count: %d", replyCountInt) s.AssertNotNil(o) s.AssertNotContains(o, "error") - s.AssertEqual(repeatAmount, replyCount) + s.AssertEqual(repeatAmount, replyCountInt) } func HttpClientGetTimeout(s *NoTopoSuite) { @@ -475,8 +498,8 @@ func HttpClientGetTimeout(s *NoTopoSuite) { )) server.Start() defer server.Close() - uri := "http://" + serverAddress + "/" + s.GetPortFromPpid() - cmd := "http client verbose timeout 1 uri " + uri + " target /timeout" + uri := "http://" + serverAddress + ":" + s.GetPortFromPpid() + "/timeout" + cmd := "http client verbose timeout 1 uri " + uri o := vpp.Vppctl(cmd) s.Log(o) @@ -505,8 +528,8 @@ func httpClientPostFile(s *NoTopoSuite, usePtr bool, fileSize int) { server.Start() defer server.Close() - uri := "http://" + serverAddress + "/80" - cmd := "http client post verbose uri " + uri + " target /test file " + fileName + uri := "http://" + serverAddress + "/test" + cmd := "http client post verbose uri " + uri + " file " + fileName if usePtr { cmd += " use-ptr" } @@ -860,12 +883,15 @@ func HttpStaticFileHandlerTestFunction(s *NoTopoSuite, max_age string) { s.AssertContains(o, "page.html") } -func HttpStaticPathTraversalTest(s *NoTopoSuite) { +func HttpStaticPathSanitizationTest(s *NoTopoSuite) { vpp := s.Containers.Vpp.VppInstance vpp.Container.Exec(false, "mkdir -p "+wwwRootPath) vpp.Container.Exec(false, "mkdir -p "+"/tmp/secret_folder") err := vpp.Container.CreateFile("/tmp/secret_folder/secret_file.txt", "secret") s.AssertNil(err, fmt.Sprint(err)) + indexContent := "<html><body>index</body></html>" + err = vpp.Container.CreateFile(wwwRootPath+"/index.html", indexContent) + s.AssertNil(err, 
fmt.Sprint(err)) serverAddress := s.VppAddr() s.Log(vpp.Vppctl("http static server www-root " + wwwRootPath + " uri tcp://" + serverAddress + "/80 debug")) @@ -880,6 +906,26 @@ func HttpStaticPathTraversalTest(s *NoTopoSuite) { s.AssertHttpHeaderNotPresent(resp, "Content-Type") s.AssertHttpHeaderNotPresent(resp, "Cache-Control") s.AssertHttpContentLength(resp, int64(0)) + + req, err = http.NewRequest("GET", "http://"+serverAddress+":80//////fake/directory///../././//../../secret_folder/secret_file.txt", nil) + s.AssertNil(err, fmt.Sprint(err)) + resp, err = client.Do(req) + s.AssertNil(err, fmt.Sprint(err)) + defer resp.Body.Close() + s.Log(DumpHttpResp(resp, true)) + s.AssertHttpStatus(resp, 404) + s.AssertHttpHeaderNotPresent(resp, "Content-Type") + s.AssertHttpHeaderNotPresent(resp, "Cache-Control") + s.AssertHttpContentLength(resp, int64(0)) + + req, err = http.NewRequest("GET", "http://"+serverAddress+":80/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////", nil) + s.AssertNil(err, fmt.Sprint(err)) + 
resp, err = client.Do(req) + s.AssertNil(err, fmt.Sprint(err)) + defer resp.Body.Close() + s.Log(DumpHttpResp(resp, true)) + s.AssertHttpStatus(resp, 301) + s.AssertHttpHeaderWithValue(resp, "Location", "http://"+serverAddress+"/index.html") } func HttpStaticMovedTest(s *NoTopoSuite) { diff --git a/extras/hs-test/infra/container.go b/extras/hs-test/infra/container.go index 6605c866939..918c19e669e 100644 --- a/extras/hs-test/infra/container.go +++ b/extras/hs-test/infra/container.go @@ -533,12 +533,15 @@ func (c *Container) stop() error { if c.VppInstance != nil && c.VppInstance.ApiStream != nil { c.VppInstance.saveLogs() c.VppInstance.Disconnect() + c.VppInstance.Stop() } + timeout := 0 c.VppInstance = nil c.saveLogs() - c.Suite.Log("Stopping container " + c.Name) - timeout := 0 + if c.Suite.CoverageRun { + timeout = 3 + } if err := c.Suite.Docker.ContainerStop(c.ctx, c.ID, containerTypes.StopOptions{Timeout: &timeout}); err != nil { return err } diff --git a/extras/hs-test/infra/hst_suite.go b/extras/hs-test/infra/hst_suite.go index d91958e33d9..d44b76f3e1f 100644 --- a/extras/hs-test/infra/hst_suite.go +++ b/extras/hs-test/infra/hst_suite.go @@ -34,6 +34,7 @@ const ( var IsPersistent = flag.Bool("persist", false, "persists topology config") var IsVerbose = flag.Bool("verbose", false, "verbose test output") +var IsCoverage = flag.Bool("coverage", false, "use coverage run config") var IsUnconfiguring = flag.Bool("unconfigure", false, "remove topology") var IsVppDebug = flag.Bool("debug", false, "attach gdb to vpp") var NConfiguredCpus = flag.Int("cpus", 1, "number of CPUs assigned to vpp") @@ -62,6 +63,7 @@ type HstSuite struct { Logger *log.Logger LogFile *os.File Docker *client.Client + CoverageRun bool } type colors struct { @@ -170,6 +172,7 @@ func (s *HstSuite) SetupSuite() { Fail("failed to init cpu allocator: " + fmt.Sprint(err)) } s.CpuCount = *NConfiguredCpus + s.CoverageRun = *IsCoverage } func (s *HstSuite) AllocateCpus(containerName string) []int { 
diff --git a/extras/hs-test/infra/suite_no_topo.go b/extras/hs-test/infra/suite_no_topo.go index 1c7b6fe91c3..d084413f7e6 100644 --- a/extras/hs-test/infra/suite_no_topo.go +++ b/extras/hs-test/infra/suite_no_topo.go @@ -18,13 +18,15 @@ type NoTopoSuite struct { Tap *NetInterface } Containers struct { - Vpp *Container - Nginx *Container - NginxHttp3 *Container - Wrk *Container - Curl *Container - Ab *Container + Vpp *Container + Nginx *Container + NginxHttp3 *Container + NginxServer *Container + Wrk *Container + Curl *Container + Ab *Container } + NginxServerPort string } func RegisterNoTopoTests(tests ...func(s *NoTopoSuite)) { @@ -42,6 +44,7 @@ func (s *NoTopoSuite) SetupSuite() { s.Containers.Vpp = s.GetContainerByName("vpp") s.Containers.Nginx = s.GetContainerByName("nginx") s.Containers.NginxHttp3 = s.GetContainerByName("nginx-http3") + s.Containers.NginxServer = s.GetTransientContainerByName("nginx-server") s.Containers.Wrk = s.GetContainerByName("wrk") s.Containers.Curl = s.GetContainerByName("curl") s.Containers.Ab = s.GetContainerByName("ab") @@ -101,6 +104,28 @@ func (s *NoTopoSuite) CreateNginxConfig(container *Container, multiThreadWorkers ) } +// Creates container and config. 
+func (s *NoTopoSuite) CreateNginxServer() { + s.AssertNil(s.Containers.NginxServer.Create()) + s.NginxServerPort = s.GetPortFromPpid() + nginxSettings := struct { + LogPrefix string + Address string + Port string + Timeout int + }{ + LogPrefix: s.Containers.NginxServer.Name, + Address: s.Interfaces.Tap.Ip4AddressString(), + Port: s.NginxServerPort, + Timeout: 600, + } + s.Containers.NginxServer.CreateConfigFromTemplate( + "/nginx.conf", + "./resources/nginx/nginx_server.conf", + nginxSettings, + ) +} + func (s *NoTopoSuite) AddNginxVclConfig(multiThreadWorkers bool) { vclFileName := s.Containers.Nginx.GetHostWorkDir() + "/vcl.conf" appSocketApi := fmt.Sprintf("app-socket-api %s/var/run/app_ns_sockets/default", diff --git a/extras/hs-test/infra/vppinstance.go b/extras/hs-test/infra/vppinstance.go index d9aa418c5ec..370d2be38d1 100644 --- a/extras/hs-test/infra/vppinstance.go +++ b/extras/hs-test/infra/vppinstance.go @@ -242,6 +242,14 @@ func (vpp *VppInstance) Start() error { return nil } +func (vpp *VppInstance) Stop() { + pid := strings.TrimSpace(vpp.Container.Exec(false, "pidof vpp")) + // Stop VPP only if it's still running + if len(pid) > 0 { + vpp.Container.Exec(false, "bash -c \"kill -15 "+pid+"\"") + } +} + func (vpp *VppInstance) Vppctl(command string, arguments ...any) string { vppCliCommand := fmt.Sprintf(command, arguments...) containerExecCommand := fmt.Sprintf("docker exec --detach=false %[1]s vppctl -s %[2]s %[3]s", diff --git a/extras/hs-test/script/build_hst.sh b/extras/hs-test/script/build_hst.sh index 4e03453fbe9..78433616146 100755 --- a/extras/hs-test/script/build_hst.sh +++ b/extras/hs-test/script/build_hst.sh @@ -44,7 +44,7 @@ if [ -d "${DOCKER_BUILD_DIR}" ] ; then DOCKER_HST_BUILDER="hst_builder" set -x if ! 
docker buildx ls --format "{{.Name}}" | grep -q "${DOCKER_HST_BUILDER}"; then - docker buildx create --name=${DOCKER_HST_BUILDER} --driver=docker-container --use --bootstrap || true + docker buildx create --use --driver-opt env.http_proxy="$HTTP_PROXY" --driver-opt env.https_proxy="$HTTP_PROXY" --driver-opt '"env.no_proxy='"$NO_PROXY"'"' --name=${DOCKER_HST_BUILDER} --driver=docker-container --use --bootstrap || true fi set -x DOCKER_CACHE_ARGS="--builder=${DOCKER_HST_BUILDER} --load --cache-to type=local,dest=${DOCKER_CACHE_DIR},mode=max --cache-from type=local,src=${DOCKER_CACHE_DIR}" @@ -99,6 +99,12 @@ docker_build hs-test/nginx-http3 nginx-http3 docker_build hs-test/ab ab docker_build hs-test/wrk wrk +# make it multi-user friendly +if [ -d "${DOCKER_CACHE_DIR}" ] ; then + chgrp -R docker "${DOCKER_CACHE_DIR}" + chmod -R g+rwx "${DOCKER_CACHE_DIR}" +fi + # cleanup detached images images=$(docker images --filter "dangling=true" -q --no-trunc) if [ "$images" != "" ]; then diff --git a/extras/hs-test/topo-containers/single.yaml b/extras/hs-test/topo-containers/single.yaml index b4449dc1918..2f5f31b6879 100644 --- a/extras/hs-test/topo-containers/single.yaml +++ b/extras/hs-test/topo-containers/single.yaml @@ -28,6 +28,14 @@ containers: image: "hs-test/nginx-http3" is-optional: true + - name: "nginx-server" + volumes: + - <<: *shared-vol + container-dir: "/tmp/nginx" + is-default-work-dir: true + image: "hs-test/nginx-server" + is-optional: true + - name: "ab" image: "hs-test/ab" is-optional: true diff --git a/extras/libmemif/CMakeLists.txt b/extras/libmemif/CMakeLists.txt index 1526abdce3a..f2e1008c628 100644 --- a/extras/libmemif/CMakeLists.txt +++ b/extras/libmemif/CMakeLists.txt @@ -17,7 +17,7 @@ project(memif) set(CMAKE_C_STANDARD 11) include(CheckCCompilerFlag) -include(CheckFunctionExists) +include(CheckSymbolExists) find_package(Git REQUIRED) include(ExternalProject) @@ -26,7 +26,9 @@ set(UNITY unity_project) ExternalProject_Add( unity_project GIT_REPOSITORY 
https://github.com/ThrowTheSwitch/Unity.git - GIT_TAG cf949f45ca6d172a177b00da21310607b97bc7a7 + # TODO: bump to the stable version as soon as it's available, + # current 2.6.0 couldn't be compiled with the modern compilers + GIT_TAG 73237c5d224169c7b4d2ec8321f9ac92e8071708 PREFIX ${PROJECT_BINARY_DIR}/external/${UNITY} INSTALL_COMMAND cmake --install . --prefix ${PROJECT_BINARY_DIR} @@ -72,11 +74,17 @@ endif() message(STATUS "System Architecture: ${CMAKE_SYSTEM_PROCESSOR}") message(STATUS "Libmemif Cacheline Size: ${LIBMEMIF_CACHELINE_SIZE}") -check_function_exists(memfd_create HAVE_MEMFD_CREATE) +list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE") +check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) if(${HAVE_MEMFD_CREATE}) add_definitions(-DHAVE_MEMFD_CREATE) endif() +check_symbol_exists(strlcpy "string.h" HAVE_STRLCPY) +if(${HAVE_STRLCPY}) + add_definitions(-DHAVE_STRLCPY) +endif() + include_directories(src) add_subdirectory(src) diff --git a/extras/libmemif/src/CMakeLists.txt b/extras/libmemif/src/CMakeLists.txt index 8b3223d0990..0fbcf4b7361 100644 --- a/extras/libmemif/src/CMakeLists.txt +++ b/extras/libmemif/src/CMakeLists.txt @@ -36,12 +36,6 @@ add_library(memif SHARED ${MEMIF_SOURCES}) target_link_libraries(memif ${CMAKE_THREAD_LIBS_INIT}) target_compile_definitions(memif PUBLIC MEMIF_CACHELINE_SIZE=${LIBMEMIF_CACHELINE_SIZE}) -find_library(LIB_BSD bsd) -if(LIB_BSD) - add_compile_definitions(HAS_LIB_BSD) - target_link_libraries(memif ${LIB_BSD}) -endif() - foreach(file ${MEMIF_HEADERS}) get_filename_component(dir ${file} DIRECTORY) install( diff --git a/extras/libmemif/src/memif_private.h b/extras/libmemif/src/memif_private.h index 71a4bc879f4..4ab82894292 100644 --- a/extras/libmemif/src/memif_private.h +++ b/extras/libmemif/src/memif_private.h @@ -67,7 +67,7 @@ _Static_assert (strlen (MEMIF_DEFAULT_APP_NAME) <= MEMIF_NAME_LEN, #define DBG(...) 
#endif /* MEMIF_DBG */ -#ifndef HAS_LIB_BSD +#ifndef HAVE_STRLCPY static inline size_t strlcpy (char *dest, const char *src, size_t len) { @@ -90,8 +90,6 @@ strlcpy (char *dest, const char *src, size_t len) return (s - src - 1); } -#else -#include <bsd/string.h> #endif typedef enum diff --git a/extras/libmemif/test/suite_main/CMakeLists.txt b/extras/libmemif/test/suite_main/CMakeLists.txt index 7a2940098e0..7505b5f9d7f 100644 --- a/extras/libmemif/test/suite_main/CMakeLists.txt +++ b/extras/libmemif/test/suite_main/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) set (This MemifMainTest) diff --git a/extras/libmemif/test/suite_socket/CMakeLists.txt b/extras/libmemif/test/suite_socket/CMakeLists.txt index 5ac66a06cfa..3c33095c1de 100644 --- a/extras/libmemif/test/suite_socket/CMakeLists.txt +++ b/extras/libmemif/test/suite_socket/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) set (This MemifSocketTest) diff --git a/src/scripts/host-stack/cc_plots.py b/extras/scripts/host-stack/cc_plots.py index f7953f223d4..f7953f223d4 100755 --- a/src/scripts/host-stack/cc_plots.py +++ b/extras/scripts/host-stack/cc_plots.py diff --git a/src/scripts/host-stack/convert_evt b/extras/scripts/host-stack/convert_evt index 1aba67d0268..1aba67d0268 100755 --- a/src/scripts/host-stack/convert_evt +++ b/extras/scripts/host-stack/convert_evt diff --git a/extras/scripts/host-stack/perf-tests/conf/dpdk_mt.cli b/extras/scripts/host-stack/perf-tests/conf/dpdk_mt.cli new file mode 100644 index 00000000000..8df8ee1e38c --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/conf/dpdk_mt.cli @@ -0,0 +1,6 @@ +comment {SPDX-License-Identifier: Apache-2.0} +comment {Copyright (c) 2025 Cisco Systems, Inc.} +set int ip address HundredGigabitEthernet16/0/0 6.0.1.1/24 +set int state HundredGigabitEthernet16/0/0 up + +session enable diff --git 
a/extras/scripts/host-stack/perf-tests/conf/dpdk_st.cli b/extras/scripts/host-stack/perf-tests/conf/dpdk_st.cli new file mode 100644 index 00000000000..97aeb3916f8 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/conf/dpdk_st.cli @@ -0,0 +1,5 @@ +comment {SPDX-License-Identifier: Apache-2.0} +comment {Copyright (c) 2025 Cisco Systems, Inc.} +set int ip address HundredGigabitEthernet16/0/0 6.0.1.1/24 +set int state HundredGigabitEthernet16/0/0 up +session enable diff --git a/extras/scripts/host-stack/perf-tests/dpdk_mt b/extras/scripts/host-stack/perf-tests/dpdk_mt new file mode 100755 index 00000000000..6e095da4228 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/dpdk_mt @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. + +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS="${VPP_WS}"/extras/scripts/host-stack/perf-tests +CFG_DIR="${CFG_WS}"/conf +CFG_FILE=dpdk_mt.cli +BASE_DIR="${VPP_WS}"/build-root +BIN_FLAVOR=release + +DPDK_DEV=0000:16:00.0 +NUM_MBUFS=16536 +IF_RX_DESC=256 +IF_TX_DESC=256 +DPDK_CSUM=enable-tcp-udp-checksum + +SKIP_CORE=0 +MAIN_CORE=1 +WORKERS=4 +CFG_CORELIST_WKS="corelist-workers 2-5" + +HEAP_SIZE=4g +API_SIZE=1G +API_GLOBAL_SIZE=2000M +EVT_LOG_SIZE=400000 + +SOCK=/run/vpp-api.sock +SESSION_CFG="session { event-queue-length 100000 use-app-socket-api }" + +#QUIC_ENABLE=1 +#LOCAL_OSSL=/scratch/fcoras/openssl + +function usage() { + echo "Usage: cmd [-f release|debug] [-d dpdk_device_pci_address] [-w vpp_workspace]" +} + +while getopts "f:d:w:h" opt; do + case ${opt} in + h) usage + exit 0 + ;; + w) VPP_WS=$OPTARG + ;; + f) BIN_FLAVOR=$OPTARG + ;; + d) DPDK_DEV=$OPTARG + ;; + \?) 
echo "Usage: cmd [-f release|debug] [-d dpdk_device_pci_address]" + exit 1 + ;; + esac +done +shift $((OPTIND -1)) + +source "${CFG_WS}"/start_vpp.sh + +start_vpp diff --git a/extras/scripts/host-stack/perf-tests/dpdk_st b/extras/scripts/host-stack/perf-tests/dpdk_st new file mode 100755 index 00000000000..a35889f0238 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/dpdk_st @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. + +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS="${VPP_WS}"/extras/scripts/host-stack/perf-tests +CFG_DIR="${CFG_WS}"/conf +CFG_FILE=dpdk_st.cli +BASE_DIR="${VPP_WS}"/build-root +BIN_FLAVOR=release + +#DPDK_DISABLE=1 +DPDK_DEV=0000:16:00.0 +IF_RX_DESC=512 +IF_TX_DESC=256 +DPDK_CSUM=enable-tcp-udp-checksum + +NUM_MBUFS=16536 + +HEAP_SIZE=4g +API_SIZE=1G +API_GLOBAL_SIZE=2000M +EVT_LOG_SIZE=400000 + +MAIN_CORE=1 +WORKERS=1 +CFG_CORELIST_WKS="corelist-workers 2" + +SOCK=/run/vpp-api.sock +CLI_SOCK=/run/vpp/cli.sock +SESSION_CFG="session { event-queue-length 100000 use-app-socket-api }" + +TCP_CFG="tcp { max-rx-fifo 128m }" +TLS_CFG="tls { fifo-size 1m }" +#QUIC_ENABLE=1 +#LOCAL_OSSL=/scratch/fcoras/openssl + +function usage() { + echo "Usage: cmd [-f release|debug] [-d dpdk_device_pci_address] [-w vpp_workspace]" +} + +while getopts "f:d:w:h" opt; do + case ${opt} in + h) usage + exit 0 + ;; + w) VPP_WS=$OPTARG + ;; + f) BIN_FLAVOR=$OPTARG + ;; + d) DPDK_DEV=$OPTARG + ;; + \?) usage + exit 1 + ;; + esac +done +shift $((OPTIND -1)) + +source "${CFG_WS}"/start_vpp.sh + +start_vpp diff --git a/extras/scripts/host-stack/perf-tests/gdb_ldp_init b/extras/scripts/host-stack/perf-tests/gdb_ldp_init new file mode 100644 index 00000000000..7c2f28e2083 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/gdb_ldp_init @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. 
+ +handle SIGUSR1 nostop noprint pass +set print pretty on +set exec-wrapper env 'LD_PRELOAD=/scratch/fcoras/vpp/build-root/build-vpp-native/vpp/lib/libvcl_ldpreload.so' +set environment LDP_DEBUG=1 +set environment VCL_CONFIG=/scratch/fcoras/vpp/extras/scripts/host-stack/perf-tests/vcl_perf.conf diff --git a/extras/scripts/host-stack/perf-tests/start_vpp.sh b/extras/scripts/host-stack/perf-tests/start_vpp.sh new file mode 100755 index 00000000000..27614713b76 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/start_vpp.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. + +if [ $BIN_FLAVOR = "debug" ] +then + BIN_DIR=install-vpp_debug-native + BIN_EXEC_PREFIX="gdb --args" +else + BIN_DIR=install-vpp-native + BIN_EXEC_PREFIX="gdb --args" +fi + +BIN_PATH=$BASE_DIR/$BIN_DIR + +if [ "$#" -eq 1 ] +then + echo $1 + CFG_FILE=$1 +fi + +CFG_MBUFS="" +if [ -n "$NUM_MBUFS" ]; then + CFG_MBUFS="buffers-per-numa $NUM_MBUFS" +fi + +if [ -z $HEAP_SIZE ]; then + HEAP_SIZE=4g +fi + +if [ -z $SKIP_CORE ]; then + SKIP_CORE=0 +fi + +if [ -z $SKIP_CORE ]; then + MAIN_CORE=1 +fi + +if [ -z $EVT_LOG_SIZE ]; then + EVT_LOG_SIZE=0 +fi + +if [ -z $API_GLOBAL_SIZE ]; then + API_GLOBAL_SIZE=64M +fi + +if [ -z $API_SIZE ]; then + API_SIZE=16M +fi + +if [ -z ${WORKERS+x} ]; then + WORKERS=0 + CFG_RX_QS=1 +else + CFG_WORKERS="workers $WORKERS" + CFG_RX_QS=$(( $WORKERS )) +fi + +SOCK_CFG="" +if [ -n "$SOCK" ]; then + SOCK_CFG="socksvr { socket-name ${SOCK} }" +fi + +if [ -z $IF_RX_DESC ]; then + IF_RX_DESC=4096 +fi + +if [ -z $IF_TX_DESC ]; then + IF_TX_DESC=4096 +fi + +API_PREFIX_CFG="" +if [ -n "$API_PREFIX" ]; then + API_PREFIX_CFG="prefix $API_PREFIX" +fi + +CLI_LISTEN_CFG="cli-listen localhost:5002" +if [ -n "$CLI_SOCK" ]; then + CLI_LISTEN_CFG="cli-listen $CLI_SOCK" +fi + +if [[ -z "$DPDK_DISABLE" ]]; then + DPDK_CFG="dpdk { \ + dev $DPDK_DEV { \ + num-tx-desc $IF_TX_DESC \ + num-rx-desc $IF_RX_DESC \ + 
num-rx-queues $CFG_RX_QS \ + } \ + $SOCK_MEM_CFG \ + $DPDK_CSUM \ + }" +else + DPDK_PLUGIN_DISABLE="plugin dpdk_plugin.so {disable}" +fi + +if [[ -n "$QUIC_ENABLE" ]]; then + QUIC_PLUGIN="plugin quic_plugin.so {enable}" +fi + +if [[ -n "$SRTP_ENABLE" ]]; then + SRTP_PLUGIN="plugin srtp_plugin.so {enable}" +fi + +# custom openssl locally built +LD_LIBP="" +if [[ -n "${LOCAL_OSSL}" ]]; then + LD_LIBP="LD_LIBRARY_PATH=${LOCAL_OSSL}" +fi + +function start_vpp +{ + sudo $LD_LIBP $BIN_EXEC_PREFIX $BIN_PATH/vpp/bin/vpp \ + unix { \ + interactive log /tmp/vpp.log \ + full-coredump \ + exec $CFG_DIR/$CFG_FILE \ + $CLI_LISTEN_CFG \ + poll-sleep-usec 0 \ + } \ + heapsize $HEAP_SIZE \ + api-trace { on } \ + api-segment { \ + global-size $API_GLOBAL_SIZE \ + api-size $API_SIZE \ + gid vpp \ + $API_PREFIX_CFG \ + } \ + vlib { \ + elog-events $EVT_LOG_SIZE \ + elog-post-mortem-dump \ + } \ + cpu { \ + skip-cores $SKIP_CORE \ + main-core $MAIN_CORE \ + $CFG_CORELIST_WKS \ + } \ + buffers { $CFG_MBUFS } \ + $DPDK_CFG \ + $SESSION_CFG \ + $TCP_CFG \ + $UDP_CFG \ + $SOCK_CFG \ + $TLS_CFG \ + plugins { \ + plugin unittest_plugin.so {enable} \ + plugin http_unittest_plugin.so {enable} \ + $QUIC_PLUGIN \ + $SRTP_PLUGIN \ + $DPDK_PLUGIN_DISABLE \ + } +} + diff --git a/extras/scripts/host-stack/perf-tests/vcl_client b/extras/scripts/host-stack/perf-tests/vcl_client new file mode 100755 index 00000000000..653aeef6c9f --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/vcl_client @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. 
+ +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS=$VPP_WS/extras/scripts/host-stack/perf-tests +BIN=$VPP_WS/build-root/build-vpp-native/vpp/bin/vcl_test_client +GDB_ENV=$CFG_WS/gdb_vcl_env +VCL_CFG=$CFG_WS/vcl_perf.conf +PARAMS=$@ + +sudo taskset --cpu-list 10 sh -c "VCL_CONFIG=$VCL_CFG $BIN -c $PARAMS" diff --git a/extras/scripts/host-stack/perf-tests/vcl_iperf_client b/extras/scripts/host-stack/perf-tests/vcl_iperf_client new file mode 100755 index 00000000000..94a8b6c4912 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/vcl_iperf_client @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. + +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS=$VPP_WS/extras/scripts/host-stack/perf-tests +BIN_PATH=iperf3 +LDP_VAR=$VPP_WS/build-root/install-vpp-native/vpp/lib/x86_64-linux-gnu/libvcl_ldpreload.so +VCL_CFG=$CFG_WS/vcl_perf.conf +PARAMS=$@ + +sudo taskset --cpu-list 6-11 sh -c "LD_PRELOAD=$LDP_VAR VCL_CONFIG=$VCL_CFG $BIN_PATH -4 -c $PARAMS" + +# for tls +# CERT=$CFG_WS/selfsigned.crt +# KEY=$CFG_WS/selfsigned.key +# sudo taskset --cpu-list 6-10 sh -c "LDP_TRANSPARENT_TLS=2 LDP_TLS_CERT_FILE=$CERT LDP_TLS_KEY_FILE=$KEY VCL_CONFIG=$VCL_CFG LD_PRELOAD=$LDP_VAR $BIN_PATH -4 -c $PARAMS" + +# for debugging +# LDP_SCRIPT=$CFG_WS/gdb_ldp_init +# sudo gdb --command=$LDP_SCRIPT --args $BIN_PATH -4 -s $PARAMS diff --git a/extras/scripts/host-stack/perf-tests/vcl_iperf_server b/extras/scripts/host-stack/perf-tests/vcl_iperf_server new file mode 100755 index 00000000000..92dd1fcea5f --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/vcl_iperf_server @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. 
+ +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS=$VPP_WS/extras/scripts/host-stack/perf-tests +BIN_PATH=iperf3 +LDP_VAR=$VPP_WS/build-root/install-vpp-native/vpp/lib/x86_64-linux-gnu/libvcl_ldpreload.so +VCL_CFG=$CFG_WS/vcl_perf.conf +PARAMS=$@ + +sudo taskset --cpu-list 6-11 sh -c "LD_PRELOAD=$LDP_VAR VCL_CONFIG=$VCL_CFG $BIN_PATH -4 -s $PARAMS" + +# for tls testing +# CERT=$CFG_WS/selfsigned.crt +# KEY=$CFG_WS/selfsigned.key +# sudo taskset --cpu-list 6-11 sh -c "LDP_TRANSPARENT_TLS=1 LDP_TLS_CERT_FILE=$CERT LDP_TLS_KEY_FILE=$KEY LD_PRELOAD=$LDP_VAR VCL_CONFIG=$VCL_CFG $BIN_PATH -4 -s $PARAMS" + +# debugging +# LDP_SCRIPT=/home/fcoras/vpp/gdb_ldp_init +# sudo gdb --command=$LDP_SCRIPT --args $BIN_PATH -4 -s $PARAMS diff --git a/extras/scripts/host-stack/perf-tests/vcl_perf.conf b/extras/scripts/host-stack/perf-tests/vcl_perf.conf new file mode 100644 index 00000000000..791d5fd3eaa --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/vcl_perf.conf @@ -0,0 +1,16 @@ +vcl { + # SPDX-License-Identifier: Apache-2.0 + # Copyright (c) 2025 Cisco Systems, Inc. + rx-fifo-size 4000000 + tx-fifo-size 4000000 + + app-scope-local + app-scope-global + + segment-size 10000000000 + add-segment-size 10000000000 + + app-socket-api /var/run/vpp/app_ns_sockets/default + use-mq-eventfd + event-queue-size 500000 +} diff --git a/extras/scripts/host-stack/perf-tests/vcl_server b/extras/scripts/host-stack/perf-tests/vcl_server new file mode 100755 index 00000000000..4ecc2f1f3a9 --- /dev/null +++ b/extras/scripts/host-stack/perf-tests/vcl_server @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Cisco Systems, Inc. + +VPP_WS=${VPP_WS:-/scratch/fcoras/vpp} +CFG_WS=$VPP_WS/extras/scripts/host-stack/perf-tests +BIN=$VPP_WS/build-root/build-vpp-native/vpp/bin/vcl_test_server +GDB_ENV=$CFG_WS/gdb_init_tls +VCL_CFG=$CFG_WS/vcl_perf.conf +PARAMS=$@ + +sudo taskset --cpu-list 6-11 sh -c "VCL_CONFIG=$VCL_CFG $BIN $PARAMS"
\ No newline at end of file diff --git a/extras/vcl-ldpreload/README.rst b/extras/scripts/host-stack/vcl-ldpreload/README.rst index 7168697ec01..7168697ec01 100644 --- a/extras/vcl-ldpreload/README.rst +++ b/extras/scripts/host-stack/vcl-ldpreload/README.rst diff --git a/extras/vcl-ldpreload/test/common/nginx_test.sh b/extras/scripts/host-stack/vcl-ldpreload/test/common/nginx_test.sh index 73f2911b57c..73f2911b57c 100755 --- a/extras/vcl-ldpreload/test/common/nginx_test.sh +++ b/extras/scripts/host-stack/vcl-ldpreload/test/common/nginx_test.sh diff --git a/extras/vcl-ldpreload/test/common/nginx_welcome.html b/extras/scripts/host-stack/vcl-ldpreload/test/common/nginx_welcome.html index c3850904482..c3850904482 100644 --- a/extras/vcl-ldpreload/test/common/nginx_welcome.html +++ b/extras/scripts/host-stack/vcl-ldpreload/test/common/nginx_welcome.html diff --git a/extras/vcl-ldpreload/test/common/vpp_docker.conf b/extras/scripts/host-stack/vcl-ldpreload/test/common/vpp_docker.conf index 4d18bb2d208..4d18bb2d208 100644 --- a/extras/vcl-ldpreload/test/common/vpp_docker.conf +++ b/extras/scripts/host-stack/vcl-ldpreload/test/common/vpp_docker.conf diff --git a/extras/vcl-ldpreload/test/curl_test.sh b/extras/scripts/host-stack/vcl-ldpreload/test/curl_test.sh index 5c8d2f4a9d4..5c8d2f4a9d4 100755 --- a/extras/vcl-ldpreload/test/curl_test.sh +++ b/extras/scripts/host-stack/vcl-ldpreload/test/curl_test.sh diff --git a/extras/vcl-ldpreload/test/emacs_gdb_curl.sh b/extras/scripts/host-stack/vcl-ldpreload/test/emacs_gdb_curl.sh index a5d6be3dfee..a5d6be3dfee 100755 --- a/extras/vcl-ldpreload/test/emacs_gdb_curl.sh +++ b/extras/scripts/host-stack/vcl-ldpreload/test/emacs_gdb_curl.sh diff --git a/extras/vcl-ldpreload/test/emacs_gdb_vpp.sh b/extras/scripts/host-stack/vcl-ldpreload/test/emacs_gdb_vpp.sh index 460e4077e53..460e4077e53 100755 --- a/extras/vcl-ldpreload/test/emacs_gdb_vpp.sh +++ b/extras/scripts/host-stack/vcl-ldpreload/test/emacs_gdb_vpp.sh diff --git 
a/extras/vcl-ldpreload/test/wget_test.sh b/extras/scripts/host-stack/vcl-ldpreload/test/wget_test.sh index 78b34f9e481..78b34f9e481 100755 --- a/extras/vcl-ldpreload/test/wget_test.sh +++ b/extras/scripts/host-stack/vcl-ldpreload/test/wget_test.sh diff --git a/src/plugins/abf/abf_itf_attach.c b/src/plugins/abf/abf_itf_attach.c index 04e5c4c40c2..3e55df52562 100644 --- a/src/plugins/abf/abf_itf_attach.c +++ b/src/plugins/abf/abf_itf_attach.c @@ -681,18 +681,20 @@ VLIB_REGISTER_NODE (abf_ip6_node) = } }; -VNET_FEATURE_INIT (abf_ip4_feat, static) = -{ +VNET_FEATURE_INIT (abf_ip4_feat, static) = { .arc_name = "ip4-unicast", .node_name = "abf-input-ip4", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa", + "ip4-full-reassembly-feature", + "ip4-sv-reassembly-feature"), }; -VNET_FEATURE_INIT (abf_ip6_feat, static) = -{ +VNET_FEATURE_INIT (abf_ip6_feat, static) = { .arc_name = "ip6-unicast", .node_name = "abf-input-ip6", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"), + .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa", + "ip6-full-reassembly-feature", + "ip6-sv-reassembly-feature"), }; static fib_node_t * diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c index fd301da8ea5..f0199c929cc 100644 --- a/src/plugins/dpdk/device/format.c +++ b/src/plugins/dpdk/device/format.c @@ -117,8 +117,8 @@ _ (TX_MACSEC, "TX MACSEC") \ _ (TX_OUTER_IPV4, "TX outer IPV4") \ _ (TX_OUTER_IPV6, "TX outer IPV6") \ - _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \ - _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \ + _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of TX pkt. computed by NIC") \ + _ (TX_OUTER_UDP_CKSUM, "Outer UDP cksum of TX pkt. computed by NIC") \ _ (TX_QINQ, "TX QINQ") \ _ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. 
computed by NIC") \ _ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \ @@ -133,7 +133,7 @@ _ (TX_TUNNEL_UDP, "TX tunnel UDP") \ _ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \ _ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \ - _ (TX_UDP_CKSUM, "TX UDP cksum") \ + _ (TX_UDP_CKSUM, "UDP cksum of TX pkt. computed by NIC") \ _ (TX_UDP_SEG, "TX UDP SEG") \ _ (TX_VLAN, "TX packet is a 802.1q VLAN packet") diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c index 20271fc4aea..4701253bf59 100644 --- a/src/plugins/hs_apps/http_client.c +++ b/src/plugins/hs_apps/http_client.c @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: Apache-2.0 - * Copyright(c) 2024 Cisco Systems, Inc. + * Copyright(c) 2025 Cisco Systems, Inc. */ #include <vnet/session/application.h> @@ -12,29 +12,36 @@ typedef struct { + u64 req_per_wrk; + u64 request_count; + f64 start, end; + f64 elapsed_time; +} hc_stats_t; + +typedef struct +{ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u32 session_index; u32 thread_index; - u32 vpp_session_index; u64 to_recv; u8 is_closed; + hc_stats_t stats; + u64 data_offset; + u8 *resp_headers; + u8 *http_response; + u8 *response_status; } hc_session_t; typedef struct { - u64 request_count; - f64 start, end; - f64 elapsed_time; -} hc_stats_t; - -typedef struct -{ hc_session_t *sessions; u32 thread_index; vlib_main_t *vlib_main; u8 *headers_buf; http_headers_ctx_t req_headers; http_msg_t msg; + u32 session_index; + bool has_common_headers; } hc_worker_t; typedef struct @@ -52,11 +59,7 @@ typedef struct session_endpoint_cfg_t connect_sep; u8 *target; u8 *data; - u64 data_offset; hc_worker_t *wrk; - u8 *resp_headers; - u8 *http_response; - u8 *response_status; hc_http_header_t *custom_header; u8 is_file; u8 use_ptr; @@ -67,6 +70,18 @@ typedef struct u64 repeat_count; f64 duration; bool repeat; + bool multi_session; + u32 done_count; + u32 connected_counter; + u32 worker_index; + u32 max_sessions; + u32 private_segment_size; + u32 prealloc_fifos; + u32 fifo_size; 
+ u8 *appns_id; + u64 appns_secret; + clib_spinlock_t lock; + bool was_transport_closed; } hc_main_t; typedef enum @@ -95,13 +110,6 @@ hc_session_get (u32 session_index, u32 thread_index) return pool_elt_at_index (wrk->sessions, session_index); } -static void -hc_ho_session_free (u32 hs_index) -{ - hc_worker_t *wrk = hc_worker_get (0); - pool_put_index (wrk->sessions, hs_index); -} - static hc_session_t * hc_session_alloc (hc_worker_t *wrk) { @@ -115,14 +123,14 @@ hc_session_alloc (hc_worker_t *wrk) } static int -hc_request (session_t *s, session_error_t err) +hc_request (session_t *s, hc_worker_t *wrk, hc_session_t *hc_session, + session_error_t err) { hc_main_t *hcm = &hc_main; u64 to_send; u32 n_enq; u8 n_segs; int rv; - hc_worker_t *wrk = hc_worker_get (s->thread_index); if (hcm->use_ptr) { @@ -166,7 +174,7 @@ hc_request (session_t *s, session_error_t err) rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data); if (rv < to_send) { - hcm->data_offset = (rv > 0) ? rv : 0; + hc_session->data_offset = (rv > 0) ? 
rv : 0; svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } } @@ -185,9 +193,8 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, { hc_main_t *hcm = &hc_main; hc_worker_t *wrk; - u32 new_hc_index; + hc_session_t *hc_session; hc_http_header_t *header; - HTTP_DBG (1, "ho hc_index: %d", hc_session_index); if (err) { @@ -199,68 +206,89 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, } wrk = hc_worker_get (s->thread_index); - hc_session_t *hc_session, *new_hc_session = hc_session_alloc (wrk); - hc_session = hc_session_get (hc_session_index, 0); - new_hc_index = new_hc_session->session_index; - clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session)); - new_hc_session->session_index = new_hc_index; - new_hc_session->thread_index = s->thread_index; - new_hc_session->vpp_session_index = s->session_index; - HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index); - s->opaque = new_hc_index; + hc_session = hc_session_alloc (wrk); + clib_spinlock_lock_if_init (&hcm->lock); + hcm->connected_counter++; + clib_spinlock_unlock_if_init (&hcm->lock); - if (hcm->req_method == HTTP_REQ_POST) + hc_session->thread_index = s->thread_index; + s->opaque = hc_session->session_index; + wrk->session_index = hc_session->session_index; + + if (hcm->multi_session) { - if (hcm->is_file) - http_add_header ( - &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); - else - http_add_header ( - &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + hc_session->stats.req_per_wrk = hcm->repeat_count / hcm->max_sessions; + clib_spinlock_lock_if_init (&hcm->lock); + /* add remaining requests to the first connected session */ + if (hcm->connected_counter == 1) + { + hc_session->stats.req_per_wrk += + hcm->repeat_count % hcm->max_sessions; + } + clib_spinlock_unlock_if_init (&hcm->lock); } - http_add_header 
(&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1); - - vec_foreach (header, hcm->custom_header) - http_add_custom_header ( - &wrk->req_headers, (const char *) header->name, vec_len (header->name), - (const char *) header->value, vec_len (header->value)); - - clib_warning ("%U", format_http_bytes, wrk->headers_buf, - wrk->req_headers.tail_offset); - wrk->msg.method_type = hcm->req_method; - if (hcm->req_method == HTTP_REQ_POST) - wrk->msg.data.body_len = vec_len (hcm->data); else - wrk->msg.data.body_len = 0; - - wrk->msg.type = HTTP_MSG_REQUEST; - /* request target */ - wrk->msg.data.target_path_len = vec_len (hcm->target); - /* custom headers */ - wrk->msg.data.headers_len = wrk->req_headers.tail_offset; - /* total length */ - wrk->msg.data.len = wrk->msg.data.target_path_len + - wrk->msg.data.headers_len + wrk->msg.data.body_len; - - if (hcm->use_ptr) { - wrk->msg.data.type = HTTP_MSG_DATA_PTR; + hc_session->stats.req_per_wrk = hcm->repeat_count; + hcm->worker_index = s->thread_index; } - else + + if (!wrk->has_common_headers) { - wrk->msg.data.type = HTTP_MSG_DATA_INLINE; - wrk->msg.data.target_path_offset = 0; - wrk->msg.data.headers_offset = wrk->msg.data.target_path_len; - wrk->msg.data.body_offset = - wrk->msg.data.headers_offset + wrk->msg.data.headers_len; + wrk->has_common_headers = true; + if (hcm->req_method == HTTP_REQ_POST) + { + if (hcm->is_file) + http_add_header ( + &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); + else + http_add_header (&wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token ( + HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); + } + http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1); + + vec_foreach (header, hcm->custom_header) + http_add_custom_header (&wrk->req_headers, (const char *) header->name, + vec_len (header->name), + (const char *) header->value, + vec_len (header->value)); + + wrk->msg.method_type = hcm->req_method; + if (hcm->req_method == 
HTTP_REQ_POST) + wrk->msg.data.body_len = vec_len (hcm->data); + else + wrk->msg.data.body_len = 0; + + wrk->msg.type = HTTP_MSG_REQUEST; + /* request target */ + wrk->msg.data.target_path_len = vec_len (hcm->target); + /* custom headers */ + wrk->msg.data.headers_len = wrk->req_headers.tail_offset; + /* total length */ + wrk->msg.data.len = wrk->msg.data.target_path_len + + wrk->msg.data.headers_len + wrk->msg.data.body_len; + + if (hcm->use_ptr) + { + wrk->msg.data.type = HTTP_MSG_DATA_PTR; + } + else + { + wrk->msg.data.type = HTTP_MSG_DATA_INLINE; + wrk->msg.data.target_path_offset = 0; + wrk->msg.data.headers_offset = wrk->msg.data.target_path_len; + wrk->msg.data.body_offset = + wrk->msg.data.headers_offset + wrk->msg.data.headers_len; + } } if (hcm->repeat) - hc_stats.start = vlib_time_now (vlib_get_main_by_index (s->thread_index)); + hc_session->stats.start = + vlib_time_now (vlib_get_main_by_index (s->thread_index)); - return hc_request (s, err); + return hc_request (s, wrk, hc_session, err); } static void @@ -275,21 +303,38 @@ hc_session_disconnect_callback (session_t *s) if ((rv = vnet_disconnect_session (a))) clib_warning ("warning: disconnect returned: %U", format_session_error, rv); + clib_spinlock_lock_if_init (&hcm->lock); + hcm->done_count++; + clib_spinlock_unlock_if_init (&hcm->lock); } static void hc_session_transport_closed_callback (session_t *s) { hc_main_t *hcm = &hc_main; - vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index, - HC_TRANSPORT_CLOSED, 0); -} + hc_worker_t *wrk = hc_worker_get (s->thread_index); -static void -hc_ho_cleanup_callback (session_t *s) -{ - HTTP_DBG (1, "ho hc_index: %d:", s->opaque); - hc_ho_session_free (s->opaque); + clib_spinlock_lock_if_init (&hcm->lock); + if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED) + { + hcm->was_transport_closed = true; + } + + /* send an event when all sessions are closed */ + if (hcm->done_count >= hcm->max_sessions) + { + if (hcm->was_transport_closed) + { + 
vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index, + HC_TRANSPORT_CLOSED, 0); + } + else + { + vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index, + HC_REPEAT_DONE, 0); + } + } + clib_spinlock_unlock_if_init (&hcm->lock); } static void @@ -315,20 +360,23 @@ hc_rx_callback (session_t *s) { hc_main_t *hcm = &hc_main; hc_worker_t *wrk = hc_worker_get (s->thread_index); - hc_session_t *hc_session; + hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index); http_msg_t msg; int rv; + u32 max_deq; session_error_t session_err = 0; int send_err = 0; - hc_session = hc_session_get (s->opaque, s->thread_index); - if (hc_session->is_closed) { clib_warning ("hc_session_index[%d] is closed", s->opaque); return -1; } + max_deq = svm_fifo_max_dequeue_cons (s->rx_fifo); + if (PREDICT_FALSE (max_deq == 0)) + goto done; + if (hc_session->to_recv == 0) { rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); @@ -344,17 +392,20 @@ hc_rx_callback (session_t *s) if (msg.data.headers_len) { - hcm->response_status = - format (0, "%U", format_http_status_code, msg.code); + + if (!hcm->repeat) + hc_session->response_status = + format (0, "%U", format_http_status_code, msg.code); + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.headers_offset); - vec_validate (hcm->resp_headers, msg.data.headers_len - 1); - vec_set_len (hcm->resp_headers, msg.data.headers_len); + vec_validate (hc_session->resp_headers, msg.data.headers_len - 1); + vec_set_len (hc_session->resp_headers, msg.data.headers_len); rv = svm_fifo_dequeue (s->rx_fifo, msg.data.headers_len, - hcm->resp_headers); + hc_session->resp_headers); ASSERT (rv == msg.data.headers_len); - HTTP_DBG (1, (char *) format (0, "%v", hcm->resp_headers)); + HTTP_DBG (1, (char *) format (0, "%v", hc_session->resp_headers)); msg.data.body_offset -= msg.data.headers_len + msg.data.headers_offset; } @@ -372,18 +423,18 @@ hc_rx_callback (session_t *s) { goto done; } - vec_validate (hcm->http_response, 
msg.data.body_len - 1); - vec_reset_length (hcm->http_response); + vec_validate (hc_session->http_response, msg.data.body_len - 1); + vec_reset_length (hc_session->http_response); } - u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo); + max_deq = svm_fifo_max_dequeue (s->rx_fifo); if (!max_deq) { goto done; } u32 n_deq = clib_min (hc_session->to_recv, max_deq); - u32 curr = vec_len (hcm->http_response); - rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr); + u32 curr = vec_len (hc_session->http_response); + rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hc_session->http_response + curr); if (rv < 0) { clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv); @@ -393,7 +444,7 @@ hc_rx_callback (session_t *s) } ASSERT (rv == n_deq); - vec_set_len (hcm->http_response, curr + n_deq); + vec_set_len (hc_session->http_response, curr + n_deq); ASSERT (hc_session->to_recv >= rv); hc_session->to_recv -= rv; @@ -402,20 +453,19 @@ done: { if (hcm->repeat) { - hc_stats.request_count++; - hc_stats.end = vlib_time_now (wrk->vlib_main); - hc_stats.elapsed_time = hc_stats.end - hc_stats.start; + hc_session->stats.request_count++; + hc_session->stats.end = vlib_time_now (wrk->vlib_main); + hc_session->stats.elapsed_time = + hc_session->stats.end - hc_session->stats.start; - if (hc_stats.elapsed_time >= hcm->duration && - hc_stats.request_count >= hcm->repeat_count) + if (hc_session->stats.elapsed_time >= hcm->duration && + hc_session->stats.request_count >= hc_session->stats.req_per_wrk) { - vlib_process_signal_event_mt ( - wrk->vlib_main, hcm->cli_node_index, HC_REPEAT_DONE, 0); hc_session_disconnect_callback (s); } else { - send_err = hc_request (s, session_err); + send_err = hc_request (s, wrk, hc_session, session_err); if (send_err) clib_warning ("failed to send request, error %d", send_err); } @@ -434,11 +484,13 @@ static int hc_tx_callback (session_t *s) { hc_main_t *hcm = &hc_main; + hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index); u64 
to_send; int rv; - to_send = vec_len (hcm->data) - hcm->data_offset; - rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset); + to_send = vec_len (hcm->data) - hc_session->data_offset; + rv = svm_fifo_enqueue (s->tx_fifo, to_send, + hcm->data + hc_session->data_offset); if (rv <= 0) { @@ -448,7 +500,7 @@ hc_tx_callback (session_t *s) if (rv < to_send) { - hcm->data_offset += rv; + hc_session->data_offset += rv; svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); } @@ -465,7 +517,6 @@ static session_cb_vft_t hc_session_cb_vft = { .session_reset_callback = hc_session_reset_callback, .builtin_app_rx_callback = hc_rx_callback, .builtin_app_tx_callback = hc_tx_callback, - .half_open_cleanup_callback = hc_ho_cleanup_callback, }; static clib_error_t * @@ -474,8 +525,12 @@ hc_attach () hc_main_t *hcm = &hc_main; vnet_app_attach_args_t _a, *a = &_a; u64 options[18]; + u32 segment_size = 128 << 20; int rv; + if (hcm->private_segment_size) + segment_size = hcm->private_segment_size; + clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -483,7 +538,19 @@ hc_attach () a->name = format (0, "http_client"); a->session_cb_vft = &hc_session_cb_vft; a->options = options; + a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size; + a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size; + a->options[APP_OPTIONS_RX_FIFO_SIZE] = + hcm->fifo_size ? hcm->fifo_size : 8 << 10; + a->options[APP_OPTIONS_TX_FIFO_SIZE] = + hcm->fifo_size ? 
hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + if (hcm->appns_id) + { + a->namespace_id = hcm->appns_id; + a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret; + } if ((rv = vnet_application_attach (a))) return clib_error_return (0, "attach returned: %U", format_session_error, @@ -500,14 +567,19 @@ static int hc_connect_rpc (void *rpc_args) { vnet_connect_args_t *a = rpc_args; - int rv; + int rv = ~0; + hc_main_t *hcm = &hc_main; - rv = vnet_connect (a); - if (rv > 0) - clib_warning (0, "connect returned: %U", format_session_error, rv); + for (u32 i = 0; i < hcm->max_sessions; i++) + { + rv = vnet_connect (a); + if (rv > 0) + clib_warning (0, "connect returned: %U", format_session_error, rv); + } session_endpoint_free_ext_cfgs (&a->sep_ext); vec_free (a); + return rv; } @@ -516,14 +588,10 @@ hc_connect () { hc_main_t *hcm = &hc_main; vnet_connect_args_t *a = 0; - hc_worker_t *wrk; - hc_session_t *hc_session; transport_endpt_ext_cfg_t *ext_cfg; transport_endpt_cfg_http_t http_cfg = { (u32) hcm->timeout, 0 }; - vec_validate (a, 0); clib_memset (a, 0, sizeof (a[0])); - clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep)); a->app_index = hcm->app_index; @@ -531,15 +599,41 @@ hc_connect () &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); - /* allocate http session on main thread */ - wrk = hc_worker_get (0); - hc_session = hc_session_alloc (wrk); - a->api_context = hc_session->session_index; - session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc, a); } +static void +hc_get_repeat_stats (vlib_main_t *vm) +{ + hc_main_t *hcm = &hc_main; + hc_worker_t *wrk; + hc_session_t *hc_session; + + if (hcm->repeat) + { + vec_foreach (wrk, hcm->wrk) + { + vec_foreach (hc_session, wrk->sessions) + { + hc_stats.request_count += 
hc_session->stats.request_count; + hc_session->stats.request_count = 0; + if (hc_stats.elapsed_time < hc_session->stats.elapsed_time) + { + hc_stats.elapsed_time = hc_session->stats.elapsed_time; + hc_session->stats.elapsed_time = 0; + } + } + } + vlib_cli_output (vm, + "< %d request(s) in %.6fs\n< avg latency " + "%.4fms\n< %.2f req/sec", + hc_stats.request_count, hc_stats.elapsed_time, + (hc_stats.elapsed_time / hc_stats.request_count) * 1000, + hc_stats.request_count / hc_stats.elapsed_time); + } +} + static clib_error_t * hc_get_event (vlib_main_t *vm) { @@ -548,6 +642,8 @@ hc_get_event (vlib_main_t *vm) clib_error_t *err = NULL; FILE *file_ptr; u64 event_timeout; + hc_worker_t *wrk; + hc_session_t *hc_session; event_timeout = hcm->timeout ? hcm->timeout : 10; if (event_timeout == hcm->duration) @@ -558,20 +654,26 @@ hc_get_event (vlib_main_t *vm) switch (event_type) { case ~0: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: timeout"); break; case HC_CONNECT_FAILED: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: failed to connect"); break; case HC_TRANSPORT_CLOSED: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: transport closed"); break; case HC_GENERIC_ERR: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: unknown"); break; case HC_REPLY_RECEIVED: if (hcm->filename) { + wrk = hc_worker_get (hcm->worker_index); + hc_session = hc_session_get (wrk->session_index, wrk->thread_index); file_ptr = fopen ((char *) format (0, "/tmp/%v", hcm->filename), "a"); if (file_ptr == NULL) @@ -580,26 +682,27 @@ hc_get_event (vlib_main_t *vm) } else { - fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status, - hcm->resp_headers, hcm->http_response); + fprintf (file_ptr, "< %s\n< %s\n< %s", + hc_session->response_status, hc_session->resp_headers, + hc_session->http_response); fclose (file_ptr); vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename); } } if (hcm->verbose) - vlib_cli_output (vm, "< %v< %v", 
hcm->response_status, - hcm->resp_headers); - vlib_cli_output (vm, "\n%v\n", hcm->http_response); + { + wrk = hc_worker_get (hcm->worker_index); + hc_session = hc_session_get (wrk->session_index, wrk->thread_index); + vlib_cli_output (vm, "< %v\n< %v\n%v", hc_session->response_status, + hc_session->resp_headers, + hc_session->http_response); + } break; case HC_REPEAT_DONE: - vlib_cli_output (vm, - "< %d request(s) in %.6fs\n< avg latency " - "%.4fms\n< %.2f req/sec", - hc_stats.request_count, hc_stats.elapsed_time, - (hc_stats.elapsed_time / hc_stats.request_count) * 1000, - hc_stats.request_count / hc_stats.elapsed_time); + hc_get_repeat_stats (vm); break; default: + hc_get_repeat_stats (vm); err = clib_error_return (0, "error: unexpected event %d", event_type); break; } @@ -612,15 +715,17 @@ static clib_error_t * hc_run (vlib_main_t *vm) { hc_main_t *hcm = &hc_main; - vlib_thread_main_t *vtm = vlib_get_thread_main (); u32 num_threads; hc_worker_t *wrk; clib_error_t *err; - num_threads = 1 /* main thread */ + vtm->n_threads; + num_threads = 1 /* main thread */ + vlib_num_workers (); + if (vlib_num_workers ()) + clib_spinlock_init (&hcm->lock); vec_validate (hcm->wrk, num_threads - 1); vec_foreach (wrk, hcm->wrk) { + wrk->has_common_headers = false; wrk->thread_index = wrk - hcm->wrk; /* 4k for headers should be enough */ vec_validate (wrk->headers_buf, 4095); @@ -657,10 +762,18 @@ hc_detach () } static void -hcc_worker_cleanup (hc_worker_t *wrk) +hc_worker_cleanup (hc_worker_t *wrk) { - HTTP_DBG (1, "worker cleanup"); + hc_session_t *hc_session; + HTTP_DBG (1, "worker and worker sessions cleanup"); + vec_free (wrk->headers_buf); + vec_foreach (hc_session, wrk->sessions) + { + vec_free (hc_session->resp_headers); + vec_free (hc_session->http_response); + vec_free (hc_session->response_status); + } pool_free (wrk->sessions); } @@ -673,16 +786,14 @@ hc_cleanup () hc_http_header_t *header; vec_foreach (wrk, hcm->wrk) - hcc_worker_cleanup (wrk); + hc_worker_cleanup 
(wrk); vec_free (hcm->uri); vec_free (hcm->target); vec_free (hcm->data); - vec_free (hcm->resp_headers); - vec_free (hcm->http_response); - vec_free (hcm->response_status); vec_free (hcm->wrk); vec_free (hcm->filename); + vec_free (hcm->appns_id); vec_foreach (header, hcm->custom_header) { vec_free (header->name); @@ -698,6 +809,8 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, hc_main_t *hcm = &hc_main; clib_error_t *err = 0; unformat_input_t _line_input, *line_input = &_line_input; + u64 mem_size; + u8 *appns_id = 0; u8 *path = 0; u8 *file_data; hc_http_header_t new_header; @@ -708,7 +821,16 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, hcm->repeat_count = 0; hcm->duration = 0; hcm->repeat = false; + hcm->multi_session = false; + hcm->done_count = 0; + hcm->connected_counter = 0; + hcm->max_sessions = 1; + hcm->prealloc_fifos = 0; + hcm->private_segment_size = 0; + hcm->fifo_size = 0; + hcm->was_transport_closed = false; hc_stats.request_count = 0; + hc_stats.elapsed_time = 0; if (hcm->attached) return clib_error_return (0, "failed: already running!"); @@ -729,8 +851,6 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, ; else if (unformat (line_input, "data %v", &hcm->data)) hcm->is_file = 0; - else if (unformat (line_input, "target %s", &hcm->target)) - ; else if (unformat (line_input, "file %s", &path)) hcm->is_file = 1; else if (unformat (line_input, "use-ptr")) @@ -761,6 +881,29 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, } else if (unformat (line_input, "duration %f", &hcm->duration)) hcm->repeat = true; + else if (unformat (line_input, "sessions %d", &hcm->max_sessions)) + { + hcm->multi_session = true; + if (hcm->max_sessions <= 1) + { + err = clib_error_return (0, "sessions must be > 1"); + goto done; + } + } + else if (unformat (line_input, "prealloc-fifos %d", + &hcm->prealloc_fifos)) + ; + else if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &mem_size)) + 
hcm->private_segment_size = mem_size; + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &mem_size)) + hcm->fifo_size = mem_size; + else if (unformat (line_input, "appns %_%v%_", &appns_id)) + ; + else if (unformat (line_input, "secret %lu", &hcm->appns_secret)) + ; + else { err = clib_error_return (0, "unknown input `%U'", @@ -774,11 +917,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, err = clib_error_return (0, "URI not defined"); goto done; } - if (!hcm->target) - { - err = clib_error_return (0, "target not defined"); - goto done; - } + if (!hcm->data && hcm->req_method == HTTP_REQ_POST) { if (path) @@ -794,6 +933,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } } + if (hcm->duration && hcm->repeat_count) { err = clib_error_return ( @@ -801,6 +941,20 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } + if (hcm->multi_session && !hcm->repeat) + { + err = clib_error_return ( + 0, "multiple sessions are only supported with request repeating"); + goto done; + } + + if ((rv = parse_target ((char **) &hcm->uri, (char **) &hcm->target))) + { + err = clib_error_return (0, "target parse error: %U", + format_session_error, rv); + goto done; + } + if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep))) { err = @@ -808,6 +962,12 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } + if (hcm->duration >= hcm->timeout) + { + hcm->timeout = hcm->duration + 10; + } + hcm->appns_id = appns_id; + if (hcm->repeat) vlib_cli_output (vm, "Running, please wait..."); @@ -842,10 +1002,12 @@ done: VLIB_CLI_COMMAND (hc_command, static) = { .path = "http client", .short_help = - "[post] uri http://<ip-addr> target <origin-form> " + "[post] uri http://<ip-addr>/<origin-form> " "[data <form-urlencoded> | file <file-path>] [use-ptr] " "[save-to <filename>] [header <Key:Value>] [verbose] " - "[timeout <seconds> (default = 10)] [repeat <count> | duration <seconds>]", + "[timeout 
<seconds> (default = 10)] [repeat <count> | duration <seconds>] " + "[sessions <# of sessions>] [appns <app-ns> secret <appns-secret>] " + "[fifo-size <nM|G>] [private-segment-size <nM|G>] [prealloc-fifos <n>]", .function = hc_command_fn, .is_mp_safe = 1, }; diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 4ee3b49444c..8df5bfd10b8 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -62,6 +62,8 @@ typedef struct u8 *http_response; u8 *appns_id; u64 appns_secret; + u32 ckpair_index; + u8 need_crypto; } hcc_main_t; typedef enum @@ -333,6 +335,7 @@ hcc_attach () vnet_app_attach_args_t _a, *a = &_a; u64 options[18]; u32 segment_size = 128 << 20; + vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair; int rv; if (hcm->private_segment_size) @@ -353,6 +356,7 @@ hcc_attach () hcm->fifo_size ? hcm->fifo_size : 32 << 10; a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos; + a->options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL; if (hcm->appns_id) { a->namespace_id = hcm->appns_id; @@ -365,6 +369,15 @@ hcc_attach () hcm->app_index = a->app_index; vec_free (a->name); hcm->test_client_attached = 1; + + clib_memset (ck_pair, 0, sizeof (*ck_pair)); + ck_pair->cert = (u8 *) test_srv_crt_rsa; + ck_pair->key = (u8 *) test_srv_key_rsa; + ck_pair->cert_len = test_srv_crt_rsa_len; + ck_pair->key_len = test_srv_key_rsa_len; + vnet_app_add_cert_key_pair (ck_pair); + hcm->ckpair_index = ck_pair->index; + return 0; } @@ -411,6 +424,14 @@ hcc_connect () &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); + if (hcm->need_crypto) + { + ext_cfg = session_endpoint_add_ext_cfg ( + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO, + sizeof (transport_endpt_crypto_cfg_t)); + ext_cfg->crypto.ckpair_index = hcm->ckpair_index; + } + /* allocate http 
session on main thread */ wrk = hcc_worker_get (0); hs = hcc_session_alloc (wrk); @@ -581,6 +602,8 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, err = clib_error_return (0, "Uri parse error: %d", rv); goto done; } + hcm->need_crypto = hcm->connect_sep.transport_proto == TRANSPORT_PROTO_TLS; + hcm->connect_sep.transport_proto = TRANSPORT_PROTO_HTTP; session_enable_disable_args_t args = { .is_en = 1, .rt_engine_type = diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt index 075b8d6817b..58cb4c000e3 100644 --- a/src/plugins/http/CMakeLists.txt +++ b/src/plugins/http/CMakeLists.txt @@ -13,9 +13,12 @@ add_vpp_plugin(http SOURCES + http2/hpack.c + http2/frame.c http.c http_buffer.c http_timer.c + http1.c ) add_vpp_plugin(http_unittest diff --git a/src/plugins/http/extras/mk_huffman_table.py b/src/plugins/http/extras/mk_huffman_table.py new file mode 100644 index 00000000000..378544b0dce --- /dev/null +++ b/src/plugins/http/extras/mk_huffman_table.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +from io import StringIO + + +# SPDX-License-Identifier: Apache-2.0 +# Copyright(c) 2025 Cisco Systems, Inc. + + +# e.g. 
5 bit code symbol has 8 slots (2^8-5), last 3 bits are irrelevant +def generate_slots(fh, s, cl): + for i in range(1 << 8 - cl): + fh.write(" { 0x%02X, %d },\n" % (s, cl)) + + +# list of code and code length tuples +huff_code_table = [] + +# Huffman code from RFC7541 Appendix B, EOS removed +rfc7541_huffman_code = """\ + ( 0) |11111111|11000 1ff8 [13] + ( 1) |11111111|11111111|1011000 7fffd8 [23] + ( 2) |11111111|11111111|11111110|0010 fffffe2 [28] + ( 3) |11111111|11111111|11111110|0011 fffffe3 [28] + ( 4) |11111111|11111111|11111110|0100 fffffe4 [28] + ( 5) |11111111|11111111|11111110|0101 fffffe5 [28] + ( 6) |11111111|11111111|11111110|0110 fffffe6 [28] + ( 7) |11111111|11111111|11111110|0111 fffffe7 [28] + ( 8) |11111111|11111111|11111110|1000 fffffe8 [28] + ( 9) |11111111|11111111|11101010 ffffea [24] + ( 10) |11111111|11111111|11111111|111100 3ffffffc [30] + ( 11) |11111111|11111111|11111110|1001 fffffe9 [28] + ( 12) |11111111|11111111|11111110|1010 fffffea [28] + ( 13) |11111111|11111111|11111111|111101 3ffffffd [30] + ( 14) |11111111|11111111|11111110|1011 fffffeb [28] + ( 15) |11111111|11111111|11111110|1100 fffffec [28] + ( 16) |11111111|11111111|11111110|1101 fffffed [28] + ( 17) |11111111|11111111|11111110|1110 fffffee [28] + ( 18) |11111111|11111111|11111110|1111 fffffef [28] + ( 19) |11111111|11111111|11111111|0000 ffffff0 [28] + ( 20) |11111111|11111111|11111111|0001 ffffff1 [28] + ( 21) |11111111|11111111|11111111|0010 ffffff2 [28] + ( 22) |11111111|11111111|11111111|111110 3ffffffe [30] + ( 23) |11111111|11111111|11111111|0011 ffffff3 [28] + ( 24) |11111111|11111111|11111111|0100 ffffff4 [28] + ( 25) |11111111|11111111|11111111|0101 ffffff5 [28] + ( 26) |11111111|11111111|11111111|0110 ffffff6 [28] + ( 27) |11111111|11111111|11111111|0111 ffffff7 [28] + ( 28) |11111111|11111111|11111111|1000 ffffff8 [28] + ( 29) |11111111|11111111|11111111|1001 ffffff9 [28] + ( 30) |11111111|11111111|11111111|1010 ffffffa [28] + ( 31) 
|11111111|11111111|11111111|1011 ffffffb [28] +' ' ( 32) |010100 14 [ 6] +'!' ( 33) |11111110|00 3f8 [10] +'"' ( 34) |11111110|01 3f9 [10] +'#' ( 35) |11111111|1010 ffa [12] +'$' ( 36) |11111111|11001 1ff9 [13] +'%' ( 37) |010101 15 [ 6] +'&' ( 38) |11111000 f8 [ 8] +''' ( 39) |11111111|010 7fa [11] +'(' ( 40) |11111110|10 3fa [10] +')' ( 41) |11111110|11 3fb [10] +'*' ( 42) |11111001 f9 [ 8] +'+' ( 43) |11111111|011 7fb [11] +',' ( 44) |11111010 fa [ 8] +'-' ( 45) |010110 16 [ 6] +'.' ( 46) |010111 17 [ 6] +'/' ( 47) |011000 18 [ 6] +'0' ( 48) |00000 0 [ 5] +'1' ( 49) |00001 1 [ 5] +'2' ( 50) |00010 2 [ 5] +'3' ( 51) |011001 19 [ 6] +'4' ( 52) |011010 1a [ 6] +'5' ( 53) |011011 1b [ 6] +'6' ( 54) |011100 1c [ 6] +'7' ( 55) |011101 1d [ 6] +'8' ( 56) |011110 1e [ 6] +'9' ( 57) |011111 1f [ 6] +':' ( 58) |1011100 5c [ 7] +';' ( 59) |11111011 fb [ 8] +'<' ( 60) |11111111|1111100 7ffc [15] +'=' ( 61) |100000 20 [ 6] +'>' ( 62) |11111111|1011 ffb [12] +'?' ( 63) |11111111|00 3fc [10] +'@' ( 64) |11111111|11010 1ffa [13] +'A' ( 65) |100001 21 [ 6] +'B' ( 66) |1011101 5d [ 7] +'C' ( 67) |1011110 5e [ 7] +'D' ( 68) |1011111 5f [ 7] +'E' ( 69) |1100000 60 [ 7] +'F' ( 70) |1100001 61 [ 7] +'G' ( 71) |1100010 62 [ 7] +'H' ( 72) |1100011 63 [ 7] +'I' ( 73) |1100100 64 [ 7] +'J' ( 74) |1100101 65 [ 7] +'K' ( 75) |1100110 66 [ 7] +'L' ( 76) |1100111 67 [ 7] +'M' ( 77) |1101000 68 [ 7] +'N' ( 78) |1101001 69 [ 7] +'O' ( 79) |1101010 6a [ 7] +'P' ( 80) |1101011 6b [ 7] +'Q' ( 81) |1101100 6c [ 7] +'R' ( 82) |1101101 6d [ 7] +'S' ( 83) |1101110 6e [ 7] +'T' ( 84) |1101111 6f [ 7] +'U' ( 85) |1110000 70 [ 7] +'V' ( 86) |1110001 71 [ 7] +'W' ( 87) |1110010 72 [ 7] +'X' ( 88) |11111100 fc [ 8] +'Y' ( 89) |1110011 73 [ 7] +'Z' ( 90) |11111101 fd [ 8] +'[' ( 91) |11111111|11011 1ffb [13] +'\' ( 92) |11111111|11111110|000 7fff0 [19] +']' ( 93) |11111111|11100 1ffc [13] +'^' ( 94) |11111111|111100 3ffc [14] +'_' ( 95) |100010 22 [ 6] +'`' ( 96) |11111111|1111101 7ffd [15] +'a' ( 97) 
|00011 3 [ 5] +'b' ( 98) |100011 23 [ 6] +'c' ( 99) |00100 4 [ 5] +'d' (100) |100100 24 [ 6] +'e' (101) |00101 5 [ 5] +'f' (102) |100101 25 [ 6] +'g' (103) |100110 26 [ 6] +'h' (104) |100111 27 [ 6] +'i' (105) |00110 6 [ 5] +'j' (106) |1110100 74 [ 7] +'k' (107) |1110101 75 [ 7] +'l' (108) |101000 28 [ 6] +'m' (109) |101001 29 [ 6] +'n' (110) |101010 2a [ 6] +'o' (111) |00111 7 [ 5] +'p' (112) |101011 2b [ 6] +'q' (113) |1110110 76 [ 7] +'r' (114) |101100 2c [ 6] +'s' (115) |01000 8 [ 5] +'t' (116) |01001 9 [ 5] +'u' (117) |101101 2d [ 6] +'v' (118) |1110111 77 [ 7] +'w' (119) |1111000 78 [ 7] +'x' (120) |1111001 79 [ 7] +'y' (121) |1111010 7a [ 7] +'z' (122) |1111011 7b [ 7] +'{' (123) |11111111|1111110 7ffe [15] +'|' (124) |11111111|100 7fc [11] +'}' (125) |11111111|111101 3ffd [14] +'~' (126) |11111111|11101 1ffd [13] + (127) |11111111|11111111|11111111|1100 ffffffc [28] + (128) |11111111|11111110|0110 fffe6 [20] + (129) |11111111|11111111|010010 3fffd2 [22] + (130) |11111111|11111110|0111 fffe7 [20] + (131) |11111111|11111110|1000 fffe8 [20] + (132) |11111111|11111111|010011 3fffd3 [22] + (133) |11111111|11111111|010100 3fffd4 [22] + (134) |11111111|11111111|010101 3fffd5 [22] + (135) |11111111|11111111|1011001 7fffd9 [23] + (136) |11111111|11111111|010110 3fffd6 [22] + (137) |11111111|11111111|1011010 7fffda [23] + (138) |11111111|11111111|1011011 7fffdb [23] + (139) |11111111|11111111|1011100 7fffdc [23] + (140) |11111111|11111111|1011101 7fffdd [23] + (141) |11111111|11111111|1011110 7fffde [23] + (142) |11111111|11111111|11101011 ffffeb [24] + (143) |11111111|11111111|1011111 7fffdf [23] + (144) |11111111|11111111|11101100 ffffec [24] + (145) |11111111|11111111|11101101 ffffed [24] + (146) |11111111|11111111|010111 3fffd7 [22] + (147) |11111111|11111111|1100000 7fffe0 [23] + (148) |11111111|11111111|11101110 ffffee [24] + (149) |11111111|11111111|1100001 7fffe1 [23] + (150) |11111111|11111111|1100010 7fffe2 [23] + (151) |11111111|11111111|1100011 7fffe3 
[23] + (152) |11111111|11111111|1100100 7fffe4 [23] + (153) |11111111|11111110|11100 1fffdc [21] + (154) |11111111|11111111|011000 3fffd8 [22] + (155) |11111111|11111111|1100101 7fffe5 [23] + (156) |11111111|11111111|011001 3fffd9 [22] + (157) |11111111|11111111|1100110 7fffe6 [23] + (158) |11111111|11111111|1100111 7fffe7 [23] + (159) |11111111|11111111|11101111 ffffef [24] + (160) |11111111|11111111|011010 3fffda [22] + (161) |11111111|11111110|11101 1fffdd [21] + (162) |11111111|11111110|1001 fffe9 [20] + (163) |11111111|11111111|011011 3fffdb [22] + (164) |11111111|11111111|011100 3fffdc [22] + (165) |11111111|11111111|1101000 7fffe8 [23] + (166) |11111111|11111111|1101001 7fffe9 [23] + (167) |11111111|11111110|11110 1fffde [21] + (168) |11111111|11111111|1101010 7fffea [23] + (169) |11111111|11111111|011101 3fffdd [22] + (170) |11111111|11111111|011110 3fffde [22] + (171) |11111111|11111111|11110000 fffff0 [24] + (172) |11111111|11111110|11111 1fffdf [21] + (173) |11111111|11111111|011111 3fffdf [22] + (174) |11111111|11111111|1101011 7fffeb [23] + (175) |11111111|11111111|1101100 7fffec [23] + (176) |11111111|11111111|00000 1fffe0 [21] + (177) |11111111|11111111|00001 1fffe1 [21] + (178) |11111111|11111111|100000 3fffe0 [22] + (179) |11111111|11111111|00010 1fffe2 [21] + (180) |11111111|11111111|1101101 7fffed [23] + (181) |11111111|11111111|100001 3fffe1 [22] + (182) |11111111|11111111|1101110 7fffee [23] + (183) |11111111|11111111|1101111 7fffef [23] + (184) |11111111|11111110|1010 fffea [20] + (185) |11111111|11111111|100010 3fffe2 [22] + (186) |11111111|11111111|100011 3fffe3 [22] + (187) |11111111|11111111|100100 3fffe4 [22] + (188) |11111111|11111111|1110000 7ffff0 [23] + (189) |11111111|11111111|100101 3fffe5 [22] + (190) |11111111|11111111|100110 3fffe6 [22] + (191) |11111111|11111111|1110001 7ffff1 [23] + (192) |11111111|11111111|11111000|00 3ffffe0 [26] + (193) |11111111|11111111|11111000|01 3ffffe1 [26] + (194) |11111111|11111110|1011 fffeb [20] + 
(195) |11111111|11111110|001 7fff1 [19] + (196) |11111111|11111111|100111 3fffe7 [22] + (197) |11111111|11111111|1110010 7ffff2 [23] + (198) |11111111|11111111|101000 3fffe8 [22] + (199) |11111111|11111111|11110110|0 1ffffec [25] + (200) |11111111|11111111|11111000|10 3ffffe2 [26] + (201) |11111111|11111111|11111000|11 3ffffe3 [26] + (202) |11111111|11111111|11111001|00 3ffffe4 [26] + (203) |11111111|11111111|11111011|110 7ffffde [27] + (204) |11111111|11111111|11111011|111 7ffffdf [27] + (205) |11111111|11111111|11111001|01 3ffffe5 [26] + (206) |11111111|11111111|11110001 fffff1 [24] + (207) |11111111|11111111|11110110|1 1ffffed [25] + (208) |11111111|11111110|010 7fff2 [19] + (209) |11111111|11111111|00011 1fffe3 [21] + (210) |11111111|11111111|11111001|10 3ffffe6 [26] + (211) |11111111|11111111|11111100|000 7ffffe0 [27] + (212) |11111111|11111111|11111100|001 7ffffe1 [27] + (213) |11111111|11111111|11111001|11 3ffffe7 [26] + (214) |11111111|11111111|11111100|010 7ffffe2 [27] + (215) |11111111|11111111|11110010 fffff2 [24] + (216) |11111111|11111111|00100 1fffe4 [21] + (217) |11111111|11111111|00101 1fffe5 [21] + (218) |11111111|11111111|11111010|00 3ffffe8 [26] + (219) |11111111|11111111|11111010|01 3ffffe9 [26] + (220) |11111111|11111111|11111111|1101 ffffffd [28] + (221) |11111111|11111111|11111100|011 7ffffe3 [27] + (222) |11111111|11111111|11111100|100 7ffffe4 [27] + (223) |11111111|11111111|11111100|101 7ffffe5 [27] + (224) |11111111|11111110|1100 fffec [20] + (225) |11111111|11111111|11110011 fffff3 [24] + (226) |11111111|11111110|1101 fffed [20] + (227) |11111111|11111111|00110 1fffe6 [21] + (228) |11111111|11111111|101001 3fffe9 [22] + (229) |11111111|11111111|00111 1fffe7 [21] + (230) |11111111|11111111|01000 1fffe8 [21] + (231) |11111111|11111111|1110011 7ffff3 [23] + (232) |11111111|11111111|101010 3fffea [22] + (233) |11111111|11111111|101011 3fffeb [22] + (234) |11111111|11111111|11110111|0 1ffffee [25] + (235) |11111111|11111111|11110111|1 1ffffef 
[25] + (236) |11111111|11111111|11110100 fffff4 [24] + (237) |11111111|11111111|11110101 fffff5 [24] + (238) |11111111|11111111|11111010|10 3ffffea [26] + (239) |11111111|11111111|1110100 7ffff4 [23] + (240) |11111111|11111111|11111010|11 3ffffeb [26] + (241) |11111111|11111111|11111100|110 7ffffe6 [27] + (242) |11111111|11111111|11111011|00 3ffffec [26] + (243) |11111111|11111111|11111011|01 3ffffed [26] + (244) |11111111|11111111|11111100|111 7ffffe7 [27] + (245) |11111111|11111111|11111101|000 7ffffe8 [27] + (246) |11111111|11111111|11111101|001 7ffffe9 [27] + (247) |11111111|11111111|11111101|010 7ffffea [27] + (248) |11111111|11111111|11111101|011 7ffffeb [27] + (249) |11111111|11111111|11111111|1110 ffffffe [28] + (250) |11111111|11111111|11111101|100 7ffffec [27] + (251) |11111111|11111111|11111101|101 7ffffed [27] + (252) |11111111|11111111|11111101|110 7ffffee [27] + (253) |11111111|11111111|11111101|111 7ffffef [27] + (254) |11111111|11111111|11111110|000 7fffff0 [27] + (255) |11111111|11111111|11111011|10 3ffffee [26]""" + +# parse Huffman code +for line in StringIO(rfc7541_huffman_code): + # we need just last two columns + l = line.rstrip().split(" ") + # len in bits + code_len = l[-1][1:-1].strip() + # code as hex aligned to LSB + code = l[-2].strip() + huff_code_table.append((code_len, code)) + +f = open("../http2/huffman_table.h", "w") +f.write( + """/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +/* generated by mk_huffman_table.py */ + +#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ +#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ + +#include <vppinfra/types.h> + +typedef struct +{ + u8 code_len; + u32 code; +} hpack_huffman_symbol_t; + +static hpack_huffman_symbol_t huff_sym_table[] = { +""" +) + +# encoding table +[f.write(" {" + code[0] + ", 0x" + code[1] + "},\n") for code in huff_code_table] + +f.write( + """}; + +typedef struct +{ + u8 symbol; + u8 code_len; +} hpack_huffman_code_t; + +static hpack_huffman_code_t huff_code_table_fast[] = { +""" +) + +# fast decoding table, symbols with code length from 5 to 8 bits (most of printable ASCII characters) +[generate_slots(f, i, 5) for i, code in enumerate(huff_code_table) if code[0] == "5"] +[generate_slots(f, i, 6) for i, code in enumerate(huff_code_table) if code[0] == "6"] +[generate_slots(f, i, 7) for i, code in enumerate(huff_code_table) if code[0] == "7"] +[generate_slots(f, i, 8) for i, code in enumerate(huff_code_table) if code[0] == "8"] + +# last 2 entries are longer codes prefixes, code_len set to 0 +f.write(" { 0x00, 0 },\n") +f.write(" { 0x00, 0 },\n") + +f.write( + """}; + +typedef struct +{ + u32 first_code; + u8 code_len; + u8 symbols[29]; +} hpack_huffman_group_t; + +/* clang-format off */ + +static hpack_huffman_group_t huff_code_table_slow[] = { +""" +) +for i in range(10, 31): + symbols = [ + (symbol, code[1]) + for symbol, code in enumerate(huff_code_table) + if code[0] == str(i) + ] + if symbols: + _, first_code = symbols[0] + f.write(" {\n 0x" + first_code + ", /* first_code */\n") + f.write(" " + str(i) + ", /* code_len */\n") + f.write(" {\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[:10]] + if len(symbols) > 10: + f.write("\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[10:20]] + if len(symbols) > 20: + f.write("\n ") + [f.write(" 0x%02X," % s) for s, c in symbols[20:30]] + f.write("\n } /* symbols */\n },\n") + +f.write( + """}; + +/* clang format-on */ + +always_inline 
hpack_huffman_group_t * +hpack_huffman_get_group (u32 value) +{ +""" +) + +index = 0 + +symbols = [ + (symbol, code[1]) for symbol, code in enumerate(huff_code_table) if code[0] == "10" +] +_, last_code = symbols[-1] +boundary = (int(last_code, 16) + 1) << 22 +f.write(" if (value < 0x%X)\n" % boundary) +f.write(" return &huff_code_table_slow[%d];\n" % index) +index += 1 + +for i in range(11, 30): + symbols = [ + (symbol, code[1]) + for symbol, code in enumerate(huff_code_table) + if code[0] == str(i) + ] + if symbols: + _, last_code = symbols[-1] + boundary = (int(last_code, 16) + 1) << (32 - i) + f.write(" else if (value < 0x%X)\n" % boundary) + f.write(" return &huff_code_table_slow[%d];\n" % index) + index += 1 + +f.write(" else\n") +f.write(" return &huff_code_table_slow[%d];\n" % index) + +f.write( + """} + +#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */ +""" +) + +f.close() diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 04a4ad3e0a9..fc5b7d5d72d 100644 --- a/src/plugins/http/http.c +++ b/src/plugins/http/http.c @@ -13,43 +13,79 @@ * limitations under the License. 
*/ +#include <vpp/app/version.h> +#include <vnet/session/application_interface.h> +#include <vnet/session/application.h> + #include <http/http.h> -#include <vnet/session/session.h> +#include <http/http_private.h> #include <http/http_timer.h> -#include <http/http_status_codes.h> -#include <http/http_header_names.h> static http_main_t http_main; - -#define HTTP_FIFO_THRESH (16 << 10) - -/* HTTP state machine result */ -typedef enum http_sm_result_t_ -{ - HTTP_SM_STOP = 0, - HTTP_SM_CONTINUE = 1, - HTTP_SM_ERROR = -1, -} http_sm_result_t; +static http_engine_vft_t *http_vfts; const http_buffer_type_t msg_to_buf_type[] = { [HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO, [HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR, }; -const char *http_upgrade_proto_str[] = { "", -#define _(sym, str) str, - foreach_http_upgrade_proto -#undef _ -}; +void +http_register_engine (const http_engine_vft_t *vft, http_version_t version) +{ + vec_validate (http_vfts, version); + http_vfts[version] = *vft; +} + +always_inline http_version_t +http_version_from_handle (http_conn_handle_t hc_handle) +{ + /* the first 3 bits are http version */ + return hc_handle >> 29; +} + +always_inline u32 +http_conn_index_from_handle (http_conn_handle_t hc_handle) +{ + return hc_handle & 0x1FFFFFFF; +} + +always_inline http_conn_handle_t +http_make_handle (u32 hc_index, http_version_t version) +{ + ASSERT (hc_index <= 0x1FFFFFFF); + return (version << 29) | hc_index; +} + +int +http_v_find_index (u8 *vec, u32 offset, u32 num, char *str) +{ + int start_index = offset; + u32 slen = (u32) strnlen_s_inline (str, 16); + u32 vlen = vec_len (vec); + + ASSERT (slen > 0); -#define expect_char(c) \ - if (*p++ != c) \ - { \ - clib_warning ("unexpected character"); \ - return -1; \ + if (vlen <= slen) + return -1; + + int end_index = vlen - slen; + if (num) + { + if (num < slen) + return -1; + end_index = clib_min (end_index, offset + num - slen); } -static u8 * + for (; start_index <= end_index; start_index++) + { + if (!memcmp (vec 
+ start_index, str, slen)) + return start_index; + } + + return -1; +} + +u8 * format_http_req_state (u8 *s, va_list *va) { http_req_state_t state = va_arg (*va, http_req_state_t); @@ -68,18 +104,7 @@ format_http_req_state (u8 *s, va_list *va) return format (s, "%s", t); } -#define http_req_state_change(_hc, _state) \ - do \ - { \ - HTTP_DBG (1, "changing http req state: %U -> %U", \ - format_http_req_state, (_hc)->req.state, \ - format_http_req_state, _state); \ - ASSERT ((_hc)->req.state != HTTP_REQ_STATE_TUNNEL); \ - (_hc)->req.state = _state; \ - } \ - while (0) - -static u8 * +u8 * format_http_conn_state (u8 *s, va_list *args) { http_conn_t *hc = va_arg (*args, http_conn_t *); @@ -98,6 +123,15 @@ format_http_conn_state (u8 *s, va_list *args) return format (s, "%s", t); } +u8 * +format_http_time_now (u8 *s, va_list *args) +{ + http_conn_t __clib_unused *hc = va_arg (*args, http_conn_t *); + http_main_t *hm = &http_main; + f64 now = clib_timebase_now (&hm->timebase); + return format (s, "%U", format_clib_timebase_time, now); +} + static inline http_worker_t * http_worker_get (u32 thread_index) { @@ -116,6 +150,7 @@ http_conn_alloc_w_thread (u32 thread_index) hc->h_hc_index = hc - wrk->conn_pool; hc->h_pa_session_handle = SESSION_INVALID_HANDLE; hc->h_tc_session_handle = SESSION_INVALID_HANDLE; + hc->version = HTTP_VERSION_NA; return hc->h_hc_index; } @@ -135,13 +170,22 @@ http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index) return pool_elt_at_index (wrk->conn_pool, hc_index); } -void +static void http_conn_free (http_conn_t *hc) { http_worker_t *wrk = http_worker_get (hc->c_thread_index); + if (CLIB_DEBUG) + memset (hc, 0xba, sizeof (*hc)); pool_put (wrk->conn_pool, hc); } +static void +http_add_postponed_ho_cleanups (u32 ho_hc_index) +{ + http_main_t *hm = &http_main; + vec_add1 (hm->postponed_ho_free, ho_hc_index); +} + static inline http_conn_t * http_ho_conn_get (u32 ho_hc_index) { @@ -149,25 +193,65 @@ http_ho_conn_get (u32 ho_hc_index) return 
pool_elt_at_index (hm->ho_conn_pool, ho_hc_index); } -void +static void http_ho_conn_free (http_conn_t *ho_hc) { http_main_t *hm = &http_main; + if (CLIB_DEBUG) + memset (ho_hc, 0xba, sizeof (*ho_hc)); pool_put (hm->ho_conn_pool, ho_hc); } +static void +http_ho_try_free (u32 ho_hc_index) +{ + http_conn_t *ho_hc; + HTTP_DBG (1, "half open: %x", ho_hc_index); + ho_hc = http_ho_conn_get (ho_hc_index); + if (!(ho_hc->flags & HTTP_CONN_F_HO_DONE)) + { + HTTP_DBG (1, "postponed cleanup"); + ho_hc->h_tc_session_handle = SESSION_INVALID_HANDLE; + http_add_postponed_ho_cleanups (ho_hc_index); + return; + } + if (!(ho_hc->flags & HTTP_CONN_F_NO_APP_SESSION)) + session_half_open_delete_notify (&ho_hc->connection); + http_ho_conn_free (ho_hc); +} + +static void +http_flush_postponed_ho_cleanups () +{ + http_main_t *hm = &http_main; + u32 *ho_indexp, *tmp; + + tmp = hm->postponed_ho_free; + hm->postponed_ho_free = hm->ho_free_list; + hm->ho_free_list = tmp; + + vec_foreach (ho_indexp, hm->ho_free_list) + http_ho_try_free (*ho_indexp); + + vec_reset_length (hm->ho_free_list); +} + static inline u32 http_ho_conn_alloc (void) { http_main_t *hm = &http_main; http_conn_t *hc; + if (vec_len (hm->postponed_ho_free)) + http_flush_postponed_ho_cleanups (); + pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES); clib_memset (hc, 0, sizeof (*hc)); hc->h_hc_index = hc - hm->ho_conn_pool; hc->h_pa_session_handle = SESSION_INVALID_HANDLE; hc->h_tc_session_handle = SESSION_INVALID_HANDLE; hc->timeout = HTTP_CONN_TIMEOUT; + hc->version = HTTP_VERSION_NA; return hc->h_hc_index; } @@ -178,18 +262,19 @@ http_listener_alloc (void) http_conn_t *lhc; pool_get_zero (hm->listener_pool, lhc); - lhc->c_c_index = lhc - hm->listener_pool; + lhc->h_hc_index = lhc - hm->listener_pool; lhc->timeout = HTTP_CONN_TIMEOUT; - return lhc->c_c_index; + lhc->version = HTTP_VERSION_NA; + return lhc->h_hc_index; } -http_conn_t * +static http_conn_t * http_listener_get (u32 lhc_index) { return 
pool_elt_at_index (http_main.listener_pool, lhc_index); } -void +static void http_listener_free (http_conn_t *lhc) { http_main_t *hm = &http_main; @@ -214,6 +299,96 @@ http_disconnect_transport (http_conn_t *hc) clib_warning ("disconnect returned"); } +http_status_code_t +http_sc_by_u16 (u16 status_code) +{ + http_main_t *hm = &http_main; + return hm->sc_by_u16[status_code]; +} + +u8 * +http_get_app_header_list (http_conn_t *hc, http_msg_t *msg) +{ + http_main_t *hm = &http_main; + session_t *as; + u8 *app_headers; + int rv; + + as = session_get_from_handle (hc->h_pa_session_handle); + + if (msg->data.type == HTTP_MSG_DATA_PTR) + { + uword app_headers_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), + (u8 *) &app_headers_ptr); + ASSERT (rv == sizeof (app_headers_ptr)); + app_headers = uword_to_pointer (app_headers_ptr, u8 *); + } + else + { + app_headers = hm->app_header_lists[hc->c_thread_index]; + rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers); + ASSERT (rv == msg->data.headers_len); + } + + return app_headers; +} + +u8 * +http_get_app_target (http_req_t *req, http_msg_t *msg) +{ + session_t *as; + u8 *target; + int rv; + + as = session_get_from_handle (req->app_session_handle); + + if (msg->data.type == HTTP_MSG_DATA_PTR) + { + uword target_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr), + (u8 *) &target_ptr); + ASSERT (rv == sizeof (target_ptr)); + target = uword_to_pointer (target_ptr, u8 *); + } + else + { + vec_reset_length (req->target); + vec_validate (req->target, msg->data.target_path_len - 1); + rv = + svm_fifo_dequeue (as->tx_fifo, msg->data.target_path_len, req->target); + ASSERT (rv == msg->data.target_path_len); + target = req->target; + } + return target; +} + +u8 * +http_get_tx_buf (http_conn_t *hc) +{ + http_main_t *hm = &http_main; + u8 *buf = hm->tx_bufs[hc->c_thread_index]; + vec_reset_length (buf); + return buf; +} + +u8 * +http_get_rx_buf (http_conn_t *hc) +{ + http_main_t *hm = 
&http_main; + u8 *buf = hm->rx_bufs[hc->c_thread_index]; + vec_reset_length (buf); + return buf; +} + +void +http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + http_buffer_init (&req->tx_buf, msg_to_buf_type[msg->data.type], as->tx_fifo, + msg->data.body_len); +} + static void http_conn_invalidate_timer_cb (u32 hs_handle) { @@ -230,7 +405,7 @@ http_conn_invalidate_timer_cb (u32 hs_handle) } hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; - hc->pending_timer = 1; + hc->flags |= HTTP_CONN_F_PENDING_TIMER; } static void @@ -250,7 +425,7 @@ http_conn_timeout_cb (void *hc_handlep) return; } - if (!hc->pending_timer) + if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER)) { HTTP_DBG (1, "timer not pending"); return; @@ -260,6 +435,10 @@ http_conn_timeout_cb (void *hc_handlep) http_disconnect_transport (hc); } +/*************************/ +/* session VFT callbacks */ +/*************************/ + int http_ts_accept_callback (session_t *ts) { @@ -281,12 +460,12 @@ http_ts_accept_callback (session_t *ts) hc->h_tc_session_handle = session_handle (ts); hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); ts->session_state = SESSION_STATE_READY; - ts->opaque = hc_index; + /* TODO: TLS set by ALPN result, TCP: first try HTTP/1 */ + hc->version = HTTP_VERSION_1; + ts->opaque = http_make_handle (hc_index, hc->version); /* * Alloc session and initialize @@ -295,7 +474,7 @@ http_ts_accept_callback (session_t *ts) hc->c_s_index = as->session_index; as->app_wrk_index = hc->h_pa_wrk_index; - as->connection_index = hc->c_c_index; + as->connection_index = hc->h_hc_index; as->session_state = SESSION_STATE_ACCEPTING; asl = listen_session_get_from_handle (lhc->h_pa_session_handle); @@ -357,6 +536,7 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, { clib_warning ("half-open hc 
index %d, error: %U", ho_hc_index, format_session_error, err); + ho_hc->flags |= HTTP_CONN_F_HO_DONE; app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index); if (app_wrk) app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx); @@ -368,22 +548,24 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); + /* in chain with TLS there is race on half-open cleanup */ + __atomic_fetch_or (&ho_hc->flags, HTTP_CONN_F_HO_DONE, __ATOMIC_RELEASE); + hc->timer_handle = HTTP_TIMER_HANDLE_INVALID; hc->c_thread_index = ts->thread_index; hc->h_tc_session_handle = session_handle (ts); - hc->c_c_index = new_hc_index; + hc->h_hc_index = new_hc_index; hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; hc->state = HTTP_CONN_STATE_ESTABLISHED; - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - ts->session_state = SESSION_STATE_READY; - ts->opaque = new_hc_index; + /* TODO: TLS set by ALPN result, TCP: prior knowledge (set in ho) */ + ts->opaque = http_make_handle (new_hc_index, hc->version); /* allocate app session and initialize */ as = session_alloc (hc->c_thread_index); hc->c_s_index = as->session_index; - as->connection_index = hc->c_c_index; + as->connection_index = new_hc_index; as->app_wrk_index = hc->h_pa_wrk_index; as->session_state = SESSION_STATE_READY; as->opaque = hc->h_pa_app_api_ctx; @@ -417,1768 +599,43 @@ static void http_ts_disconnect_callback (session_t *ts) { http_conn_t *hc; + u32 hc_index = http_conn_index_from_handle (ts->opaque); + + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_index); - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + hc = http_conn_get_w_thread (hc_index, ts->thread_index); if (hc->state < HTTP_CONN_STATE_TRANSPORT_CLOSED) hc->state = HTTP_CONN_STATE_TRANSPORT_CLOSED; - /* Nothing more to rx, propagate to app */ - if (!svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_transport_closing_notify (&hc->connection); + 
http_vfts[hc->version].transport_close_callback (hc); } static void http_ts_reset_callback (session_t *ts) { http_conn_t *hc; + u32 hc_index = http_conn_index_from_handle (ts->opaque); - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_index); + + hc = http_conn_get_w_thread (hc_index, ts->thread_index); hc->state = HTTP_CONN_STATE_CLOSED; - http_buffer_free (&hc->req.tx_buf); - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); session_transport_reset_notify (&hc->connection); http_disconnect_transport (hc); } -/** - * http error boilerplate - */ -static const char *http_error_template = "HTTP/1.1 %s\r\n" - "Date: %U GMT\r\n" - "Connection: close\r\n" - "Content-Length: 0\r\n\r\n"; - -/** - * http response boilerplate - */ -static const char *http_response_template = "HTTP/1.1 %s\r\n" - "Date: %U GMT\r\n" - "Server: %v\r\n"; - -static const char *content_len_template = "Content-Length: %llu\r\n"; - -static const char *connection_upgrade_template = "Connection: upgrade\r\n" - "Upgrade: %s\r\n"; - -/** - * http request boilerplate - */ -static const char *http_get_request_template = "GET %s HTTP/1.1\r\n" - "Host: %v\r\n" - "User-Agent: %v\r\n"; - -static const char *http_post_request_template = "POST %s HTTP/1.1\r\n" - "Host: %v\r\n" - "User-Agent: %v\r\n" - "Content-Length: %llu\r\n"; - -static u32 -http_send_data (http_conn_t *hc, u8 *data, u32 length) -{ - const u32 max_burst = 64 << 10; - session_t *ts; - u32 to_send; - int rv; - - ts = session_get_from_handle (hc->h_tc_session_handle); - - to_send = clib_min (length, max_burst); - rv = svm_fifo_enqueue (ts->tx_fifo, to_send, data); - if (rv <= 0) - { - clib_warning ("svm_fifo_enqueue failed, rv %d", rv); - return 0; - } - - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - - return rv; -} - -static void -http_send_error (http_conn_t *hc, http_status_code_t ec) -{ - http_main_t *hm = 
&http_main; - u8 *data; - f64 now; - - if (ec >= HTTP_N_STATUS) - ec = HTTP_STATUS_INTERNAL_ERROR; - - now = clib_timebase_now (&hm->timebase); - data = format (0, http_error_template, http_status_code_str[ec], - format_clib_timebase_time, now); - HTTP_DBG (3, "%v", data); - http_send_data (hc, data, vec_len (data)); - vec_free (data); -} - -static int -http_read_message (http_conn_t *hc) -{ - u32 max_deq; - session_t *ts; - int n_read; - - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - return -1; - - vec_validate (hc->req.rx_buf, max_deq - 1); - n_read = svm_fifo_peek (ts->rx_fifo, 0, max_deq, hc->req.rx_buf); - ASSERT (n_read == max_deq); - HTTP_DBG (1, "read %u bytes from rx_fifo", n_read); - - return 0; -} - -static void -http_read_message_drop (http_conn_t *hc, u32 len) -{ - session_t *ts; - - ts = session_get_from_handle (hc->h_tc_session_handle); - svm_fifo_dequeue_drop (ts->rx_fifo, len); - vec_reset_length (hc->req.rx_buf); - - if (svm_fifo_is_empty (ts->rx_fifo)) - svm_fifo_unset_event (ts->rx_fifo); -} - -static void -http_read_message_drop_all (http_conn_t *hc) -{ - session_t *ts; - - ts = session_get_from_handle (hc->h_tc_session_handle); - svm_fifo_dequeue_drop_all (ts->rx_fifo); - vec_reset_length (hc->req.rx_buf); - - if (svm_fifo_is_empty (ts->rx_fifo)) - svm_fifo_unset_event (ts->rx_fifo); -} - -/** - * @brief Find the first occurrence of the string in the vector. - * - * @param vec The vector to be scanned. - * @param offset Search offset in the vector. - * @param num Maximum number of characters to be searched if non-zero. - * @param str The string to be searched. - * - * @return @c -1 if the string is not found within the vector; index otherwise. 
- */ -static inline int -v_find_index (u8 *vec, u32 offset, u32 num, char *str) -{ - int start_index = offset; - u32 slen = (u32) strnlen_s_inline (str, 16); - u32 vlen = vec_len (vec); - - ASSERT (slen > 0); - - if (vlen <= slen) - return -1; - - int end_index = vlen - slen; - if (num) - { - if (num < slen) - return -1; - end_index = clib_min (end_index, offset + num - slen); - } - - for (; start_index <= end_index; start_index++) - { - if (!memcmp (vec + start_index, str, slen)) - return start_index; - } - - return -1; -} - -static void -http_identify_optional_query (http_req_t *req) -{ - int i; - for (i = req->target_path_offset; - i < (req->target_path_offset + req->target_path_len); i++) - { - if (req->rx_buf[i] == '?') - { - req->target_query_offset = i + 1; - req->target_query_len = req->target_path_offset + - req->target_path_len - - req->target_query_offset; - req->target_path_len = - req->target_path_len - req->target_query_len - 1; - break; - } - } -} - -static int -http_parse_target (http_req_t *req) -{ - int i; - u8 *p, *end; - - /* asterisk-form = "*" */ - if ((req->rx_buf[req->target_path_offset] == '*') && - (req->target_path_len == 1)) - { - req->target_form = HTTP_TARGET_ASTERISK_FORM; - /* we do not support OPTIONS request */ - return -1; - } - - /* origin-form = 1*( "/" segment ) [ "?" query ] */ - if (req->rx_buf[req->target_path_offset] == '/') - { - /* drop leading slash */ - req->target_path_len--; - req->target_path_offset++; - req->target_form = HTTP_TARGET_ORIGIN_FORM; - http_identify_optional_query (req); - /* can't be CONNECT method */ - return req->method == HTTP_REQ_CONNECT ? -1 : 0; - } - - /* absolute-form = - * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" 
query ] */ - if (req->target_path_len > 8 && - !memcmp (req->rx_buf + req->target_path_offset, "http", 4)) - { - req->scheme = HTTP_URL_SCHEME_HTTP; - p = req->rx_buf + req->target_path_offset + 4; - if (*p == 's') - { - p++; - req->scheme = HTTP_URL_SCHEME_HTTPS; - } - if (*p++ == ':') - { - expect_char ('/'); - expect_char ('/'); - req->target_form = HTTP_TARGET_ABSOLUTE_FORM; - req->target_authority_offset = p - req->rx_buf; - req->target_authority_len = 0; - end = req->rx_buf + req->target_path_offset + req->target_path_len; - while (p < end) - { - if (*p == '/') - { - p++; /* drop leading slash */ - req->target_path_offset = p - req->rx_buf; - req->target_path_len = end - p; - break; - } - req->target_authority_len++; - p++; - } - if (!req->target_path_len) - { - clib_warning ("zero length host"); - return -1; - } - http_identify_optional_query (req); - /* can't be CONNECT method */ - return req->method == HTTP_REQ_CONNECT ? -1 : 0; - } - } - - /* authority-form = host ":" port */ - for (i = req->target_path_offset; - i < (req->target_path_offset + req->target_path_len); i++) - { - if ((req->rx_buf[i] == ':') && (isdigit (req->rx_buf[i + 1]))) - { - req->target_authority_len = req->target_path_len; - req->target_path_len = 0; - req->target_authority_offset = req->target_path_offset; - req->target_path_offset = 0; - req->target_form = HTTP_TARGET_AUTHORITY_FORM; - /* "authority-form" is only used for CONNECT requests */ - return req->method == HTTP_REQ_CONNECT ? 
0 : -1; - } - } - - return -1; -} - -static int -http_parse_request_line (http_req_t *req, http_status_code_t *ec) -{ - int i, target_len; - u32 next_line_offset, method_offset; - - /* request-line = method SP request-target SP HTTP-version CRLF */ - i = v_find_index (req->rx_buf, 8, 0, "\r\n"); - if (i < 0) - { - clib_warning ("request line incomplete"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - HTTP_DBG (2, "request line length: %d", i); - req->control_data_len = i + 2; - next_line_offset = req->control_data_len; - - /* there should be at least one more CRLF */ - if (vec_len (req->rx_buf) < (next_line_offset + 2)) - { - clib_warning ("malformed message, too short"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - - /* - * RFC9112 2.2: - * In the interest of robustness, a server that is expecting to receive and - * parse a request-line SHOULD ignore at least one empty line (CRLF) - * received prior to the request-line. - */ - method_offset = req->rx_buf[0] == '\r' && req->rx_buf[1] == '\n' ? 
2 : 0; - /* parse method */ - if (!memcmp (req->rx_buf + method_offset, "GET ", 4)) - { - HTTP_DBG (0, "GET method"); - req->method = HTTP_REQ_GET; - req->target_path_offset = method_offset + 4; - } - else if (!memcmp (req->rx_buf + method_offset, "POST ", 5)) - { - HTTP_DBG (0, "POST method"); - req->method = HTTP_REQ_POST; - req->target_path_offset = method_offset + 5; - } - else if (!memcmp (req->rx_buf + method_offset, "CONNECT ", 8)) - { - HTTP_DBG (0, "CONNECT method"); - req->method = HTTP_REQ_CONNECT; - req->upgrade_proto = HTTP_UPGRADE_PROTO_NA; - req->target_path_offset = method_offset + 8; - req->is_tunnel = 1; - } - else - { - if (req->rx_buf[method_offset] - 'A' <= 'Z' - 'A') - { - clib_warning ("method not implemented: %8v", req->rx_buf); - *ec = HTTP_STATUS_NOT_IMPLEMENTED; - return -1; - } - else - { - clib_warning ("not method name: %8v", req->rx_buf); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - } - - /* find version */ - i = v_find_index (req->rx_buf, next_line_offset - 11, 11, " HTTP/"); - if (i < 0) - { - clib_warning ("HTTP version not present"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - /* verify major version */ - if (isdigit (req->rx_buf[i + 6])) - { - if (req->rx_buf[i + 6] != '1') - { - clib_warning ("HTTP major version '%c' not supported", - req->rx_buf[i + 6]); - *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED; - return -1; - } - } - else - { - clib_warning ("HTTP major version '%c' is not digit", - req->rx_buf[i + 6]); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - - /* parse request-target */ - HTTP_DBG (2, "http at %d", i); - target_len = i - req->target_path_offset; - HTTP_DBG (2, "target_len %d", target_len); - if (target_len < 1) - { - clib_warning ("request-target not present"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - req->target_path_len = target_len; - req->target_query_offset = 0; - req->target_query_len = 0; - req->target_authority_len = 0; - req->target_authority_offset = 0; - if 
(http_parse_target (req)) - { - clib_warning ("invalid target"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - HTTP_DBG (2, "request-target path length: %u", req->target_path_len); - HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset); - HTTP_DBG (2, "request-target query length: %u", req->target_query_len); - HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset); - - /* set buffer offset to nex line start */ - req->rx_buf_offset = next_line_offset; - - return 0; -} - -#define parse_int(val, mul) \ - do \ - { \ - if (!isdigit (*p)) \ - { \ - clib_warning ("expected digit"); \ - return -1; \ - } \ - val += mul * (*p++ - '0'); \ - } \ - while (0) - -static int -http_parse_status_line (http_req_t *req) -{ - int i; - u32 next_line_offset; - u8 *p, *end; - u16 status_code = 0; - http_main_t *hm = &http_main; - - i = v_find_index (req->rx_buf, 0, 0, "\r\n"); - /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */ - if (i < 0) - { - clib_warning ("status line incomplete"); - return -1; - } - HTTP_DBG (2, "status line length: %d", i); - if (i < 12) - { - clib_warning ("status line too short (%d)", i); - return -1; - } - req->control_data_len = i + 2; - next_line_offset = req->control_data_len; - p = req->rx_buf; - end = req->rx_buf + i; - - /* there should be at least one more CRLF */ - if (vec_len (req->rx_buf) < (next_line_offset + 2)) - { - clib_warning ("malformed message, too short"); - return -1; - } - - /* parse version */ - expect_char ('H'); - expect_char ('T'); - expect_char ('T'); - expect_char ('P'); - expect_char ('/'); - expect_char ('1'); - expect_char ('.'); - if (!isdigit (*p++)) - { - clib_warning ("invalid HTTP minor version"); - return -1; - } - - /* skip space(s) */ - if (*p != ' ') - { - clib_warning ("no space after HTTP version"); - return -1; - } - do - { - p++; - if (p == end) - { - clib_warning ("no status code"); - return -1; - } - } - while (*p == ' '); - - /* parse status code 
*/ - if ((end - p) < 3) - { - clib_warning ("not enough characters for status code"); - return -1; - } - parse_int (status_code, 100); - parse_int (status_code, 10); - parse_int (status_code, 1); - if (status_code < 100 || status_code > 599) - { - clib_warning ("invalid status code %d", status_code); - return -1; - } - req->status_code = hm->sc_by_u16[status_code]; - HTTP_DBG (0, "status code: %d", status_code); - - /* set buffer offset to nex line start */ - req->rx_buf_offset = next_line_offset; - - return 0; -} - -static int -http_identify_headers (http_req_t *req, http_status_code_t *ec) -{ - int rv; - u8 *p, *end, *name_start, *value_start; - u32 name_len, value_len; - http_field_line_t *field_line; - uword header_index; - - vec_reset_length (req->headers); - req->content_len_header_index = ~0; - req->connection_header_index = ~0; - req->upgrade_header_index = ~0; - req->host_header_index = ~0; - req->headers_offset = req->rx_buf_offset; - - /* check if we have any header */ - if ((req->rx_buf[req->rx_buf_offset] == '\r') && - (req->rx_buf[req->rx_buf_offset + 1] == '\n')) - { - /* just another CRLF -> no headers */ - HTTP_DBG (2, "no headers"); - req->headers_len = 0; - req->control_data_len += 2; - return 0; - } - - end = req->rx_buf + vec_len (req->rx_buf); - p = req->rx_buf + req->rx_buf_offset; - - while (1) - { - rv = _parse_field_name (&p, end, &name_start, &name_len); - if (rv != 0) - { - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - rv = _parse_field_value (&p, end, &value_start, &value_len); - if (rv != 0 || (end - p) < 2) - { - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - - vec_add2 (req->headers, field_line, 1); - field_line->name_offset = - (name_start - req->rx_buf) - req->headers_offset; - field_line->name_len = name_len; - field_line->value_offset = - (value_start - req->rx_buf) - req->headers_offset; - field_line->value_len = value_len; - header_index = field_line - req->headers; - - /* find headers that will be used later in 
preprocessing */ - /* names are case-insensitive (RFC9110 section 5.1) */ - if (req->content_len_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) - req->content_len_header_index = header_index; - else if (req->connection_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONNECTION))) - req->connection_header_index = header_index; - else if (req->upgrade_header_index == ~0 && - http_token_is_case ( - (const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_UPGRADE))) - req->upgrade_header_index = header_index; - else if (req->host_header_index == ~0 && - http_token_is_case ((const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_HOST))) - req->host_header_index = header_index; - - /* are we done? */ - if (*p == '\r' && *(p + 1) == '\n') - break; - } - - req->headers_len = p - (req->rx_buf + req->headers_offset); - req->control_data_len += (req->headers_len + 2); - HTTP_DBG (2, "headers length: %u", req->headers_len); - HTTP_DBG (2, "headers offset: %u", req->headers_offset); - - return 0; -} - -static int -http_identify_message_body (http_req_t *req, http_status_code_t *ec) -{ - int i; - u8 *p; - u64 body_len = 0, digit; - http_field_line_t *field_line; - - req->body_len = 0; - - if (req->headers_len == 0) - { - HTTP_DBG (2, "no header, no message-body"); - return 0; - } - if (req->is_tunnel) - { - HTTP_DBG (2, "tunnel, no message-body"); - return 0; - } - - /* TODO check for chunked transfer coding */ - - if (req->content_len_header_index == ~0) - { - HTTP_DBG (2, "Content-Length header not present, no message-body"); - return 0; - } - field_line = vec_elt_at_index (req->headers, req->content_len_header_index); - - p = req->rx_buf + req->headers_offset + field_line->value_offset; - for (i = 0; i < field_line->value_len; i++) - { - /* check for digit */ - if (!isdigit 
(*p)) - { - clib_warning ("expected digit"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - digit = *p - '0'; - u64 new_body_len = body_len * 10 + digit; - /* check for overflow */ - if (new_body_len < body_len) - { - clib_warning ("too big number, overflow"); - *ec = HTTP_STATUS_BAD_REQUEST; - return -1; - } - body_len = new_body_len; - p++; - } - - req->body_len = body_len; - - req->body_offset = req->headers_offset + req->headers_len + 2; - HTTP_DBG (2, "body length: %llu", req->body_len); - HTTP_DBG (2, "body offset: %u", req->body_offset); - - return 0; -} - -static http_sm_result_t -http_req_state_wait_transport_reply (http_conn_t *hc, - transport_send_params_t *sp) -{ - int rv; - http_msg_t msg = {}; - app_worker_t *app_wrk; - session_t *as; - u32 len, max_enq, body_sent; - http_status_code_t ec; - - rv = http_read_message (hc); - - /* Nothing yet, wait for data or timer expire */ - if (rv) - { - HTTP_DBG (1, "no data to deq"); - return HTTP_SM_STOP; - } - - HTTP_DBG (3, "%v", hc->req.rx_buf); - - if (vec_len (hc->req.rx_buf) < 8) - { - clib_warning ("response buffer too short"); - goto error; - } - - rv = http_parse_status_line (&hc->req); - if (rv) - goto error; - - rv = http_identify_headers (&hc->req, &ec); - if (rv) - goto error; - - rv = http_identify_message_body (&hc->req, &ec); - if (rv) - goto error; - - /* send at least "control data" which is necessary minimum, - * if there is some space send also portion of body */ - as = session_get_from_handle (hc->h_pa_session_handle); - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - max_enq -= sizeof (msg); - if (max_enq < hc->req.control_data_len) - { - clib_warning ("not enough room for control data in app's rx fifo"); - goto error; - } - len = clib_min (max_enq, vec_len (hc->req.rx_buf)); - - msg.type = HTTP_MSG_REPLY; - msg.code = hc->req.status_code; - msg.data.headers_offset = hc->req.headers_offset; - msg.data.headers_len = hc->req.headers_len; - msg.data.body_offset = hc->req.body_offset; - 
msg.data.body_len = hc->req.body_len; - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = len; - msg.data.headers_ctx = pointer_to_uword (hc->req.headers); - - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { hc->req.rx_buf, len } }; - - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); - ASSERT (rv == (sizeof (msg) + len)); - - http_read_message_drop (hc, len); - - body_sent = len - hc->req.control_data_len; - hc->req.to_recv = hc->req.body_len - body_sent; - if (hc->req.to_recv == 0) - { - /* all sent, we are done */ - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - } - else - { - /* stream rest of the response body */ - http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); - } - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - return HTTP_SM_STOP; - -error: - http_read_message_drop_all (hc); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - return HTTP_SM_ERROR; -} - -#define http_field_line_value_token(_fl, _req) \ - (const char *) ((_req)->rx_buf + (_req)->headers_offset + \ - (_fl)->value_offset), \ - (_fl)->value_len - -static void -http_check_connection_upgrade (http_req_t *req) -{ - http_field_line_t *connection, *upgrade; - u8 skip; - - skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) + - (req->upgrade_header_index == ~0); - if (skip) - return; - - connection = vec_elt_at_index (req->headers, req->connection_header_index); - /* connection options are case-insensitive (RFC9110 7.6.1) */ - if (http_token_is_case (http_field_line_value_token (connection, req), - http_token_lit ("upgrade"))) - { - upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index); - - /* check upgrade protocol, we want to ignore something like upgrade to - * newer HTTP version, only tunnels are supported */ - if (0) - ; 
-#define _(sym, str) \ - else if (http_token_is_case (http_field_line_value_token (upgrade, req), \ - http_token_lit (str))) req->upgrade_proto = \ - HTTP_UPGRADE_PROTO_##sym; - foreach_http_upgrade_proto -#undef _ - else return; - - HTTP_DBG (1, "connection upgrade: %U", format_http_bytes, - req->rx_buf + req->headers_offset + upgrade->value_offset, - upgrade->value_len); - req->is_tunnel = 1; - req->method = HTTP_REQ_CONNECT; - } -} - -static void -http_target_fixup (http_conn_t *hc) -{ - http_field_line_t *host; - - if (hc->req.target_form == HTTP_TARGET_ABSOLUTE_FORM) - return; - - /* scheme fixup */ - hc->req.scheme = session_get_transport_proto (session_get_from_handle ( - hc->h_tc_session_handle)) == TRANSPORT_PROTO_TLS ? - HTTP_URL_SCHEME_HTTPS : - HTTP_URL_SCHEME_HTTP; - - if (hc->req.target_form == HTTP_TARGET_AUTHORITY_FORM || - hc->req.connection_header_index == ~0) - return; - - /* authority fixup */ - host = vec_elt_at_index (hc->req.headers, hc->req.connection_header_index); - hc->req.target_authority_offset = host->value_offset; - hc->req.target_authority_len = host->value_len; -} - -static http_sm_result_t -http_req_state_wait_transport_method (http_conn_t *hc, - transport_send_params_t *sp) -{ - http_status_code_t ec; - app_worker_t *app_wrk; - http_msg_t msg; - session_t *as; - int rv; - u32 len, max_enq, body_sent; - u64 max_deq; - - rv = http_read_message (hc); - - /* Nothing yet, wait for data or timer expire */ - if (rv) - return HTTP_SM_STOP; - - HTTP_DBG (3, "%v", hc->req.rx_buf); - - if (vec_len (hc->req.rx_buf) < 8) - { - ec = HTTP_STATUS_BAD_REQUEST; - goto error; - } - - rv = http_parse_request_line (&hc->req, &ec); - if (rv) - goto error; - - rv = http_identify_headers (&hc->req, &ec); - if (rv) - goto error; - - http_target_fixup (hc); - http_check_connection_upgrade (&hc->req); - - rv = http_identify_message_body (&hc->req, &ec); - if (rv) - goto error; - - /* send at least "control data" which is necessary minimum, - * if there is 
some space send also portion of body */ - as = session_get_from_handle (hc->h_pa_session_handle); - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq < hc->req.control_data_len) - { - clib_warning ("not enough room for control data in app's rx fifo"); - ec = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - /* do not dequeue more than one HTTP request, we do not support pipelining */ - max_deq = clib_min (hc->req.control_data_len + hc->req.body_len, - vec_len (hc->req.rx_buf)); - len = clib_min (max_enq, max_deq); - - msg.type = HTTP_MSG_REQUEST; - msg.method_type = hc->req.method; - msg.data.type = HTTP_MSG_DATA_INLINE; - msg.data.len = len; - msg.data.scheme = hc->req.scheme; - msg.data.target_authority_offset = hc->req.target_authority_offset; - msg.data.target_authority_len = hc->req.target_authority_len; - msg.data.target_path_offset = hc->req.target_path_offset; - msg.data.target_path_len = hc->req.target_path_len; - msg.data.target_query_offset = hc->req.target_query_offset; - msg.data.target_query_len = hc->req.target_query_len; - msg.data.headers_offset = hc->req.headers_offset; - msg.data.headers_len = hc->req.headers_len; - msg.data.body_offset = hc->req.body_offset; - msg.data.body_len = hc->req.body_len; - msg.data.headers_ctx = pointer_to_uword (hc->req.headers); - msg.data.upgrade_proto = hc->req.upgrade_proto; - - svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, - { hc->req.rx_buf, len } }; - - rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */); - ASSERT (rv == (sizeof (msg) + len)); - - body_sent = len - hc->req.control_data_len; - hc->req.to_recv = hc->req.body_len - body_sent; - if (hc->req.to_recv == 0) - { - /* drop everything, we do not support pipelining */ - http_read_message_drop_all (hc); - /* all sent, we are done */ - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_REPLY); - } - else - { - http_read_message_drop (hc, len); - /* stream rest of the response body */ - http_req_state_change (hc, 
HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); - } - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - - return HTTP_SM_STOP; - -error: - http_read_message_drop_all (hc); - http_send_error (hc, ec); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - - return HTTP_SM_ERROR; -} - -static void -http_write_app_headers (http_conn_t *hc, http_msg_t *msg, u8 **tx_buf) -{ - http_main_t *hm = &http_main; - session_t *as; - u8 *app_headers, *p, *end; - u32 *tmp; - int rv; - - as = session_get_from_handle (hc->h_pa_session_handle); - - /* read app header list */ - if (msg->data.type == HTTP_MSG_DATA_PTR) - { - uword app_headers_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), - (u8 *) &app_headers_ptr); - ASSERT (rv == sizeof (app_headers_ptr)); - app_headers = uword_to_pointer (app_headers_ptr, u8 *); - } - else - { - app_headers = hm->app_header_lists[hc->c_thread_index]; - rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers); - ASSERT (rv == msg->data.headers_len); - } - - /* serialize app headers to tx_buf */ - end = app_headers + msg->data.headers_len; - while (app_headers < end) - { - /* custom header name? 
*/ - tmp = (u32 *) app_headers; - if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) - { - http_custom_token_t *name, *value; - name = (http_custom_token_t *) app_headers; - u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; - app_headers += sizeof (http_custom_token_t) + name_len; - value = (http_custom_token_t *) app_headers; - app_headers += sizeof (http_custom_token_t) + value->len; - vec_add2 (*tx_buf, p, name_len + value->len + 4); - clib_memcpy (p, name->token, name_len); - p += name_len; - *p++ = ':'; - *p++ = ' '; - clib_memcpy (p, value->token, value->len); - p += value->len; - *p++ = '\r'; - *p++ = '\n'; - } - else - { - http_app_header_t *header; - header = (http_app_header_t *) app_headers; - app_headers += sizeof (http_app_header_t) + header->value.len; - http_token_t name = { http_header_name_token (header->name) }; - vec_add2 (*tx_buf, p, name.len + header->value.len + 4); - clib_memcpy (p, name.base, name.len); - p += name.len; - *p++ = ':'; - *p++ = ' '; - clib_memcpy (p, header->value.token, header->value.len); - p += header->value.len; - *p++ = '\r'; - *p++ = '\n'; - } - } -} - -static http_sm_result_t -http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u8 *response; - u32 sent; - f64 now; - session_t *as; - http_status_code_t sc; - http_msg_t msg; - int rv; - http_sm_result_t sm_result = HTTP_SM_ERROR; - http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD; - - as = session_get_from_handle (hc->h_pa_session_handle); - - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.data.type > HTTP_MSG_DATA_PTR) - { - clib_warning ("no data"); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.type != HTTP_MSG_REPLY) - { - clib_warning ("unexpected message type %d", msg.type); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.code >= HTTP_N_STATUS) - { - clib_warning ("unsupported 
status code: %d", msg.code); - return HTTP_SM_ERROR; - } - - response = hm->tx_bufs[hc->c_thread_index]; - vec_reset_length (response); - /* - * Add "protocol layer" headers: - * - current time - * - server name - * - data length - */ - now = clib_timebase_now (&hm->timebase); - response = - format (response, http_response_template, http_status_code_str[msg.code], - /* Date */ - format_clib_timebase_time, now, - /* Server */ - hc->app_name); - - /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a - * 2xx (Successful) response to CONNECT or with a status code of 101 - * (Switching Protocols). */ - if (hc->req.is_tunnel && (http_status_code_str[msg.code][0] == '2' || - msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS)) - { - ASSERT (msg.data.body_len == 0); - next_state = HTTP_REQ_STATE_TUNNEL; - if (hc->req.upgrade_proto > HTTP_UPGRADE_PROTO_NA) - { - response = format (response, connection_upgrade_template, - http_upgrade_proto_str[hc->req.upgrade_proto]); - if (hc->req.upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP && - hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM) - next_state = HTTP_REQ_STATE_UDP_TUNNEL; - } - /* cleanup some stuff we don't need anymore in tunnel mode */ - vec_free (hc->req.rx_buf); - vec_free (hc->req.headers); - http_buffer_free (&hc->req.tx_buf); - hc->req.to_skip = 0; - } - else - response = format (response, content_len_template, msg.data.body_len); - - /* Add headers from app (if any) */ - if (msg.data.headers_len) - { - HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - http_write_app_headers (hc, &msg, &response); - } - /* Add empty line after headers */ - response = format (response, "\r\n"); - HTTP_DBG (3, "%v", response); - - sent = http_send_data (hc, response, vec_len (response)); - if (sent != vec_len (response)) - { - clib_warning ("sending status-line and headers failed!"); - sc = HTTP_STATUS_INTERNAL_ERROR; - goto error; - } - - if (msg.data.body_len) - { - /* Start sending the actual data */ - 
http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type], - as->tx_fifo, msg.data.body_len); - next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; - sm_result = HTTP_SM_CONTINUE; - } - else - { - /* No response body, we are done */ - sm_result = HTTP_SM_STOP; - } - - http_req_state_change (hc, next_state); - - ASSERT (sp->max_burst_size >= sent); - sp->max_burst_size -= sent; - return sm_result; - -error: - http_send_error (hc, sc); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - http_msg_t msg; - session_t *as; - u8 *target_buff = 0, *request = 0, *target; - u32 sent; - int rv; - http_sm_result_t sm_result = HTTP_SM_ERROR; - http_req_state_t next_state; - - as = session_get_from_handle (hc->h_pa_session_handle); - - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.data.type > HTTP_MSG_DATA_PTR) - { - clib_warning ("no data"); - goto error; - } - - if (msg.type != HTTP_MSG_REQUEST) - { - clib_warning ("unexpected message type %d", msg.type); - goto error; - } - - /* read request target */ - if (msg.data.type == HTTP_MSG_DATA_PTR) - { - uword target_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr), - (u8 *) &target_ptr); - ASSERT (rv == sizeof (target_ptr)); - target = uword_to_pointer (target_ptr, u8 *); - } - else - { - vec_validate (target_buff, msg.data.target_path_len - 1); - rv = - svm_fifo_dequeue (as->tx_fifo, msg.data.target_path_len, target_buff); - ASSERT (rv == msg.data.target_path_len); - target = target_buff; - } - - request = hm->tx_bufs[hc->c_thread_index]; - vec_reset_length (request); - /* currently we support only GET and POST method */ - if (msg.method_type == HTTP_REQ_GET) - { - if (msg.data.body_len) - { - clib_warning ("GET request shouldn't include data"); - goto 
error; - } - /* - * Add "protocol layer" headers: - * - host - * - user agent - */ - request = format (request, http_get_request_template, - /* target */ - target, - /* Host */ - hc->host, - /* User-Agent */ - hc->app_name); - - next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; - sm_result = HTTP_SM_STOP; - } - else if (msg.method_type == HTTP_REQ_POST) - { - if (!msg.data.body_len) - { - clib_warning ("POST request should include data"); - goto error; - } - /* - * Add "protocol layer" headers: - * - host - * - user agent - * - content length - */ - request = format (request, http_post_request_template, - /* target */ - target, - /* Host */ - hc->host, - /* User-Agent */ - hc->app_name, - /* Content-Length */ - msg.data.body_len); - - http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type], - as->tx_fifo, msg.data.body_len); - - next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; - sm_result = HTTP_SM_CONTINUE; - } - else - { - clib_warning ("unsupported method %d", msg.method_type); - goto error; - } - - /* Add headers from app (if any) */ - if (msg.data.headers_len) - { - HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - http_write_app_headers (hc, &msg, &request); - } - /* Add empty line after headers */ - request = format (request, "\r\n"); - HTTP_DBG (3, "%v", request); - - sent = http_send_data (hc, request, vec_len (request)); - if (sent != vec_len (request)) - { - clib_warning ("sending request-line and headers failed!"); - sm_result = HTTP_SM_ERROR; - goto error; - } - - http_req_state_change (hc, next_state); - goto done; - -error: - svm_fifo_dequeue_drop_all (as->tx_fifo); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - -done: - vec_free (target_buff); - return sm_result; -} - -static http_sm_result_t -http_req_state_transport_io_more_data (http_conn_t *hc, - transport_send_params_t *sp) -{ - session_t *as, *ts; - app_worker_t *app_wrk; 
- svm_fifo_seg_t _seg, *seg = &_seg; - u32 max_len, max_deq, max_enq, n_segs = 1; - int rv, len; - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (max_deq == 0) - { - HTTP_DBG (1, "no data to deq"); - return HTTP_SM_STOP; - } - - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - return HTTP_SM_STOP; - } - - max_len = clib_min (max_enq, max_deq); - len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len); - if (len < 0) - { - HTTP_DBG (1, "svm_fifo_segments() len %d", len); - return HTTP_SM_STOP; - } - - rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */); - if (rv < 0) - { - clib_warning ("data enqueue failed, rv: %d", rv); - return HTTP_SM_ERROR; - } - - svm_fifo_dequeue_drop (ts->rx_fifo, rv); - if (rv > hc->req.to_recv) - { - clib_warning ("http protocol error: received more data than expected"); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD); - return HTTP_SM_ERROR; - } - hc->req.to_recv -= rv; - HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->req.to_recv); - - /* Finished transaction: - * server back to HTTP_REQ_STATE_WAIT_APP_REPLY - * client to HTTP_REQ_STATE_WAIT_APP_METHOD */ - if (hc->req.to_recv == 0) - http_req_state_change (hc, hc->is_server ? 
HTTP_REQ_STATE_WAIT_APP_REPLY : - HTTP_REQ_STATE_WAIT_APP_METHOD); - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_enqueue_notify (ts); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_send = 64 << 10, n_segs; - http_buffer_t *hb = &hc->req.tx_buf; - svm_fifo_seg_t *seg; - session_t *ts; - int sent = 0; - - max_send = clib_min (max_send, sp->max_burst_size); - ts = session_get_from_handle (hc->h_tc_session_handle); - if ((seg = http_buffer_get_segs (hb, max_send, &n_segs))) - sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs, - 1 /* allow partial */); - - if (sent > 0) - { - /* Ask scheduler to notify app of deq event if needed */ - sp->bytes_dequeued += http_buffer_drain (hb, sent); - sp->max_burst_size -= sent; - } - - /* Not finished sending all data */ - if (!http_buffer_is_drained (hb)) - { - if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - /* Deschedule http session and wait for deq notification if - * underlying ts tx fifo almost full */ - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - } - else - { - if (sent && svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); - - /* Finished transaction: - * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD - * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */ - http_req_state_change (hc, hc->is_server ? 
- HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD : - HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY); - http_buffer_free (hb); - } - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_deq, max_enq, max_read, n_segs = 2; - svm_fifo_seg_t segs[n_segs]; - int n_written = 0; - session_t *as, *ts; - app_worker_t *app_wrk; - - HTTP_DBG (1, "tunnel received data from client"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - { - HTTP_DBG (1, "max_deq == 0"); - return HTTP_SM_STOP; - } - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - return HTTP_SM_STOP; - } - max_read = clib_min (max_enq, max_deq); - svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read); - n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); - ASSERT (n_written > 0); - HTTP_DBG (1, "transfered %u bytes", n_written); - svm_fifo_dequeue_drop (ts->rx_fifo, n_written); - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_program_rx_io_evt (session_handle (ts)); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) -{ - u32 max_deq, max_enq, max_read, n_segs = 2; - svm_fifo_seg_t segs[n_segs]; - session_t *as, *ts; - int n_written = 0; - - HTTP_DBG (1, "tunnel received data from target"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - - max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); - if (PREDICT_FALSE (max_deq == 0)) - { - HTTP_DBG (1, "max_deq == 0"); - goto check_fifo; - } - 
max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo); - if (max_enq == 0) - { - HTTP_DBG (1, "ts tx fifo full"); - goto check_fifo; - } - max_read = clib_min (max_enq, max_deq); - max_read = clib_min (max_read, sp->max_burst_size); - svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read); - n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); - ASSERT (n_written > 0); - HTTP_DBG (1, "transfered %u bytes", n_written); - sp->bytes_dequeued += n_written; - sp->max_burst_size -= n_written; - svm_fifo_dequeue_drop (as->tx_fifo, n_written); - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - -check_fifo: - /* Deschedule and wait for deq notification if ts fifo is almost full */ - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_udp_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u32 to_deq, capsule_size, dgram_size, n_written = 0; - int rv, n_read; - session_t *as, *ts; - app_worker_t *app_wrk; - u8 payload_offset; - u64 payload_len; - session_dgram_hdr_t hdr; - u8 *buf = 0; - - HTTP_DBG (1, "udp tunnel received data from client"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - buf = hm->rx_bufs[hc->c_thread_index]; - to_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo); - - while (to_deq > 0) - { - /* some bytes remaining to skip? 
*/ - if (PREDICT_FALSE (hc->req.to_skip)) - { - if (hc->req.to_skip >= to_deq) - { - svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); - hc->req.to_skip -= to_deq; - goto done; - } - else - { - svm_fifo_dequeue_drop (ts->rx_fifo, hc->req.to_skip); - hc->req.to_skip = 0; - } - } - n_read = - svm_fifo_peek (ts->rx_fifo, 0, HTTP_CAPSULE_HEADER_MAX_SIZE, buf); - ASSERT (n_read > 0); - rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset, - &payload_len); - HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv, - payload_offset, payload_len); - if (PREDICT_FALSE (rv != 0)) - { - if (rv < 0) - { - /* capsule datagram is invalid (session need to be aborted) */ - svm_fifo_dequeue_drop_all (ts->rx_fifo); - session_transport_closing_notify (&hc->connection); - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - return HTTP_SM_STOP; - } - else - { - /* unknown capsule should be skipped */ - if (payload_len <= to_deq) - { - svm_fifo_dequeue_drop (ts->rx_fifo, payload_len); - to_deq -= payload_len; - continue; - } - else - { - svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); - hc->req.to_skip = payload_len - to_deq; - goto done; - } - } - } - capsule_size = payload_offset + payload_len; - /* check if we have the full capsule */ - if (PREDICT_FALSE (to_deq < capsule_size)) - { - HTTP_DBG (1, "capsule not complete"); - goto done; - } - - dgram_size = sizeof (hdr) + payload_len; - if (svm_fifo_max_enqueue_prod (as->rx_fifo) < dgram_size) - { - HTTP_DBG (1, "app's rx fifo full"); - svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - goto done; - } - - /* read capsule payload */ - rv = svm_fifo_peek (ts->rx_fifo, payload_offset, payload_len, buf); - ASSERT (rv == payload_len); - svm_fifo_dequeue_drop (ts->rx_fifo, capsule_size); - - hdr.data_length = payload_len; - hdr.data_offset = 0; - - /* send datagram header and payload */ - svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) }, - { buf, payload_len } }; - rv = 
svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0); - ASSERT (rv > 0); - - n_written += dgram_size; - to_deq -= capsule_size; - } - -done: - HTTP_DBG (1, "written %lu bytes", n_written); - - if (n_written) - { - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - if (app_wrk) - app_worker_rx_notify (app_wrk, as); - } - if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) - session_program_rx_io_evt (session_handle (ts)); - - return HTTP_SM_STOP; -} - -static http_sm_result_t -http_req_state_udp_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) -{ - http_main_t *hm = &http_main; - u32 to_deq, capsule_size, dgram_size, n_written = 0; - session_t *as, *ts; - int rv; - session_dgram_pre_hdr_t hdr; - u8 *buf; - u8 *payload; - - HTTP_DBG (1, "udp tunnel received data from target"); - - as = session_get_from_handle (hc->h_pa_session_handle); - ts = session_get_from_handle (hc->h_tc_session_handle); - buf = hm->tx_bufs[hc->c_thread_index]; - to_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); - - while (to_deq > 0) - { - /* read datagram header */ - rv = svm_fifo_peek (as->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr); - ASSERT (rv == sizeof (hdr) && - hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN); - ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN); - dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN; - - if (svm_fifo_max_enqueue_prod (ts->tx_fifo) < - (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD)) - { - HTTP_DBG (1, "ts tx fifo full"); - goto done; - } - - /* create capsule header */ - payload = http_encap_udp_payload_datagram (buf, hdr.data_length); - capsule_size = (payload - buf) + hdr.data_length; - /* read payload */ - rv = svm_fifo_peek (as->tx_fifo, SESSION_CONN_HDR_LEN, hdr.data_length, - payload); - ASSERT (rv == hdr.data_length); - svm_fifo_dequeue_drop (as->tx_fifo, dgram_size); - /* send capsule */ - rv = svm_fifo_enqueue (ts->tx_fifo, capsule_size, buf); - ASSERT (rv == capsule_size); - - n_written += capsule_size; - to_deq -= dgram_size; 
- } - -done: - HTTP_DBG (1, "written %lu bytes", n_written); - if (n_written) - { - if (svm_fifo_set_event (ts->tx_fifo)) - session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); - } - - /* Deschedule and wait for deq notification if ts fifo is almost full */ - if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) - { - svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&hc->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - - return HTTP_SM_STOP; -} - -typedef http_sm_result_t (*http_sm_handler) (http_conn_t *, - transport_send_params_t *sp); - -static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { - 0, /* idle */ - http_req_state_wait_app_method, - 0, /* wait transport reply */ - 0, /* transport io more data */ - 0, /* wait transport method */ - http_req_state_wait_app_reply, - http_req_state_app_io_more_data, - http_req_state_tunnel_tx, - http_req_state_udp_tunnel_tx, -}; - -static_always_inline int -http_req_state_is_tx_valid (http_conn_t *hc) -{ - return tx_state_funcs[hc->req.state] ? 1 : 0; -} - -static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { - 0, /* idle */ - 0, /* wait app method */ - http_req_state_wait_transport_reply, - http_req_state_transport_io_more_data, - http_req_state_wait_transport_method, - 0, /* wait app reply */ - 0, /* app io more data */ - http_req_state_tunnel_rx, - http_req_state_udp_tunnel_rx, -}; - -static_always_inline int -http_req_state_is_rx_valid (http_conn_t *hc) -{ - return rx_state_funcs[hc->req.state] ? 
1 : 0; -} - -static_always_inline void -http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp, - u8 is_tx) -{ - http_sm_result_t res; - - do - { - if (is_tx) - res = tx_state_funcs[hc->req.state](hc, sp); - else - res = rx_state_funcs[hc->req.state](hc, sp); - if (res == HTTP_SM_ERROR) - { - HTTP_DBG (1, "error in state machine %d", res); - return; - } - } - while (res == HTTP_SM_CONTINUE); - - /* Reset the session expiration timer */ - http_conn_timer_update (hc); -} - static int http_ts_rx_callback (session_t *ts) { http_conn_t *hc; + u32 hc_index = http_conn_index_from_handle (ts->opaque); - HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque); + HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_index); - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + hc = http_conn_get_w_thread (hc_index, ts->thread_index); if (hc->state == HTTP_CONN_STATE_CLOSED) { @@ -2187,18 +644,7 @@ http_ts_rx_callback (session_t *ts) return 0; } - if (!http_req_state_is_rx_valid (hc)) - { - clib_warning ("hc [%u]%x invalid rx state: http req state " - "'%U', session state '%U'", - ts->thread_index, ts->opaque, format_http_req_state, - hc->req.state, format_http_conn_state, hc); - svm_fifo_dequeue_drop_all (ts->rx_fifo); - return 0; - } - - HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, 0, 0); + http_vfts[http_version_from_handle (ts->opaque)].transport_rx_callback (hc); if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) { @@ -2213,7 +659,8 @@ http_ts_builtin_tx_callback (session_t *ts) { http_conn_t *hc; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); + hc = http_conn_get_w_thread (http_conn_index_from_handle (ts->opaque), + ts->thread_index); HTTP_DBG (1, "transport connection reschedule"); transport_connection_reschedule (&hc->connection); @@ -2224,25 +671,31 @@ static void http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) { http_conn_t *hc; + http_req_t *req; + u32 hc_index; if (ntf == 
SESSION_CLEANUP_TRANSPORT) return; - hc = http_conn_get_w_thread (ts->opaque, ts->thread_index); - - HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque); + hc_index = http_conn_index_from_handle (ts->opaque); + hc = http_conn_get_w_thread (hc_index, ts->thread_index); - vec_free (hc->req.rx_buf); - vec_free (hc->req.headers); + HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, hc_index); - http_buffer_free (&hc->req.tx_buf); + pool_foreach (req, hc->req_pool) + { + vec_free (req->headers); + vec_free (req->target); + http_buffer_free (&req->tx_buf); + } + pool_free (hc->req_pool); - if (hc->pending_timer == 0) + if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER)) http_conn_timer_stop (hc); session_transport_delete_notify (&hc->connection); - if (!hc->is_server) + if (!(hc->flags & HTTP_CONN_F_IS_SERVER)) { vec_free (hc->app_name); vec_free (hc->host); @@ -2253,11 +706,9 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf) static void http_ts_ho_cleanup_callback (session_t *ts) { - http_conn_t *ho_hc; - HTTP_DBG (1, "half open: %x", ts->opaque); - ho_hc = http_ho_conn_get (ts->opaque); - session_half_open_delete_notify (&ho_hc->connection); - http_ho_conn_free (ho_hc); + u32 ho_hc_index = http_conn_index_from_handle (ts->opaque); + HTTP_DBG (1, "half open: %x", ho_hc_index); + http_ho_try_free (ho_hc_index); } int @@ -2286,6 +737,10 @@ static session_cb_vft_t http_app_cb_vft = { .builtin_app_tx_callback = http_ts_builtin_tx_callback, }; +/*********************************/ +/* transport proto VFT callbacks */ +/*********************************/ + static clib_error_t * http_transport_enable (vlib_main_t *vm, u8 is_en) { @@ -2380,6 +835,8 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc->h_pa_wrk_index = sep->app_wrk_index; hc->h_pa_app_api_ctx = sep->opaque; hc->state = HTTP_CONN_STATE_CONNECTING; + /* TODO: set to HTTP_VERSION_NA in case of TLS (when supported) */ + hc->version = HTTP_VERSION_1; cargs->api_context = 
hc_index; ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); @@ -2391,7 +848,12 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc->timeout = http_cfg->timeout; } - hc->is_server = 0; + ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO); + if (ext_cfg) + { + HTTP_DBG (1, "app set tls"); + cargs->sep.transport_proto = TRANSPORT_PROTO_TLS; + } if (vec_len (app->name)) hc->app_name = vec_dup (app->name); @@ -2483,7 +945,7 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lhc->c_s_index = app_listener_index; lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; - lhc->is_server = 1; + lhc->flags |= HTTP_CONN_F_IS_SERVER; if (vec_len (app->name)) lhc->app_name = vec_dup (app->name); @@ -2518,7 +980,6 @@ http_stop_listen (u32 listener_index) static void http_transport_close (u32 hc_index, u32 thread_index) { - session_t *as; http_conn_t *hc; HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); @@ -2535,19 +996,25 @@ http_transport_close (u32 hc_index, u32 thread_index) HTTP_DBG (1, "nothing to do, already closed"); return; } - as = session_get_from_handle (hc->h_pa_session_handle); - /* Nothing more to send, confirm close */ - if (!svm_fifo_max_dequeue_cons (as->tx_fifo)) - { - session_transport_closed_notify (&hc->connection); - http_disconnect_transport (hc); - } - else + http_vfts[hc->version].app_close_callback (hc); +} + +static void +http_transport_reset (u32 hc_index, u32 thread_index) +{ + http_conn_t *hc; + + HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index); + + hc = http_conn_get_w_thread (hc_index, thread_index); + if (hc->state == HTTP_CONN_STATE_CLOSED) { - /* Wait for all data to be written to ts */ - hc->state = HTTP_CONN_STATE_APP_CLOSED; + HTTP_DBG (1, "nothing to do, already closed"); + return; } + + http_vfts[hc->version].app_reset_callback (hc); } static transport_connection_t * @@ -2585,30 +1052,7 @@ http_app_tx_callback (void *session, 
transport_send_params_t *sp) max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS; sp->max_burst_size = max_burst_sz; - if (!http_req_state_is_tx_valid (hc)) - { - /* Sometimes the server apps can send the response earlier - * than expected (e.g when rejecting a bad request)*/ - if (hc->req.state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && - hc->is_server) - { - svm_fifo_dequeue_drop_all (as->rx_fifo); - hc->req.state = HTTP_REQ_STATE_WAIT_APP_REPLY; - } - else - { - clib_warning ("hc [%u]%x invalid tx state: http req state " - "'%U', session state '%U'", - as->thread_index, as->connection_index, - format_http_req_state, hc->req.state, - format_http_conn_state, hc); - svm_fifo_dequeue_drop_all (as->tx_fifo); - return 0; - } - } - - HTTP_DBG (1, "run state machine"); - http_req_run_state_machine (hc, sp, 1); + http_vfts[hc->version].app_tx_callback (hc, sp); if (hc->state == HTTP_CONN_STATE_APP_CLOSED) { @@ -2630,8 +1074,7 @@ http_app_rx_evt_cb (transport_connection_t *tc) http_conn_t *hc = (http_conn_t *) tc; HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index); - if (hc->req.state == HTTP_REQ_STATE_TUNNEL) - http_req_state_tunnel_rx (hc, 0); + http_vfts[hc->version].app_rx_evt_callback (hc); return 0; } @@ -2749,6 +1192,12 @@ http_transport_cleanup_ho (u32 ho_hc_index) HTTP_DBG (1, "half open: %x", ho_hc_index); ho_hc = http_ho_conn_get (ho_hc_index); + if (ho_hc->h_tc_session_handle == SESSION_INVALID_HANDLE) + { + HTTP_DBG (1, "already pending cleanup"); + ho_hc->flags |= HTTP_CONN_F_NO_APP_SESSION; + return; + } session_cleanup_half_open (ho_hc->h_tc_session_handle); http_ho_conn_free (ho_hc); } @@ -2759,6 +1208,7 @@ static const transport_proto_vft_t http_proto = { .start_listen = http_start_listen, .stop_listen = http_stop_listen, .close = http_transport_close, + .reset = http_transport_reset, .cleanup_ho = http_transport_cleanup_ho, .custom_tx = http_app_tx_callback, .app_rx_evt = http_app_rx_evt_cb, diff --git a/src/plugins/http/http.h 
b/src/plugins/http/http.h index d1e81ab0617..434ff965b6a 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -17,15 +17,9 @@ #define SRC_PLUGINS_HTTP_HTTP_H_ #include <ctype.h> - #include <vnet/plugin/plugin.h> -#include <vpp/app/version.h> - -#include <vppinfra/time_range.h> - -#include <vnet/session/application_interface.h> -#include <vnet/session/application.h> -#include <http/http_buffer.h> +#include <vnet/ip/format.h> +#include <vnet/ip/ip46_address.h> #define HTTP_DEBUG 0 @@ -49,20 +43,6 @@ typedef struct transport_endpt_cfg_http http_udp_tunnel_mode_t udp_tunnel_mode; /**< connect-udp mode */ } transport_endpt_cfg_http_t; -typedef struct http_conn_id_ -{ - union - { - session_handle_t app_session_handle; - u32 parent_app_api_ctx; - }; - session_handle_t tc_session_handle; - u32 parent_app_wrk_index; -} http_conn_id_t; - -STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN, - "ctx id must be less than TRANSPORT_CONN_ID_LEN"); - typedef struct { char *base; @@ -71,45 +51,12 @@ typedef struct #define http_token_lit(s) (s), sizeof (s) - 1 -#define foreach_http_conn_state \ - _ (LISTEN, "LISTEN") \ - _ (CONNECTING, "CONNECTING") \ - _ (ESTABLISHED, "ESTABLISHED") \ - _ (TRANSPORT_CLOSED, "TRANSPORT-CLOSED") \ - _ (APP_CLOSED, "APP-CLOSED") \ - _ (CLOSED, "CLOSED") - -typedef enum http_conn_state_ -{ -#define _(s, str) HTTP_CONN_STATE_##s, - foreach_http_conn_state -#undef _ -} http_conn_state_t; - -#define foreach_http_req_state \ - _ (0, IDLE, "idle") \ - _ (1, WAIT_APP_METHOD, "wait app method") \ - _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \ - _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \ - _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ - _ (5, WAIT_APP_REPLY, "wait app reply") \ - _ (6, APP_IO_MORE_DATA, "app io more data") \ - _ (7, TUNNEL, "tunnel") \ - _ (8, UDP_TUNNEL, "udp tunnel") - -typedef enum http_req_state_ -{ -#define _(n, s, str) HTTP_REQ_STATE_##s = n, - foreach_http_req_state -#undef 
_ - HTTP_REQ_N_STATES -} http_req_state_t; - typedef enum http_req_method_ { HTTP_REQ_GET = 0, HTTP_REQ_POST, HTTP_REQ_CONNECT, + HTTP_REQ_UNKNOWN, /* for internal use */ } http_req_method_t; typedef enum http_msg_type_ @@ -118,14 +65,6 @@ typedef enum http_msg_type_ HTTP_MSG_REPLY } http_msg_type_t; -typedef enum http_target_form_ -{ - HTTP_TARGET_ORIGIN_FORM, - HTTP_TARGET_ABSOLUTE_FORM, - HTTP_TARGET_AUTHORITY_FORM, - HTTP_TARGET_ASTERISK_FORM -} http_target_form_t; - #define foreach_http_content_type \ _ (APP_7Z, ".7z", "application/x-7z-compressed") \ _ (APP_DOC, ".doc", "application/msword") \ @@ -271,96 +210,108 @@ typedef enum http_status_code_ } http_status_code_t; #define foreach_http_header_name \ - _ (ACCEPT, "Accept") \ - _ (ACCEPT_CHARSET, "Accept-Charset") \ - _ (ACCEPT_ENCODING, "Accept-Encoding") \ - _ (ACCEPT_LANGUAGE, "Accept-Language") \ - _ (ACCEPT_RANGES, "Accept-Ranges") \ - _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials") \ - _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers") \ - _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods") \ - _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin") \ - _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers") \ - _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age") \ - _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers") \ - _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method") \ - _ (AGE, "Age") \ - _ (ALLOW, "Allow") \ - _ (ALPN, "ALPN") \ - _ (ALT_SVC, "Alt-Svc") \ - _ (ALT_USED, "Alt-Used") \ - _ (ALTERNATES, "Alternates") \ - _ (AUTHENTICATION_CONTROL, "Authentication-Control") \ - _ (AUTHENTICATION_INFO, "Authentication-Info") \ - _ (AUTHORIZATION, "Authorization") \ - _ (CACHE_CONTROL, "Cache-Control") \ - _ (CACHE_STATUS, "Cache-Status") \ - _ (CAPSULE_PROTOCOL, "Capsule-Protocol") \ - _ (CDN_CACHE_CONTROL, "CDN-Cache-Control") \ - _ (CDN_LOOP, "CDN-Loop") \ - _ (CLIENT_CERT, "Client-Cert") \ - _ 
(CLIENT_CERT_CHAIN, "Client-Cert-Chain") \ - _ (CLOSE, "Close") \ - _ (CONNECTION, "Connection") \ - _ (CONTENT_DIGEST, "Content-Digest") \ - _ (CONTENT_DISPOSITION, "Content-Disposition") \ - _ (CONTENT_ENCODING, "Content-Encoding") \ - _ (CONTENT_LANGUAGE, "Content-Language") \ - _ (CONTENT_LENGTH, "Content-Length") \ - _ (CONTENT_LOCATION, "Content-Location") \ - _ (CONTENT_RANGE, "Content-Range") \ - _ (CONTENT_TYPE, "Content-Type") \ - _ (COOKIE, "Cookie") \ - _ (DATE, "Date") \ - _ (DIGEST, "Digest") \ - _ (DPOP, "DPoP") \ - _ (DPOP_NONCE, "DPoP-Nonce") \ - _ (EARLY_DATA, "Early-Data") \ - _ (ETAG, "ETag") \ - _ (EXPECT, "Expect") \ - _ (EXPIRES, "Expires") \ - _ (FORWARDED, "Forwarded") \ - _ (FROM, "From") \ - _ (HOST, "Host") \ - _ (IF_MATCH, "If-Match") \ - _ (IF_MODIFIED_SINCE, "If-Modified-Since") \ - _ (IF_NONE_MATCH, "If-None-Match") \ - _ (IF_RANGE, "If-Range") \ - _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since") \ - _ (KEEP_ALIVE, "Keep-Alive") \ - _ (LAST_MODIFIED, "Last-Modified") \ - _ (LINK, "Link") \ - _ (LOCATION, "Location") \ - _ (MAX_FORWARDS, "Max-Forwards") \ - _ (ORIGIN, "Origin") \ - _ (PRIORITY, "Priority") \ - _ (PROXY_AUTHENTICATE, "Proxy-Authenticate") \ - _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info") \ - _ (PROXY_AUTHORIZATION, "Proxy-Authorization") \ - _ (PROXY_STATUS, "Proxy-Status") \ - _ (RANGE, "Range") \ - _ (REFERER, "Referer") \ - _ (REPR_DIGEST, "Repr-Digest") \ - _ (SET_COOKIE, "Set-Cookie") \ - _ (SIGNATURE, "Signature") \ - _ (SIGNATURE_INPUT, "Signature-Input") \ - _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security") \ - _ (RETRY_AFTER, "Retry-After") \ - _ (SERVER, "Server") \ - _ (TE, "TE") \ - _ (TRAILER, "Trailer") \ - _ (TRANSFER_ENCODING, "Transfer-Encoding") \ - _ (UPGRADE, "Upgrade") \ - _ (USER_AGENT, "User-Agent") \ - _ (VARY, "Vary") \ - _ (VIA, "Via") \ - _ (WANT_CONTENT_DIGEST, "Want-Content-Digest") \ - _ (WANT_REPR_DIGEST, "Want-Repr-Digest") \ - _ (WWW_AUTHENTICATE, "WWW-Authenticate") 
+ _ (ACCEPT_CHARSET, "Accept-Charset", "accept-charset", 15) \ + _ (ACCEPT_ENCODING, "Accept-Encoding", "accept-encoding", 16) \ + _ (ACCEPT_LANGUAGE, "Accept-Language", "accept-language", 17) \ + _ (ACCEPT_RANGES, "Accept-Ranges", "accept-ranges", 18) \ + _ (ACCEPT, "Accept", "accept", 19) \ + _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials", \ + "access-control-allow-credentials", 0) \ + _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers", \ + "access-control-allow-headers", 0) \ + _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods", \ + "access-control-allow-methods", 0) \ + _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin", \ + "access-control-allow-origin", 20) \ + _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers", \ + "access-control-expose-headers", 0) \ + _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age", \ + "access-control-max-age", 0) \ + _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers", \ + "access-control-request-headers", 0) \ + _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method", \ + "access-control-request-method", 0) \ + _ (AGE, "Age", "age", 21) \ + _ (ALLOW, "Allow", "allow", 22) \ + _ (ALPN, "ALPN", "alpn", 0) \ + _ (ALT_SVC, "Alt-Svc", "alt-svc", 0) \ + _ (ALT_USED, "Alt-Used", "alt-used", 0) \ + _ (ALTERNATES, "Alternates", "alternates", 0) \ + _ (AUTHENTICATION_CONTROL, "Authentication-Control", \ + "authentication-control", 0) \ + _ (AUTHENTICATION_INFO, "Authentication-Info", "authentication-info", 0) \ + _ (AUTHORIZATION, "Authorization", "authorization", 23) \ + _ (CACHE_CONTROL, "Cache-Control", "cache-control", 24) \ + _ (CACHE_STATUS, "Cache-Status", "cache-status", 0) \ + _ (CAPSULE_PROTOCOL, "Capsule-Protocol", "capsule-protocol", 0) \ + _ (CDN_CACHE_CONTROL, "CDN-Cache-Control", "cdn-cache-control", 0) \ + _ (CDN_LOOP, "CDN-Loop", "cdn-loop", 0) \ + _ (CLIENT_CERT, "Client-Cert", "client-cert", 0) \ + _ (CLIENT_CERT_CHAIN, 
"Client-Cert-Chain", "client-cert-chain", 0) \ + _ (CLOSE, "Close", "close", 0) \ + _ (CONNECTION, "Connection", "connection", 0) \ + _ (CONTENT_DIGEST, "Content-Digest", "content-digest", 0) \ + _ (CONTENT_DISPOSITION, "Content-Disposition", "content-disposition", 25) \ + _ (CONTENT_ENCODING, "Content-Encoding", "content-encoding", 26) \ + _ (CONTENT_LANGUAGE, "Content-Language", "content-language", 27) \ + _ (CONTENT_LENGTH, "Content-Length", "content-length", 28) \ + _ (CONTENT_LOCATION, "Content-Location", "content-location", 29) \ + _ (CONTENT_RANGE, "Content-Range", "content-range", 30) \ + _ (CONTENT_TYPE, "Content-Type", "content-type", 31) \ + _ (COOKIE, "Cookie", "cookie", 32) \ + _ (DATE, "Date", "date", 33) \ + _ (DIGEST, "Digest", "digest", 0) \ + _ (DPOP, "DPoP", "dpop", 0) \ + _ (DPOP_NONCE, "DPoP-Nonce", "dpop-nonce", 0) \ + _ (EARLY_DATA, "Early-Data", "early-data", 0) \ + _ (ETAG, "ETag", "etag", 34) \ + _ (EXPECT, "Expect", "expect", 35) \ + _ (EXPIRES, "Expires", "expires", 36) \ + _ (FORWARDED, "Forwarded", "forwarded", 0) \ + _ (FROM, "From", "from", 37) \ + _ (HOST, "Host", "host", 38) \ + _ (IF_MATCH, "If-Match", "if-match", 39) \ + _ (IF_MODIFIED_SINCE, "If-Modified-Since", "if-modified-since", 40) \ + _ (IF_NONE_MATCH, "If-None-Match", "if-none-match", 41) \ + _ (IF_RANGE, "If-Range", "if-range", 42) \ + _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since", "if-unmodified-since", 43) \ + _ (KEEP_ALIVE, "Keep-Alive", "keep-alive", 0) \ + _ (LAST_MODIFIED, "Last-Modified", "last-modified", 44) \ + _ (LINK, "Link", "link", 45) \ + _ (LOCATION, "Location", "location", 46) \ + _ (MAX_FORWARDS, "Max-Forwards", "max-forwards", 47) \ + _ (ORIGIN, "Origin", "origin", 0) \ + _ (PRIORITY, "Priority", "priority", 0) \ + _ (PROXY_AUTHENTICATE, "Proxy-Authenticate", "proxy-authenticate", 48) \ + _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info", \ + "proxy-authentication-info", 0) \ + _ (PROXY_AUTHORIZATION, "Proxy-Authorization", 
"proxy-authorization", 49) \ + _ (PROXY_STATUS, "Proxy-Status", "proxy-status", 0) \ + _ (RANGE, "Range", "range", 50) \ + _ (REFERER, "Referer", "referer", 51) \ + _ (REFRESH, "Refresh", "refresh", 52) \ + _ (REPR_DIGEST, "Repr-Digest", "repr-digest", 0) \ + _ (RETRY_AFTER, "Retry-After", "retry-after", 53) \ + _ (SERVER, "Server", "server", 54) \ + _ (SET_COOKIE, "Set-Cookie", "set-cookie", 55) \ + _ (SIGNATURE, "Signature", "signature", 0) \ + _ (SIGNATURE_INPUT, "Signature-Input", "signature-input", 0) \ + _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security", \ + "strict-transport-security", 56) \ + _ (TE, "TE", "te", 0) \ + _ (TRAILER, "Trailer", "trailer", 0) \ + _ (TRANSFER_ENCODING, "Transfer-Encoding", "transfer-encoding", 57) \ + _ (UPGRADE, "Upgrade", "upgrade", 0) \ + _ (USER_AGENT, "User-Agent", "user-agent", 58) \ + _ (VARY, "Vary", "vary", 59) \ + _ (VIA, "Via", "via", 60) \ + _ (WANT_CONTENT_DIGEST, "Want-Content-Digest", "want-content-digest", 0) \ + _ (WANT_REPR_DIGEST, "Want-Repr-Digest", "want-repr-digest", 0) \ + _ (WWW_AUTHENTICATE, "WWW-Authenticate", "www-authenticate", 61) typedef enum http_header_name_ { -#define _(sym, str) HTTP_HEADER_##sym, +#define _(sym, str_canonical, str_lower, hpack_index) HTTP_HEADER_##sym, foreach_http_header_name #undef _ } http_header_name_t; @@ -399,6 +350,7 @@ typedef enum http_url_scheme_ { HTTP_URL_SCHEME_HTTP, HTTP_URL_SCHEME_HTTPS, + HTTP_URL_SCHEME_UNKNOWN, /* for internal use */ } http_url_scheme_t; typedef struct http_msg_data_ @@ -432,118 +384,6 @@ typedef struct http_msg_ http_msg_data_t data; } http_msg_t; -typedef struct http_req_ -{ - http_req_state_t state; /* state-machine state */ - - http_buffer_t tx_buf; /* message body from app to be sent */ - - /* - * for parsing of incoming message from transport - */ - u8 *rx_buf; /* this should hold at least control data */ - u32 rx_buf_offset; /* current offset during parsing */ - u32 control_data_len; /* start line + headers + empty line */ - - 
union - { - u64 to_recv; /* remaining bytes of body to receive from transport */ - u64 to_skip; /* remaining bytes of capsule to skip */ - }; - - u8 is_tunnel; - - /* - * parsed metadata for app - */ - union - { - http_status_code_t status_code; - http_req_method_t method; - }; - - http_target_form_t target_form; - http_url_scheme_t scheme; - u32 target_authority_offset; - u32 target_authority_len; - u32 target_path_offset; - u32 target_path_len; - u32 target_query_offset; - u32 target_query_len; - - u32 headers_offset; - u32 headers_len; - - u32 body_offset; - u64 body_len; - - http_field_line_t *headers; - uword content_len_header_index; - uword connection_header_index; - uword upgrade_header_index; - uword host_header_index; - - http_upgrade_proto_t upgrade_proto; -} http_req_t; - -typedef struct http_tc_ -{ - union - { - transport_connection_t connection; - http_conn_id_t c_http_conn_id; - }; -#define h_tc_session_handle c_http_conn_id.tc_session_handle -#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index -#define h_pa_session_handle c_http_conn_id.app_session_handle -#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx -#define h_hc_index connection.c_index - - http_conn_state_t state; - u32 timer_handle; - u32 timeout; - u8 pending_timer; - u8 *app_name; - u8 *host; - u8 is_server; - http_udp_tunnel_mode_t udp_tunnel_mode; - - http_req_t req; -} http_conn_t; - -typedef struct http_worker_ -{ - http_conn_t *conn_pool; -} http_worker_t; - -typedef struct http_main_ -{ - http_worker_t *wrk; - http_conn_t *listener_pool; - http_conn_t *ho_conn_pool; - u32 app_index; - - u8 **rx_bufs; - u8 **tx_bufs; - u8 **app_header_lists; - - clib_timebase_t timebase; - - u16 *sc_by_u16; - /* - * Runtime config - */ - u8 debug_level; - u8 is_init; - - /* - * Config - */ - u64 first_seg_size; - u64 add_seg_size; - u32 fifo_size; -} http_main_t; - always_inline u8 * format_http_bytes (u8 *s, va_list *va) { @@ -669,7 +509,8 @@ http_percent_decode (u8 *src, u32 len) } 
/** - * Remove dot segments from path (RFC3986 section 5.2.4) + * Sanitize HTTP path by squashing repeating slashes and removing + * dot segments from path (RFC3986 section 5.2.4) * * @param path Path to sanitize. * @@ -678,18 +519,18 @@ http_percent_decode (u8 *src, u32 len) * The caller is always responsible to free the returned vector. */ always_inline u8 * -http_path_remove_dot_segments (u8 *path) +http_path_sanitize (u8 *path) { u32 *segments = 0, *segments_len = 0, segment_len; u8 *new_path = 0; int i, ii; - if (!path) + if (!path || vec_len (path) == 0) return vec_new (u8, 0); segments = vec_new (u32, 1); /* first segment */ - segments[0] = 0; + segments[0] = (path[0] == '/' ? 1 : 0); /* find all segments */ for (i = 1; i < (vec_len (path) - 1); i++) { @@ -704,7 +545,8 @@ http_path_remove_dot_segments (u8 *path) for (i = 0; i < vec_len (segments_len); i++) { segment_len = segments[i + 1] - segments[i]; - if (segment_len == 2 && path[segments[i]] == '.') + /* aside from dots, skip empty segments (double slashes) */ + if ((segment_len == 2 && path[segments[i]] == '.') || segment_len == 1) segment_len = 0; else if (segment_len == 3 && path[segments[i]] == '.' 
&& path[segments[i] + 1] == '.') @@ -736,124 +578,6 @@ http_path_remove_dot_segments (u8 *path) return new_path; } -always_inline int -_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start, - u32 *field_name_len) -{ - u32 name_len = 0; - u8 *p; - - static uword tchar[4] = { - /* !#$%'*+-.0123456789 */ - 0x03ff6cba00000000, - /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */ - 0x57ffffffc7fffffe, - 0x0000000000000000, - 0x0000000000000000, - }; - - p = *pos; - - *field_name_start = p; - while (p != end) - { - if (clib_bitmap_get_no_check (tchar, *p)) - { - name_len++; - p++; - } - else if (*p == ':') - { - if (name_len == 0) - { - clib_warning ("empty field name"); - return -1; - } - *field_name_len = name_len; - p++; - *pos = p; - return 0; - } - else - { - clib_warning ("invalid character %d", *p); - return -1; - } - } - clib_warning ("field name end not found"); - return -1; -} - -always_inline int -_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start, - u32 *field_value_len) -{ - u32 value_len = 0; - u8 *p; - - p = *pos; - - /* skip leading whitespace */ - while (1) - { - if (p == end) - { - clib_warning ("field value not found"); - return -1; - } - else if (*p != ' ' && *p != '\t') - { - break; - } - p++; - } - - *field_value_start = p; - while (p != end) - { - if (*p == '\r') - { - if ((end - p) < 1) - { - clib_warning ("incorrect field line end"); - return -1; - } - p++; - if (*p == '\n') - { - if (value_len == 0) - { - clib_warning ("empty field value"); - return -1; - } - p++; - *pos = p; - /* skip trailing whitespace */ - p = *field_value_start + value_len - 1; - while (*p == ' ' || *p == '\t') - { - p--; - value_len--; - } - *field_value_len = value_len; - return 0; - } - clib_warning ("CR without LF"); - return -1; - } - if (*p < ' ' && *p != '\t') - { - clib_warning ("invalid character %d", *p); - return -1; - } - p++; - value_len++; - } - - clib_warning ("field value end not found"); - return -1; -} - typedef struct { 
http_token_t name; @@ -873,6 +597,16 @@ typedef struct .values = 0, .value_by_name = 0, .buf = 0, .concatenated_values = 0, \ } +/** + * Case-sensitive comparison of two tokens. + * + * @param actual Pointer to the first token. + * @param actual_len Length of the first token. + * @param expected Pointer to the second token. + * @param expected_len Length of the second token. + * + * @return @c 1 if tokens are same, @c 0 otherwise. + */ always_inline u8 http_token_is (const char *actual, uword actual_len, const char *expected, uword expected_len) @@ -903,6 +637,16 @@ http_tolower_word (uword x) return (x | y); } +/** + * Case-insensitive comparison of two tokens. + * + * @param actual Pointer to the first token. + * @param actual_len Length of the first token. + * @param expected Pointer to the second token. + * @param expected_len Length of the second token. + * + * @return @c 1 if tokens are same, @c 0 otherwise. + */ always_inline u8 http_token_is_case (const char *actual, uword actual_len, const char *expected, uword expected_len) @@ -934,6 +678,16 @@ http_token_is_case (const char *actual, uword actual_len, const char *expected, return 1; } +/** + * Check if there is occurrence of token in another token. + * + * @param haystack Pointer to the token being searched. + * @param haystack_len Length of the token being searched. + * @param needle The token to search for. + * @param needle_len Length of the token to search for. + * + * @return @c 1 if in case of success, @c 0 otherwise. + */ always_inline u8 http_token_contains (const char *haystack, uword haystack_len, const char *needle, uword needle_len) @@ -1158,6 +912,13 @@ typedef struct /* Use high bit of header name length as custom header name bit. */ #define HTTP_CUSTOM_HEADER_NAME_BIT (1 << 31) +/** + * Initialize headers list context. + * + * @param ctx Headers list context. + * @param buf Buffer, which store headers list, provided by app. + * @param len Length of headers list buffer. 
+ */ always_inline void http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len) { @@ -1166,30 +927,53 @@ http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len) ctx->buf = buf; } -always_inline void +/** + * Add header with predefined name to the headers list. + * + * @param ctx Headers list context. + * @param name Header name ID (see @ref http_header_name_t). + * @param value Header value pointer. + * @param value_len Header value length. + * + * @return @c 0 if in case of success, @c -1 otherwise. + */ +always_inline int http_add_header (http_headers_ctx_t *ctx, http_header_name_t name, const char *value, uword value_len) { http_app_header_t *header; - ASSERT ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) < - ctx->len); + if ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) > ctx->len) + return -1; header = (http_app_header_t *) (ctx->buf + ctx->tail_offset); header->name = (u32) name; header->value.len = (u32) value_len; clib_memcpy (header->value.token, (u8 *) value, value_len); ctx->tail_offset += sizeof (http_app_header_t) + value_len; + return 0; } -always_inline void +/** + * Add header with custom name to the headers list. + * + * @param ctx Headers list context. + * @param name Header name pointer. + * @param name_len Header name length. + * @param value Header value pointer. + * @param value_len Header value length. + * + * @return @c 0 if in case of success, @c -1 otherwise. 
+ */ +always_inline int http_add_custom_header (http_headers_ctx_t *ctx, const char *name, uword name_len, const char *value, uword value_len) { http_custom_token_t *token; - ASSERT ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len + - value_len) < ctx->len); + if ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len + + value_len) > ctx->len) + return -1; /* name */ token = (http_custom_token_t *) (ctx->buf + ctx->tail_offset); @@ -1202,6 +986,18 @@ http_add_custom_header (http_headers_ctx_t *ctx, const char *name, token->len = (u32) value_len; clib_memcpy (token->token, (u8 *) value, token->len); ctx->tail_offset += sizeof (http_custom_token_t) + value_len; + return 0; +} + +/** + * Truncate the header list + * + * @param ctx Headers list context. + */ +always_inline void +http_truncate_headers_list (http_headers_ctx_t *ctx) +{ + ctx->tail_offset = 0; } typedef enum http_uri_host_type_ @@ -1491,6 +1287,15 @@ http_parse_authority (u8 *authority, u32 authority_len, return token_start == end ? 0 : -1; } +/** + * Format given authority (RFC3986 section 3.2) + * + * @param authority Authority to format. + * + * @return New vector with formated authority. + * + * The caller is always responsible to free the returned vector. + */ always_inline u8 * http_serialize_authority (http_uri_authority_t *authority) { diff --git a/src/plugins/http/http1.c b/src/plugins/http/http1.c new file mode 100644 index 00000000000..c152956e43c --- /dev/null +++ b/src/plugins/http/http1.c @@ -0,0 +1,1751 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#include <vnet/session/application.h> + +#include <http/http.h> +#include <http/http_header_names.h> +#include <http/http_private.h> +#include <http/http_status_codes.h> +#include <http/http_timer.h> + +const char *http1_upgrade_proto_str[] = { "", +#define _(sym, str) str, + foreach_http_upgrade_proto +#undef _ +}; + +/** + * http error boilerplate + */ +static const char *error_template = "HTTP/1.1 %s\r\n" + "Date: %U GMT\r\n" + "Connection: close\r\n" + "Content-Length: 0\r\n\r\n"; + +/** + * http response boilerplate + */ +static const char *response_template = "HTTP/1.1 %s\r\n" + "Date: %U GMT\r\n" + "Server: %v\r\n"; + +static const char *content_len_template = "Content-Length: %llu\r\n"; + +static const char *connection_upgrade_template = "Connection: upgrade\r\n" + "Upgrade: %s\r\n"; + +/** + * http request boilerplate + */ +static const char *get_request_template = "GET %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n"; + +static const char *post_request_template = "POST %s HTTP/1.1\r\n" + "Host: %v\r\n" + "User-Agent: %v\r\n" + "Content-Length: %llu\r\n"; + +static void +http1_send_error (http_conn_t *hc, http_status_code_t ec, + transport_send_params_t *sp) +{ + u8 *data; + + if (ec >= HTTP_N_STATUS) + ec = HTTP_STATUS_INTERNAL_ERROR; + + data = format (0, error_template, http_status_code_str[ec], + format_http_time_now, hc); + HTTP_DBG (3, "%v", data); + http_io_ts_write (hc, data, vec_len (data), sp); + vec_free (data); + http_io_ts_after_write (hc, sp, 0, 1); +} + +static int +http1_read_message (http_conn_t *hc, u8 *rx_buf) +{ + u32 max_deq; + + max_deq = http_io_ts_max_read (hc); + if (PREDICT_FALSE (max_deq == 0)) + return -1; + + vec_validate (rx_buf, max_deq - 1); + http_io_ts_read (hc, rx_buf, max_deq, 1); + + return 0; +} + +static void +http1_identify_optional_query (http_req_t *req, u8 *rx_buf) +{ + int i; + for (i = req->target_path_offset; + i < (req->target_path_offset + req->target_path_len); i++) + { + if (rx_buf[i] == 
'?') + { + req->target_query_offset = i + 1; + req->target_query_len = req->target_path_offset + + req->target_path_len - + req->target_query_offset; + req->target_path_len = + req->target_path_len - req->target_query_len - 1; + break; + } + } +} + +static int +http1_parse_target (http_req_t *req, u8 *rx_buf) +{ + int i; + u8 *p, *end; + + /* asterisk-form = "*" */ + if ((rx_buf[req->target_path_offset] == '*') && (req->target_path_len == 1)) + { + req->target_form = HTTP_TARGET_ASTERISK_FORM; + /* we do not support OPTIONS request */ + return -1; + } + + /* origin-form = 1*( "/" segment ) [ "?" query ] */ + if (rx_buf[req->target_path_offset] == '/') + { + /* drop leading slash */ + req->target_path_len--; + req->target_path_offset++; + req->target_form = HTTP_TARGET_ORIGIN_FORM; + http1_identify_optional_query (req, rx_buf); + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? -1 : 0; + } + + /* absolute-form = + * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */ + if (req->target_path_len > 8 && + !memcmp (rx_buf + req->target_path_offset, "http", 4)) + { + req->scheme = HTTP_URL_SCHEME_HTTP; + p = rx_buf + req->target_path_offset + 4; + if (*p == 's') + { + p++; + req->scheme = HTTP_URL_SCHEME_HTTPS; + } + if (*p++ == ':') + { + expect_char ('/'); + expect_char ('/'); + req->target_form = HTTP_TARGET_ABSOLUTE_FORM; + req->target_authority_offset = p - rx_buf; + req->target_authority_len = 0; + end = rx_buf + req->target_path_offset + req->target_path_len; + while (p < end) + { + if (*p == '/') + { + p++; /* drop leading slash */ + req->target_path_offset = p - rx_buf; + req->target_path_len = end - p; + break; + } + req->target_authority_len++; + p++; + } + if (!req->target_path_len) + { + clib_warning ("zero length host"); + return -1; + } + http1_identify_optional_query (req, rx_buf); + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? 
-1 : 0; + } + } + + /* authority-form = host ":" port */ + for (i = req->target_path_offset; + i < (req->target_path_offset + req->target_path_len); i++) + { + if ((rx_buf[i] == ':') && (isdigit (rx_buf[i + 1]))) + { + req->target_authority_len = req->target_path_len; + req->target_path_len = 0; + req->target_authority_offset = req->target_path_offset; + req->target_path_offset = 0; + req->target_form = HTTP_TARGET_AUTHORITY_FORM; + /* "authority-form" is only used for CONNECT requests */ + return req->method == HTTP_REQ_CONNECT ? 0 : -1; + } + } + + return -1; +} + +static int +http1_parse_request_line (http_req_t *req, u8 *rx_buf, http_status_code_t *ec) +{ + int i, target_len; + u32 next_line_offset, method_offset; + + /* request-line = method SP request-target SP HTTP-version CRLF */ + i = http_v_find_index (rx_buf, 8, 0, "\r\n"); + if (i < 0) + { + clib_warning ("request line incomplete"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + HTTP_DBG (2, "request line length: %d", i); + req->control_data_len = i + 2; + next_line_offset = req->control_data_len; + + /* there should be at least one more CRLF */ + if (vec_len (rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + /* + * RFC9112 2.2: + * In the interest of robustness, a server that is expecting to receive and + * parse a request-line SHOULD ignore at least one empty line (CRLF) + * received prior to the request-line. + */ + method_offset = rx_buf[0] == '\r' && rx_buf[1] == '\n' ? 
2 : 0; + /* parse method */ + if (!memcmp (rx_buf + method_offset, "GET ", 4)) + { + HTTP_DBG (0, "GET method"); + req->method = HTTP_REQ_GET; + req->target_path_offset = method_offset + 4; + } + else if (!memcmp (rx_buf + method_offset, "POST ", 5)) + { + HTTP_DBG (0, "POST method"); + req->method = HTTP_REQ_POST; + req->target_path_offset = method_offset + 5; + } + else if (!memcmp (rx_buf + method_offset, "CONNECT ", 8)) + { + HTTP_DBG (0, "CONNECT method"); + req->method = HTTP_REQ_CONNECT; + req->upgrade_proto = HTTP_UPGRADE_PROTO_NA; + req->target_path_offset = method_offset + 8; + req->is_tunnel = 1; + } + else + { + if (rx_buf[method_offset] - 'A' <= 'Z' - 'A') + { + *ec = HTTP_STATUS_NOT_IMPLEMENTED; + return -1; + } + else + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + } + + /* find version */ + i = http_v_find_index (rx_buf, next_line_offset - 11, 11, " HTTP/"); + if (i < 0) + { + clib_warning ("HTTP version not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + /* verify major version */ + if (isdigit (rx_buf[i + 6])) + { + if (rx_buf[i + 6] != '1') + { + clib_warning ("HTTP major version '%c' not supported", + rx_buf[i + 6]); + *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED; + return -1; + } + } + else + { + clib_warning ("HTTP major version '%c' is not digit", rx_buf[i + 6]); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + /* parse request-target */ + HTTP_DBG (2, "http at %d", i); + target_len = i - req->target_path_offset; + HTTP_DBG (2, "target_len %d", target_len); + if (target_len < 1) + { + clib_warning ("request-target not present"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + req->target_path_len = target_len; + req->target_query_offset = 0; + req->target_query_len = 0; + req->target_authority_len = 0; + req->target_authority_offset = 0; + if (http1_parse_target (req, rx_buf)) + { + clib_warning ("invalid target"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + HTTP_DBG (2, "request-target path length: 
%u", req->target_path_len); + HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset); + HTTP_DBG (2, "request-target query length: %u", req->target_query_len); + HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset); + + /* set buffer offset to nex line start */ + req->rx_buf_offset = next_line_offset; + + return 0; +} + +static int +http1_parse_status_line (http_req_t *req, u8 *rx_buf) +{ + int i; + u32 next_line_offset; + u8 *p, *end; + u16 status_code = 0; + + i = http_v_find_index (rx_buf, 0, 0, "\r\n"); + /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */ + if (i < 0) + { + clib_warning ("status line incomplete"); + return -1; + } + HTTP_DBG (2, "status line length: %d", i); + if (i < 12) + { + clib_warning ("status line too short (%d)", i); + return -1; + } + req->control_data_len = i + 2; + next_line_offset = req->control_data_len; + p = rx_buf; + end = rx_buf + i; + + /* there should be at least one more CRLF */ + if (vec_len (rx_buf) < (next_line_offset + 2)) + { + clib_warning ("malformed message, too short"); + return -1; + } + + /* parse version */ + expect_char ('H'); + expect_char ('T'); + expect_char ('T'); + expect_char ('P'); + expect_char ('/'); + expect_char ('1'); + expect_char ('.'); + if (!isdigit (*p++)) + { + clib_warning ("invalid HTTP minor version"); + return -1; + } + + /* skip space(s) */ + if (*p != ' ') + { + clib_warning ("no space after HTTP version"); + return -1; + } + do + { + p++; + if (p == end) + { + clib_warning ("no status code"); + return -1; + } + } + while (*p == ' '); + + /* parse status code */ + if ((end - p) < 3) + { + clib_warning ("not enough characters for status code"); + return -1; + } + parse_int (status_code, 100); + parse_int (status_code, 10); + parse_int (status_code, 1); + if (status_code < 100 || status_code > 599) + { + clib_warning ("invalid status code %d", status_code); + return -1; + } + req->status_code = http_sc_by_u16 (status_code); + 
HTTP_DBG (0, "status code: %d", status_code); + + /* set buffer offset to nex line start */ + req->rx_buf_offset = next_line_offset; + + return 0; +} + +always_inline int +http1_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start, + u32 *field_name_len) +{ + u32 name_len = 0; + u8 *p; + + static uword tchar[4] = { + /* !#$%'*+-.0123456789 */ + 0x03ff6cba00000000, + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */ + 0x57ffffffc7fffffe, + 0x0000000000000000, + 0x0000000000000000, + }; + + p = *pos; + + *field_name_start = p; + while (p != end) + { + if (clib_bitmap_get_no_check (tchar, *p)) + { + name_len++; + p++; + } + else if (*p == ':') + { + if (name_len == 0) + { + clib_warning ("empty field name"); + return -1; + } + *field_name_len = name_len; + p++; + *pos = p; + return 0; + } + else + { + clib_warning ("invalid character %d", *p); + return -1; + } + } + clib_warning ("field name end not found"); + return -1; +} + +always_inline int +http1_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start, + u32 *field_value_len) +{ + u32 value_len = 0; + u8 *p; + + p = *pos; + + /* skip leading whitespace */ + while (1) + { + if (p == end) + { + clib_warning ("field value not found"); + return -1; + } + else if (*p != ' ' && *p != '\t') + { + break; + } + p++; + } + + *field_value_start = p; + while (p != end) + { + if (*p == '\r') + { + if ((end - p) < 1) + { + clib_warning ("incorrect field line end"); + return -1; + } + p++; + if (*p == '\n') + { + if (value_len == 0) + { + clib_warning ("empty field value"); + return -1; + } + p++; + *pos = p; + /* skip trailing whitespace */ + p = *field_value_start + value_len - 1; + while (*p == ' ' || *p == '\t') + { + p--; + value_len--; + } + *field_value_len = value_len; + return 0; + } + clib_warning ("CR without LF"); + return -1; + } + if (*p < ' ' && *p != '\t') + { + clib_warning ("invalid character %d", *p); + return -1; + } + p++; + value_len++; + } + + clib_warning ("field value end not 
found"); + return -1; +} + +static int +http1_identify_headers (http_req_t *req, u8 *rx_buf, http_status_code_t *ec) +{ + int rv; + u8 *p, *end, *name_start, *value_start; + u32 name_len, value_len; + http_field_line_t *field_line; + uword header_index; + + vec_reset_length (req->headers); + req->content_len_header_index = ~0; + req->connection_header_index = ~0; + req->upgrade_header_index = ~0; + req->host_header_index = ~0; + req->headers_offset = req->rx_buf_offset; + + /* check if we have any header */ + if ((rx_buf[req->rx_buf_offset] == '\r') && + (rx_buf[req->rx_buf_offset + 1] == '\n')) + { + /* just another CRLF -> no headers */ + HTTP_DBG (2, "no headers"); + req->headers_len = 0; + req->control_data_len += 2; + return 0; + } + + end = vec_end (rx_buf); + p = rx_buf + req->rx_buf_offset; + + while (1) + { + rv = http1_parse_field_name (&p, end, &name_start, &name_len); + if (rv != 0) + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + rv = http1_parse_field_value (&p, end, &value_start, &value_len); + if (rv != 0 || (end - p) < 2) + { + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + + vec_add2 (req->headers, field_line, 1); + field_line->name_offset = (name_start - rx_buf) - req->headers_offset; + field_line->name_len = name_len; + field_line->value_offset = (value_start - rx_buf) - req->headers_offset; + field_line->value_len = value_len; + header_index = field_line - req->headers; + + /* find headers that will be used later in preprocessing */ + /* names are case-insensitive (RFC9110 section 5.1) */ + if (req->content_len_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) + req->content_len_header_index = header_index; + else if (req->connection_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONNECTION))) + req->connection_header_index = header_index; + else if 
(req->upgrade_header_index == ~0 && + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_UPGRADE))) + req->upgrade_header_index = header_index; + else if (req->host_header_index == ~0 && + http_token_is_case ((const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_HOST))) + req->host_header_index = header_index; + + /* are we done? */ + if (*p == '\r' && *(p + 1) == '\n') + break; + } + + req->headers_len = p - (rx_buf + req->headers_offset); + req->control_data_len += (req->headers_len + 2); + HTTP_DBG (2, "headers length: %u", req->headers_len); + HTTP_DBG (2, "headers offset: %u", req->headers_offset); + + return 0; +} + +static int +http1_identify_message_body (http_req_t *req, u8 *rx_buf, + http_status_code_t *ec) +{ + int i; + u8 *p; + u64 body_len = 0, digit; + http_field_line_t *field_line; + + req->body_len = 0; + + if (req->headers_len == 0) + { + HTTP_DBG (2, "no header, no message-body"); + return 0; + } + if (req->is_tunnel) + { + HTTP_DBG (2, "tunnel, no message-body"); + return 0; + } + + /* TODO check for chunked transfer coding */ + + if (req->content_len_header_index == ~0) + { + HTTP_DBG (2, "Content-Length header not present, no message-body"); + return 0; + } + field_line = vec_elt_at_index (req->headers, req->content_len_header_index); + + p = rx_buf + req->headers_offset + field_line->value_offset; + for (i = 0; i < field_line->value_len; i++) + { + /* check for digit */ + if (!isdigit (*p)) + { + clib_warning ("expected digit"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + digit = *p - '0'; + u64 new_body_len = body_len * 10 + digit; + /* check for overflow */ + if (new_body_len < body_len) + { + clib_warning ("too big number, overflow"); + *ec = HTTP_STATUS_BAD_REQUEST; + return -1; + } + body_len = new_body_len; + p++; + } + + req->body_len = body_len; + + req->body_offset = req->headers_offset + req->headers_len + 2; + HTTP_DBG (2, "body length: %llu", 
req->body_len); + HTTP_DBG (2, "body offset: %u", req->body_offset); + + return 0; +} + +static void +http1_check_connection_upgrade (http_req_t *req, u8 *rx_buf) +{ + http_field_line_t *connection, *upgrade; + u8 skip; + + skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) + + (req->upgrade_header_index == ~0); + if (skip) + return; + + connection = vec_elt_at_index (req->headers, req->connection_header_index); + /* connection options are case-insensitive (RFC9110 7.6.1) */ + if (http_token_is_case ( + http_field_line_value_token (connection, req, rx_buf), + http_token_lit ("upgrade"))) + { + upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index); + + /* check upgrade protocol, we want to ignore something like upgrade to + * newer HTTP version, only tunnels are supported */ + if (0) + ; +#define _(sym, str) \ + else if (http_token_is_case ( \ + http_field_line_value_token (upgrade, req, rx_buf), \ + http_token_lit (str))) req->upgrade_proto = \ + HTTP_UPGRADE_PROTO_##sym; + foreach_http_upgrade_proto +#undef _ + else return; + + req->is_tunnel = 1; + req->method = HTTP_REQ_CONNECT; + } +} + +static void +http1_target_fixup (http_conn_t *hc, http_req_t *req) +{ + http_field_line_t *host; + + if (req->target_form == HTTP_TARGET_ABSOLUTE_FORM) + return; + + /* scheme fixup */ + req->scheme = http_get_transport_proto (hc) == TRANSPORT_PROTO_TLS ? 
+ HTTP_URL_SCHEME_HTTPS : + HTTP_URL_SCHEME_HTTP; + + if (req->target_form == HTTP_TARGET_AUTHORITY_FORM || + req->connection_header_index == ~0) + return; + + /* authority fixup */ + host = vec_elt_at_index (req->headers, req->connection_header_index); + req->target_authority_offset = host->value_offset; + req->target_authority_len = host->value_len; +} + +static void +http1_write_app_headers (http_conn_t *hc, http_msg_t *msg, u8 **tx_buf) +{ + u8 *app_headers, *p, *end; + u32 *tmp; + + /* read app header list */ + app_headers = http_get_app_header_list (hc, msg); + + /* serialize app headers to tx_buf */ + end = app_headers + msg->data.headers_len; + while (app_headers < end) + { + /* custom header name? */ + tmp = (u32 *) app_headers; + if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) + { + http_custom_token_t *name, *value; + name = (http_custom_token_t *) app_headers; + u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; + app_headers += sizeof (http_custom_token_t) + name_len; + value = (http_custom_token_t *) app_headers; + app_headers += sizeof (http_custom_token_t) + value->len; + vec_add2 (*tx_buf, p, name_len + value->len + 4); + clib_memcpy (p, name->token, name_len); + p += name_len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, value->token, value->len); + p += value->len; + *p++ = '\r'; + *p++ = '\n'; + } + else + { + http_app_header_t *header; + header = (http_app_header_t *) app_headers; + app_headers += sizeof (http_app_header_t) + header->value.len; + http_token_t name = { http_header_name_token (header->name) }; + vec_add2 (*tx_buf, p, name.len + header->value.len + 4); + clib_memcpy (p, name.base, name.len); + p += name.len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, header->value.token, header->value.len); + p += header->value.len; + *p++ = '\r'; + *p++ = '\n'; + } + } +} + +/*************************************/ +/* request state machine handlers RX */ +/*************************************/ + +static http_sm_result_t 
+http1_req_state_wait_transport_reply (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + int rv; + http_msg_t msg = {}; + u32 len, max_enq, body_sent; + http_status_code_t ec; + u8 *rx_buf; + + rx_buf = http_get_rx_buf (hc); + rv = http1_read_message (hc, rx_buf); + + /* Nothing yet, wait for data or timer expire */ + if (rv) + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } + + HTTP_DBG (3, "%v", rx_buf); + + if (vec_len (rx_buf) < 8) + { + clib_warning ("response buffer too short"); + goto error; + } + + rv = http1_parse_status_line (req, rx_buf); + if (rv) + goto error; + + rv = http1_identify_headers (req, rx_buf, &ec); + if (rv) + goto error; + + rv = http1_identify_message_body (req, rx_buf, &ec); + if (rv) + goto error; + + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + max_enq = http_io_as_max_write (req); + max_enq -= sizeof (msg); + if (max_enq < req->control_data_len) + { + clib_warning ("not enough room for control data in app's rx fifo"); + goto error; + } + len = clib_min (max_enq, vec_len (rx_buf)); + + msg.type = HTTP_MSG_REPLY; + msg.code = req->status_code; + msg.data.headers_offset = req->headers_offset; + msg.data.headers_len = req->headers_len; + msg.data.body_offset = req->body_offset; + msg.data.body_len = req->body_len; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = len; + msg.data.headers_ctx = pointer_to_uword (req->headers); + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } }; + + http_io_as_write_segs (req, segs, 2); + + body_sent = len - req->control_data_len; + req->to_recv = req->body_len - body_sent; + if (req->to_recv == 0) + { + /* all sent, we are done */ + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + } + else + { + /* stream rest of the response body */ + http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); + } + + http_io_ts_drain (hc, len); + 
http_io_ts_after_read (hc, 1); + http_app_worker_rx_notify (req); + return HTTP_SM_STOP; + +error: + http_io_ts_drain_all (hc); + http_io_ts_after_read (hc, 1); + session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); + return HTTP_SM_ERROR; +} + +static http_sm_result_t +http1_req_state_wait_transport_method (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + http_status_code_t ec; + http_msg_t msg; + int rv; + u32 len, max_enq, body_sent; + u64 max_deq; + u8 *rx_buf; + + rx_buf = http_get_rx_buf (hc); + rv = http1_read_message (hc, rx_buf); + + /* Nothing yet, wait for data or timer expire */ + if (rv) + return HTTP_SM_STOP; + + HTTP_DBG (3, "%v", rx_buf); + + if (vec_len (rx_buf) < 8) + { + ec = HTTP_STATUS_BAD_REQUEST; + goto error; + } + + rv = http1_parse_request_line (req, rx_buf, &ec); + if (rv) + goto error; + + rv = http1_identify_headers (req, rx_buf, &ec); + if (rv) + goto error; + + http1_target_fixup (hc, req); + http1_check_connection_upgrade (req, rx_buf); + + rv = http1_identify_message_body (req, rx_buf, &ec); + if (rv) + goto error; + + /* send at least "control data" which is necessary minimum, + * if there is some space send also portion of body */ + max_enq = http_io_as_max_write (req); + if (max_enq < req->control_data_len) + { + clib_warning ("not enough room for control data in app's rx fifo"); + ec = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + /* do not dequeue more than one HTTP request, we do not support pipelining */ + max_deq = clib_min (req->control_data_len + req->body_len, vec_len (rx_buf)); + len = clib_min (max_enq, max_deq); + + msg.type = HTTP_MSG_REQUEST; + msg.method_type = req->method; + msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.len = len; + msg.data.scheme = req->scheme; + msg.data.target_authority_offset = req->target_authority_offset; + msg.data.target_authority_len = req->target_authority_len; + 
msg.data.target_path_offset = req->target_path_offset; + msg.data.target_path_len = req->target_path_len; + msg.data.target_query_offset = req->target_query_offset; + msg.data.target_query_len = req->target_query_len; + msg.data.headers_offset = req->headers_offset; + msg.data.headers_len = req->headers_len; + msg.data.body_offset = req->body_offset; + msg.data.body_len = req->body_len; + msg.data.headers_ctx = pointer_to_uword (req->headers); + msg.data.upgrade_proto = req->upgrade_proto; + + svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } }; + + http_io_as_write_segs (req, segs, 2); + + body_sent = len - req->control_data_len; + req->to_recv = req->body_len - body_sent; + if (req->to_recv == 0) + { + /* drop everything, we do not support pipelining */ + http_io_ts_drain_all (hc); + /* all sent, we are done */ + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY); + } + else + { + http_io_ts_drain (hc, len); + /* stream rest of the response body */ + http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA); + } + + http_app_worker_rx_notify (req); + http_io_ts_after_read (hc, 1); + + return HTTP_SM_STOP; + +error: + http_io_ts_drain_all (hc); + http_io_ts_after_read (hc, 1); + http1_send_error (hc, ec, 0); + session_transport_closing_notify (&hc->connection); + http_disconnect_transport (hc); + + return HTTP_SM_ERROR; +} + +static http_sm_result_t +http1_req_state_transport_io_more_data (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_len, max_deq, max_enq, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written; + + max_deq = http_io_ts_max_read (hc); + if (max_deq == 0) + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } + + max_enq = http_io_as_max_write (req); + if (max_enq == 0) + { + HTTP_DBG (1, "app's rx fifo full"); + http_io_as_want_deq_ntf (req); + return HTTP_SM_STOP; + } + + max_len = clib_min (max_enq, max_deq); + http_io_ts_read_segs (hc, segs, &n_segs, 
max_len); + + n_written = http_io_as_write_segs (req, segs, n_segs); + + if (n_written > req->to_recv) + { + clib_warning ("http protocol error: received more data than expected"); + session_transport_closing_notify (&hc->connection); + http_disconnect_transport (hc); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + return HTTP_SM_ERROR; + } + req->to_recv -= n_written; + http_io_ts_drain (hc, n_written); + HTTP_DBG (1, "drained %d from ts; remains %lu", n_written, req->to_recv); + + /* Finished transaction: + * server back to HTTP_REQ_STATE_WAIT_APP_REPLY + * client to HTTP_REQ_STATE_WAIT_APP_METHOD */ + if (req->to_recv == 0) + http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ? + HTTP_REQ_STATE_WAIT_APP_REPLY : + HTTP_REQ_STATE_WAIT_APP_METHOD); + + http_app_worker_rx_notify (req); + + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_tunnel_rx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from client"); + + max_deq = http_io_ts_max_read (hc); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + return HTTP_SM_STOP; + } + max_enq = http_io_as_max_write (req); + if (max_enq == 0) + { + HTTP_DBG (1, "app's rx fifo full"); + http_io_as_want_deq_ntf (req); + return HTTP_SM_STOP; + } + max_read = clib_min (max_enq, max_deq); + http_io_ts_read_segs (hc, segs, &n_segs, max_read); + n_written = http_io_as_write_segs (req, segs, n_segs); + http_io_ts_drain (hc, n_written); + HTTP_DBG (1, "transfered %u bytes", n_written); + http_app_worker_rx_notify (req); + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_udp_tunnel_rx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 to_deq, capsule_size, dgram_size, n_read, n_written = 0; + int rv; 
+ u8 payload_offset = 0; + u64 payload_len = 0; + session_dgram_hdr_t hdr; + u8 *buf = 0; + + HTTP_DBG (1, "udp tunnel received data from client"); + + buf = http_get_rx_buf (hc); + to_deq = http_io_ts_max_read (hc); + + while (to_deq > 0) + { + /* some bytes remaining to skip? */ + if (PREDICT_FALSE (req->to_skip)) + { + if (req->to_skip >= to_deq) + { + http_io_ts_drain (hc, to_deq); + req->to_skip -= to_deq; + goto done; + } + else + { + http_io_ts_drain (hc, req->to_skip); + req->to_skip = 0; + } + } + n_read = http_io_ts_read (hc, buf, HTTP_CAPSULE_HEADER_MAX_SIZE, 1); + rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset, + &payload_len); + HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv, + payload_offset, payload_len); + if (PREDICT_FALSE (rv != 0)) + { + if (rv < 0) + { + /* capsule datagram is invalid (session need to be aborted) */ + http_io_ts_drain_all (hc); + session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); + return HTTP_SM_STOP; + } + else + { + /* unknown capsule should be skipped */ + if (payload_len <= to_deq) + { + http_io_ts_drain (hc, payload_len); + to_deq -= payload_len; + continue; + } + else + { + http_io_ts_drain (hc, to_deq); + req->to_skip = payload_len - to_deq; + goto done; + } + } + } + capsule_size = payload_offset + payload_len; + /* check if we have the full capsule */ + if (PREDICT_FALSE (to_deq < capsule_size)) + { + HTTP_DBG (1, "capsule not complete"); + goto done; + } + + dgram_size = sizeof (hdr) + payload_len; + if (http_io_as_max_write (req) < dgram_size) + { + HTTP_DBG (1, "app's rx fifo full"); + http_io_as_want_deq_ntf (req); + goto done; + } + + http_io_ts_drain (hc, payload_offset); + + /* read capsule payload */ + http_io_ts_read (hc, buf, payload_len, 0); + + hdr.data_length = payload_len; + hdr.data_offset = 0; + + /* send datagram header and payload */ + svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, 
sizeof (hdr) }, + { buf, payload_len } }; + http_io_as_write_segs (req, segs, 2); + + n_written += dgram_size; + to_deq -= capsule_size; + } + +done: + HTTP_DBG (1, "written %lu bytes", n_written); + + if (n_written) + http_app_worker_rx_notify (req); + + http_io_ts_after_read (hc, 0); + + return HTTP_SM_STOP; +} + +/*************************************/ +/* request state machine handlers TX */ +/*************************************/ + +static http_sm_result_t +http1_req_state_wait_app_reply (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u8 *response; + u32 max_enq; + http_status_code_t sc; + http_msg_t msg; + http_sm_result_t sm_result = HTTP_SM_ERROR; + http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD; + + http_get_app_msg (req, &msg); + + if (msg.data.type > HTTP_MSG_DATA_PTR) + { + clib_warning ("no data"); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + + if (msg.type != HTTP_MSG_REPLY) + { + clib_warning ("unexpected message type %d", msg.type); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + + if (msg.code >= HTTP_N_STATUS) + { + clib_warning ("unsupported status code: %d", msg.code); + return HTTP_SM_ERROR; + } + + response = http_get_tx_buf (hc); + /* + * Add "protocol layer" headers: + * - current time + * - server name + * - data length + */ + response = + format (response, response_template, http_status_code_str[msg.code], + /* Date */ + format_http_time_now, hc, + /* Server */ + hc->app_name); + + /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a + * 2xx (Successful) response to CONNECT or with a status code of 101 + * (Switching Protocols). 
*/ + if (req->is_tunnel && (http_status_code_str[msg.code][0] == '2' || + msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS)) + { + ASSERT (msg.data.body_len == 0); + next_state = HTTP_REQ_STATE_TUNNEL; + if (req->upgrade_proto > HTTP_UPGRADE_PROTO_NA) + { + response = format (response, connection_upgrade_template, + http1_upgrade_proto_str[req->upgrade_proto]); + if (req->upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP && + hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM) + next_state = HTTP_REQ_STATE_UDP_TUNNEL; + } + /* cleanup some stuff we don't need anymore in tunnel mode */ + vec_free (req->headers); + http_buffer_free (&req->tx_buf); + req->to_skip = 0; + } + else + response = format (response, content_len_template, msg.data.body_len); + + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + http1_write_app_headers (hc, &msg, &response); + } + /* Add empty line after headers */ + response = format (response, "\r\n"); + HTTP_DBG (3, "%v", response); + + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq < vec_len (response)) + { + clib_warning ("sending status-line and headers failed!"); + sc = HTTP_STATUS_INTERNAL_ERROR; + goto error; + } + http_io_ts_write (hc, response, vec_len (response), sp); + + if (msg.data.body_len) + { + /* Start sending the actual data */ + http_req_tx_buffer_init (req, &msg); + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; + sm_result = HTTP_SM_CONTINUE; + } + else + { + /* No response body, we are done */ + sm_result = HTTP_SM_STOP; + } + + http_req_state_change (req, next_state); + + http_io_ts_after_write (hc, sp, 0, 1); + return sm_result; + +error: + http1_send_error (hc, sc, sp); + session_transport_closing_notify (&hc->connection); + http_disconnect_transport (hc); + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_wait_app_method (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + http_msg_t msg; + u8 
*request = 0, *target; + u32 max_enq; + http_sm_result_t sm_result = HTTP_SM_ERROR; + http_req_state_t next_state; + + http_get_app_msg (req, &msg); + + if (msg.data.type > HTTP_MSG_DATA_PTR) + { + clib_warning ("no data"); + goto error; + } + + if (msg.type != HTTP_MSG_REQUEST) + { + clib_warning ("unexpected message type %d", msg.type); + goto error; + } + + /* read request target */ + target = http_get_app_target (req, &msg); + + request = http_get_tx_buf (hc); + /* currently we support only GET and POST method */ + if (msg.method_type == HTTP_REQ_GET) + { + if (msg.data.body_len) + { + clib_warning ("GET request shouldn't include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + */ + request = format (request, get_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name); + + next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; + sm_result = HTTP_SM_STOP; + } + else if (msg.method_type == HTTP_REQ_POST) + { + if (!msg.data.body_len) + { + clib_warning ("POST request should include data"); + goto error; + } + /* + * Add "protocol layer" headers: + * - host + * - user agent + * - content length + */ + request = format (request, post_request_template, + /* target */ + target, + /* Host */ + hc->host, + /* User-Agent */ + hc->app_name, + /* Content-Length */ + msg.data.body_len); + + http_req_tx_buffer_init (req, &msg); + + next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA; + sm_result = HTTP_SM_CONTINUE; + } + else + { + clib_warning ("unsupported method %d", msg.method_type); + goto error; + } + + /* Add headers from app (if any) */ + if (msg.data.headers_len) + { + HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); + http1_write_app_headers (hc, &msg, &request); + } + /* Add empty line after headers */ + request = format (request, "\r\n"); + HTTP_DBG (3, "%v", request); + + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq < vec_len (request)) + { + 
clib_warning ("sending request-line and headers failed!"); + sm_result = HTTP_SM_ERROR; + goto error; + } + http_io_ts_write (hc, request, vec_len (request), sp); + + http_req_state_change (req, next_state); + + http_io_ts_after_write (hc, sp, 0, 1); + goto done; + +error: + http_io_as_drain_all (req); + session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); + +done: + return sm_result; +} + +static http_sm_result_t +http1_req_state_app_io_more_data (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_write, n_segs, n_written = 0; + http_buffer_t *hb = &req->tx_buf; + svm_fifo_seg_t *seg; + u8 finished = 0; + + ASSERT (!http_buffer_is_drained (hb)); + max_write = http_io_ts_max_write (hc, sp); + if (max_write == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + + seg = http_buffer_get_segs (hb, max_write, &n_segs); + if (!seg) + { + HTTP_DBG (1, "no data to deq"); + goto check_fifo; + } + + n_written = http_io_ts_write_segs (hc, seg, n_segs, sp); + + http_buffer_drain (hb, n_written); + finished = http_buffer_is_drained (hb); + + if (finished) + { + /* Finished transaction: + * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD + * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */ + http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ? 
+ HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD : + HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY); + http_buffer_free (hb); + } + +check_fifo: + http_io_ts_after_write (hc, sp, finished, !!n_written); + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_tunnel_tx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 max_deq, max_enq, max_read, n_segs = 2; + svm_fifo_seg_t segs[n_segs]; + int n_written = 0; + + HTTP_DBG (1, "tunnel received data from target"); + + max_deq = http_io_as_max_read (req); + if (PREDICT_FALSE (max_deq == 0)) + { + HTTP_DBG (1, "max_deq == 0"); + goto check_fifo; + } + max_enq = http_io_ts_max_write (hc, sp); + if (max_enq == 0) + { + HTTP_DBG (1, "ts tx fifo full"); + goto check_fifo; + } + max_read = clib_min (max_enq, max_deq); + http_io_as_read_segs (req, segs, &n_segs, max_read); + n_written = http_io_ts_write_segs (hc, segs, n_segs, sp); + http_io_as_drain (req, n_written); + +check_fifo: + http_io_ts_after_write (hc, sp, 0, !!n_written); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http1_req_state_udp_tunnel_tx (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp) +{ + u32 to_deq, capsule_size, dgram_size; + u8 written = 0; + session_dgram_hdr_t hdr; + u8 *buf; + u8 *payload; + + HTTP_DBG (1, "udp tunnel received data from target"); + + buf = http_get_tx_buf (hc); + to_deq = http_io_as_max_read (req); + + while (to_deq > 0) + { + /* read datagram header */ + http_io_as_read (req, (u8 *) &hdr, sizeof (hdr), 1); + ASSERT (hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN); + dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN; + ASSERT (to_deq >= dgram_size); + + if (http_io_ts_max_write (hc, sp) < + (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD)) + { + HTTP_DBG (1, "ts tx fifo full"); + goto done; + } + + /* create capsule header */ + payload = http_encap_udp_payload_datagram (buf, hdr.data_length); + capsule_size = (payload - buf) + hdr.data_length; + /* read payload */ + 
http_io_as_read (req, payload, hdr.data_length, 1); + http_io_as_drain (req, dgram_size); + /* send capsule */ + http_io_ts_write (hc, buf, capsule_size, sp); + + written = 1; + to_deq -= dgram_size; + } + +done: + http_io_ts_after_write (hc, sp, 0, written); + + return HTTP_SM_STOP; +} + +/*************************/ +/* request state machine */ +/*************************/ + +static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + http1_req_state_wait_app_method, + 0, /* wait transport reply */ + 0, /* transport io more data */ + 0, /* wait transport method */ + http1_req_state_wait_app_reply, + http1_req_state_app_io_more_data, + http1_req_state_tunnel_tx, + http1_req_state_udp_tunnel_tx, +}; + +static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { + 0, /* idle */ + 0, /* wait app method */ + http1_req_state_wait_transport_reply, + http1_req_state_transport_io_more_data, + http1_req_state_wait_transport_method, + 0, /* wait app reply */ + 0, /* app io more data */ + http1_req_state_tunnel_rx, + http1_req_state_udp_tunnel_rx, +}; + +static_always_inline int +http1_req_state_is_tx_valid (http_req_t *req) +{ + return tx_state_funcs[req->state] ? 1 : 0; +} + +static_always_inline int +http1_req_state_is_rx_valid (http_req_t *req) +{ + return rx_state_funcs[req->state] ? 
1 : 0; +} + +static_always_inline void +http1_req_run_state_machine (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp, u8 is_tx) +{ + http_sm_result_t res; + + do + { + if (is_tx) + res = tx_state_funcs[req->state](hc, req, sp); + else + res = rx_state_funcs[req->state](hc, req, 0); + if (res == HTTP_SM_ERROR) + { + HTTP_DBG (1, "error in state machine %d", res); + return; + } + } + while (res == HTTP_SM_CONTINUE); + + /* Reset the session expiration timer */ + http_conn_timer_update (hc); +} + +/*****************/ +/* http core VFT */ +/*****************/ + +static void +http1_app_tx_callback (http_conn_t *hc, transport_send_params_t *sp) +{ + http_req_t *req; + + req = http_get_req_if_valid (hc, 0); + if (!req) + { + http_alloc_req (hc); + req = http_get_req (hc, 0); + req->app_session_handle = hc->h_pa_session_handle; + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD); + } + + if (!http1_req_state_is_tx_valid (req)) + { + /* Sometimes the server apps can send the response earlier + * than expected (e.g when rejecting a bad request)*/ + if (req->state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && + (hc->flags & HTTP_CONN_F_IS_SERVER)) + { + http_io_ts_drain_all (hc); + http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY); + } + else + { + clib_warning ("hc [%u]%x invalid tx state: http req state " + "'%U', session state '%U'", + hc->c_thread_index, hc->h_hc_index, + format_http_req_state, req->state, + format_http_conn_state, hc); + http_io_as_drain_all (req); + return; + } + } + + HTTP_DBG (1, "run state machine"); + http1_req_run_state_machine (hc, req, sp, 1); +} + +static void +http1_app_rx_evt_callback (http_conn_t *hc) +{ + http_req_t *req; + + req = http_get_req (hc, 0); + + if (req->state == HTTP_REQ_STATE_TUNNEL) + http1_req_state_tunnel_rx (hc, req, 0); +} + +static void +http1_app_close_callback (http_conn_t *hc) +{ + http_req_t *req; + + req = http_get_req_if_valid (hc, 0); + /* Nothing more to send, confirm close */ + if 
(!req || !http_io_as_max_read (req)) + { + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); + } + else + { + /* Wait for all data to be written to ts */ + hc->state = HTTP_CONN_STATE_APP_CLOSED; + } +} + +static void +http1_app_reset_callback (http_conn_t *hc) +{ + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); +} + +static void +http1_transport_rx_callback (http_conn_t *hc) +{ + http_req_t *req; + + req = http_get_req_if_valid (hc, 0); + if (!req) + { + http_alloc_req (hc); + req = http_get_req (hc, 0); + req->app_session_handle = hc->h_pa_session_handle; + http_req_state_change (req, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD); + } + + if (!http1_req_state_is_rx_valid (req)) + { + clib_warning ("hc [%u]%x invalid rx state: http req state " + "'%U', session state '%U'", + hc->c_thread_index, hc->h_hc_index, format_http_req_state, + req->state, format_http_conn_state, hc); + http_io_ts_drain_all (hc); + return; + } + + HTTP_DBG (1, "run state machine"); + http1_req_run_state_machine (hc, req, 0, 0); +} + +static void +http1_transport_close_callback (http_conn_t *hc) +{ + /* Nothing more to rx, propagate to app */ + if (!http_io_ts_max_read (hc)) + session_transport_closing_notify (&hc->connection); +} + +const static http_engine_vft_t http1_engine = { + .app_tx_callback = http1_app_tx_callback, + .app_rx_evt_callback = http1_app_rx_evt_callback, + .app_close_callback = http1_app_close_callback, + .app_reset_callback = http1_app_reset_callback, + .transport_rx_callback = http1_transport_rx_callback, + .transport_close_callback = http1_transport_close_callback, +}; + +static clib_error_t * +http1_init (vlib_main_t *vm) +{ + http_register_engine (&http1_engine, HTTP_VERSION_1); + return 0; +} + +VLIB_INIT_FUNCTION (http1_init) = { + .runs_after = VLIB_INITS ("http_transport_init"), +}; diff --git a/src/plugins/http/http2/frame.c b/src/plugins/http/http2/frame.c new file mode 100644 index 
00000000000..577bb6c1e3b --- /dev/null +++ b/src/plugins/http/http2/frame.c @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <vppinfra/string.h> +#include <http/http2/frame.h> + +#define MAX_U24 0xFFFFFF + +static_always_inline u8 * +http2_decode_u24 (u8 *src, u32 *value) +{ + *value = 0; + *value = (u32) (src[0] << 16) | (u32) (src[1] << 8) | (u32) src[2]; + return src + 3; +} + +static_always_inline u8 * +http2_encode_u24 (u8 *dst, u32 value) +{ + ASSERT (value <= MAX_U24); + *dst++ = (value >> 16) & 0xFF; + *dst++ = (value >> 8) & 0xFF; + *dst++ = value & 0xFF; + return dst; +} + +/* + * RFC9113 section 4.1 + * + * HTTP Frame { + * Length (24), + * Type (8), + * Flags (8), + * Reserved (1), + * Stream Identifier (31), + * Frame Payload (..), + * } + */ + +__clib_export void +http2_frame_header_read (u8 *src, http2_frame_header_t *fh) +{ + u32 *stream_id; + src = http2_decode_u24 (src, &fh->length); + fh->type = *src++; + fh->flags = *src++; + stream_id = (u32 *) src; + fh->stream_id = clib_net_to_host_u32 (*stream_id) & 0x7FFFFFFF; +} + +static void +http2_frame_header_write (http2_frame_header_t *fh, u8 *dst) +{ + u32 stream_id; + + dst = http2_encode_u24 (dst, fh->length); + *dst++ = fh->type; + *dst++ = fh->flags; + stream_id = clib_host_to_net_u32 (fh->stream_id); + clib_memcpy_fast (dst, &stream_id, sizeof (stream_id)); +} + +__clib_export http2_error_t +http2_frame_read_settings (http2_conn_settings_t *settings, u8 *payload, + u32 payload_len) +{ + http2_settings_entry_t *entry; + u32 value; + + while (payload_len >= sizeof (*entry)) + { + entry = (http2_settings_entry_t *) payload; + switch (clib_net_to_host_u16 (entry->identifier)) + { +#define _(v, label, member, min, max, default_value, err_code) \ + case HTTP2_SETTINGS_##label: \ + value = clib_net_to_host_u32 (entry->value); \ + if (!(value >= min && value <= max)) \ + return err_code; \ + settings->member = value; \ + break; + 
foreach_http2_settings +#undef _ + /* ignore unknown or unsupported identifier */ + default : break; + } + payload_len -= sizeof (*entry); + payload += sizeof (*entry); + } + + if (payload_len != 0) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_settings_ack (u8 **dst) +{ + http2_frame_header_t fh = { .flags = HTTP2_FRAME_FLAG_ACK, + .type = HTTP2_FRAME_TYPE_SETTINGS }; + u8 *p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); +} + +__clib_export void +http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst) +{ + u8 *p; + u32 length; + http2_settings_entry_t *entry, e; + + ASSERT (settings); + ASSERT (vec_len (settings) > 0); + + length = vec_len (settings) * sizeof (*entry); + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_SETTINGS, + .length = length }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, length); + vec_foreach (entry, settings) + { + e.identifier = clib_host_to_net_u16 (entry->identifier); + e.value = clib_host_to_net_u32 (entry->value); + clib_memcpy_fast (p, &e, sizeof (e)); + p += sizeof (e); + } +} + +#define WINDOW_UPDATE_LENGTH 4 + +__clib_export http2_error_t +http2_frame_read_window_update (u32 *increment, u8 *payload, u32 payload_len) +{ + u32 *value; + + if (payload_len != WINDOW_UPDATE_LENGTH) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + + if (value == 0) + return HTTP2_ERROR_PROTOCOL_ERROR; + + *increment = clib_net_to_host_u32 (*value) & 0x7FFFFFFF; + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (increment > 0 && increment <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_WINDOW_UPDATE, + .length = WINDOW_UPDATE_LENGTH, + .stream_id = stream_id }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write 
(&fh, p); + + vec_add2 (*dst, p, WINDOW_UPDATE_LENGTH); + value = clib_host_to_net_u32 (increment); + clib_memcpy_fast (p, &value, WINDOW_UPDATE_LENGTH); +} + +#define RST_STREAM_LENGTH 4 + +__clib_export http2_error_t +http2_frame_read_rst_stream (u32 *error_code, u8 *payload, u32 payload_len) +{ + u32 *value; + + if (payload_len != RST_STREAM_LENGTH) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + + *error_code = clib_net_to_host_u32 (*value); + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id, + u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_RST_STREAM, + .length = RST_STREAM_LENGTH, + .stream_id = stream_id }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, RST_STREAM_LENGTH); + value = clib_host_to_net_u32 ((u32) error_code); + clib_memcpy_fast (p, &value, RST_STREAM_LENGTH); +} + +#define GOAWAY_MIN_SIZE 8 + +__clib_export http2_error_t +http2_frame_read_goaway (u32 *error_code, u32 *last_stream_id, u8 *payload, + u32 payload_len) +{ + u32 *value; + + if (payload_len < GOAWAY_MIN_SIZE) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + + value = (u32 *) payload; + *last_stream_id = clib_net_to_host_u32 (*value) & 0x7FFFFFFF; + payload += 4; + + value = (u32 *) payload; + *error_code = clib_net_to_host_u32 (*value); + + /* TODO: Additional Debug Data */ + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id, + u8 **dst) +{ + u8 *p; + u32 value; + + ASSERT (last_stream_id > 0 && last_stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_GOAWAY, + .length = GOAWAY_MIN_SIZE }; + p = http2_frame_header_alloc (dst); + http2_frame_header_write (&fh, p); + + vec_add2 (*dst, p, GOAWAY_MIN_SIZE); + value = clib_host_to_net_u32 
(last_stream_id); + clib_memcpy_fast (p, &value, 4); + p += 4; + value = clib_host_to_net_u32 ((u32) error_code); + clib_memcpy_fast (p, &value, 4); + /* TODO: Additional Debug Data */ +} + +#define PRIORITY_DATA_LEN 5 + +__clib_export http2_error_t +http2_frame_read_headers (u8 **headers, u32 *headers_len, u8 *payload, + u32 payload_len, u8 flags) +{ + *headers_len = payload_len; + + if (flags & HTTP2_FRAME_FLAG_PADED) + { + u8 pad_len = *payload++; + if ((u32) pad_len >= payload_len) + return HTTP2_ERROR_PROTOCOL_ERROR; + *headers_len -= pad_len; + } + + if (flags & HTTP2_FRAME_FLAG_PRIORITY) + { + if (*headers_len <= PRIORITY_DATA_LEN) + return HTTP2_ERROR_FRAME_SIZE_ERROR; + /* just skip, priority scheme defined in RFC7540 is deprecated */ + *headers_len -= PRIORITY_DATA_LEN; + payload += PRIORITY_DATA_LEN; + } + + *headers = payload; + + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_headers_header (u32 headers_len, u32 stream_id, u8 flags, + u8 *dst) +{ + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_HEADERS, + .length = headers_len, + .flags = flags, + .stream_id = stream_id }; + http2_frame_header_write (&fh, dst); +} + +__clib_export http2_error_t +http2_frame_read_data (u8 **data, u32 *data_len, u8 *payload, u32 payload_len, + u8 flags) +{ + *data_len = payload_len; + + if (flags & HTTP2_FRAME_FLAG_PADED) + { + u8 pad_len = *payload++; + if ((u32) pad_len >= payload_len) + return HTTP2_ERROR_PROTOCOL_ERROR; + *data_len -= pad_len; + } + + *data = payload; + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export void +http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags, u8 *dst) +{ + ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF); + + http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_DATA, + .length = data_len, + .flags = flags, + .stream_id = stream_id }; + http2_frame_header_write (&fh, dst); +} diff --git a/src/plugins/http/http2/frame.h 
b/src/plugins/http/http2/frame.h new file mode 100644 index 00000000000..bfe4e122f0d --- /dev/null +++ b/src/plugins/http/http2/frame.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#ifndef SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ +#define SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ + +#include <vppinfra/error.h> +#include <vppinfra/types.h> +#include <http/http2/http2.h> + +#define HTTP2_FRAME_HEADER_SIZE 9 + +#define foreach_http2_frame_type \ + _ (0x00, DATA, "DATA") \ + _ (0x01, HEADERS, "HEADERS") \ + _ (0x02, PRIORITY, "PRIORITY") \ + _ (0x03, RST_STREAM, "RST_STREAM") \ + _ (0x04, SETTINGS, "SETTINGS") \ + _ (0x05, PUSH_PROMISE, "PUSH_PROMISE") \ + _ (0x06, PING, "PING") \ + _ (0x07, GOAWAY, "GOAWAY") \ + _ (0x08, WINDOW_UPDATE, "WINDOW_UPDATE") \ + _ (0x09, CONTINUATION, "CONTINUATION") + +typedef enum +{ +#define _(v, n, s) HTTP2_FRAME_TYPE_##n = v, + foreach_http2_frame_type +#undef _ +} __clib_packed http2_frame_type_t; + +STATIC_ASSERT_SIZEOF (http2_frame_type_t, 1); + +#define foreach_http2_frame_flag \ + _ (0, NONE) \ + _ (1, END_STREAM) \ + _ (1, ACK) \ + _ (1 << 2, END_HEADERS) \ + _ (1 << 3, PADED) \ + _ (1 << 5, PRIORITY) + +typedef enum +{ +#define _(v, n) HTTP2_FRAME_FLAG_##n = v, + foreach_http2_frame_flag +#undef _ +} __clib_packed http2_frame_flag_t; + +STATIC_ASSERT_SIZEOF (http2_frame_flag_t, 1); + +typedef struct +{ + u32 length; + http2_frame_type_t type; + u8 flags; + u32 stream_id; +} http2_frame_header_t; + +typedef struct +{ + u16 identifier; + u32 value; +} __clib_packed http2_settings_entry_t; + +/** + * Parse frame header + * + * @param src Pointer to the beginning of the frame + * @param fh Parsed frame header + */ +void http2_frame_header_read (u8 *src, http2_frame_header_t *fh); + +/** + * Add 9 bytes (frame header size) to the end of given vector + * + * @param dst Pointer to vector + * + * @return Pointer to the frame header beginning + */ +static_always_inline u8 * 
+http2_frame_header_alloc (u8 **dst) +{ + u8 *p; + + vec_add2 (*dst, p, HTTP2_FRAME_HEADER_SIZE); + return p; +} + +/** + * Parse SETTINGS frame payload + * + * @param settings Vector of HTTP/2 settings + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_settings (http2_conn_settings_t *settings, + u8 *payload, u32 payload_len); + +/** + * Write SETTINGS ACK frame to the end of given vector + * + * @param dst Vector where SETTINGS ACK frame will be written + */ +void http2_frame_write_settings_ack (u8 **dst); + +/** + * Write SETTINGS frame to the end of given vector + * + * @param settings Vector of HTTP/2 settings + * @param dst Vector where SETTINGS frame will be written + */ +void http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst); + +/** + * Parse WINDOW_UPDATE frame payload + * + * @param increment Parsed window increment value + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_window_update (u32 *increment, u8 *payload, + u32 payload_len); + +/** + * Write WINDOW_UPDATE frame to the end of given vector + * + * @param increment Window increment value + * @param stream_id Stream ID + * @param dst Vector where WINDOW_UPDATE frame will be written + */ +void http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst); + +/** + * Parse RST_STREAM frame payload + * + * @param error_code Parsed error code + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_rst_stream (u32 *error_code, u8 *payload, + u32 payload_len); + +/** + * Write RST_STREAM frame to the end of given vector + * + * @param error_code Error code + * @param stream_id Stream ID, except 0 + * 
@param dst Vector where RST_STREAM frame will be written + */ +void http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id, + u8 **dst); + +/** + * Parse GOAWAY frame payload + * + * @param last_stream_id Parsed last stream ID + * @param error_code Parsed error code + * @param payload Payload to parse + * @param payload_len Payload length + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_goaway (u32 *last_stream_id, u32 *error_code, + u8 *payload, u32 payload_len); + +/** + * Write GOAWAY frame to the end of given vector + * @param error_code Error code + * @param last_stream_id Last stream ID + * @param dst Vector where GOAWAY frame will be written + */ +void http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id, + u8 **dst); + +/** + * Parse HEADERS frame payload + * + * @param headers Pointer to header block fragment + * @param headers_len Header block fragment length + * @param payload Payload to parse + * @param payload_len Payload length + * @param flags Flag field of frame header + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_headers (u8 **headers, u32 *headers_len, + u8 *payload, u32 payload_len, + u8 flags); + +/** + * Write HEADERS frame header + * + * @param headers_len Header block fragment length + * @param stream_id Stream ID, except 0 + * @param flags Frame header flags + * @param dst Pointer where frame header will be written + * + * @note Use @c http2_frame_header_alloc before + */ +void http2_frame_write_headers_header (u32 headers_len, u32 stream_id, + u8 flags, u8 *dst); + +/** + * Parse DATA frame payload + * + * @param headers Pointer to data + * @param headers_len Data length + * @param payload Payload to parse + * @param payload_len Payload length + * @param flags Flag field of frame header + * + * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise + */ +http2_error_t http2_frame_read_data 
(u8 **data, u32 *data_len, u8 *payload, + u32 payload_len, u8 flags); + +/** + * Write DATA frame header + * + * @param data_len Data length + * @param stream_id Stream ID, except 0 + * @param flags Frame header flags + * @param dst Pointer where frame header will be written + */ +void http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags, + u8 *dst); + +#endif /* SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ */ diff --git a/src/plugins/http/http2/hpack.c b/src/plugins/http/http2/hpack.c new file mode 100644 index 00000000000..6dcf5f6c19b --- /dev/null +++ b/src/plugins/http/http2/hpack.c @@ -0,0 +1,1101 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <vppinfra/error.h> +#include <vppinfra/ring.h> +#include <http/http.h> +#include <http/http2/hpack.h> +#include <http/http2/huffman_table.h> +#include <http/http_status_codes.h> + +#define HPACK_STATIC_TABLE_SIZE 61 + +typedef struct +{ + char *name; + uword name_len; + char *value; + uword value_len; +} hpack_static_table_entry_t; + +#define name_val_token_lit(name, value) \ + (name), sizeof (name) - 1, (value), sizeof (value) - 1 + +static hpack_static_table_entry_t + hpack_static_table[HPACK_STATIC_TABLE_SIZE] = { + { name_val_token_lit (":authority", "") }, + { name_val_token_lit (":method", "GET") }, + { name_val_token_lit (":method", "POST") }, + { name_val_token_lit (":path", "/") }, + { name_val_token_lit (":path", "/index.html") }, + { name_val_token_lit (":scheme", "http") }, + { name_val_token_lit (":scheme", "https") }, + { name_val_token_lit (":status", "200") }, + { name_val_token_lit (":status", "204") }, + { name_val_token_lit (":status", "206") }, + { name_val_token_lit (":status", "304") }, + { name_val_token_lit (":status", "400") }, + { name_val_token_lit (":status", "404") }, + { name_val_token_lit (":status", "500") }, + { name_val_token_lit ("accept-charset", "") }, + { name_val_token_lit ("accept-encoding", "gzip, deflate") }, + { 
name_val_token_lit ("accept-language", "") }, + { name_val_token_lit ("accept-ranges", "") }, + { name_val_token_lit ("accept", "") }, + { name_val_token_lit ("access-control-allow-origin", "") }, + { name_val_token_lit ("age", "") }, + { name_val_token_lit ("allow", "") }, + { name_val_token_lit ("authorization", "") }, + { name_val_token_lit ("cache-control", "") }, + { name_val_token_lit ("content-disposition", "") }, + { name_val_token_lit ("content-encoding", "") }, + { name_val_token_lit ("content-language", "") }, + { name_val_token_lit ("content-length", "") }, + { name_val_token_lit ("content-location", "") }, + { name_val_token_lit ("content-range", "") }, + { name_val_token_lit ("content-type", "") }, + { name_val_token_lit ("cookie", "") }, + { name_val_token_lit ("date", "") }, + { name_val_token_lit ("etag", "") }, + { name_val_token_lit ("etag", "") }, + { name_val_token_lit ("expires", "") }, + { name_val_token_lit ("from", "") }, + { name_val_token_lit ("host", "") }, + { name_val_token_lit ("if-match", "") }, + { name_val_token_lit ("if-modified-since", "") }, + { name_val_token_lit ("if-none-match", "") }, + { name_val_token_lit ("if-range", "") }, + { name_val_token_lit ("if-unmodified-since", "") }, + { name_val_token_lit ("last-modified", "") }, + { name_val_token_lit ("link", "") }, + { name_val_token_lit ("location", "") }, + { name_val_token_lit ("max-forwards", "") }, + { name_val_token_lit ("proxy-authenticate", "") }, + { name_val_token_lit ("proxy-authorization", "") }, + { name_val_token_lit ("range", "") }, + { name_val_token_lit ("referer", "") }, + { name_val_token_lit ("refresh", "") }, + { name_val_token_lit ("retry-after", "") }, + { name_val_token_lit ("server", "") }, + { name_val_token_lit ("set-cookie", "") }, + { name_val_token_lit ("strict-transport-security", "") }, + { name_val_token_lit ("transfer-encoding", "") }, + { name_val_token_lit ("user-agent", "") }, + { name_val_token_lit ("vary", "") }, + { name_val_token_lit 
("via", "") }, + { name_val_token_lit ("www-authenticate", "") }, + }; + +typedef struct +{ + char *base; + uword len; + u8 static_table_index; +} hpack_token_t; + +static hpack_token_t hpack_headers[] = { +#define _(sym, str_canonical, str_lower, hpack_index) \ + { http_token_lit (str_lower), hpack_index }, + foreach_http_header_name +#undef _ +}; + +__clib_export uword +hpack_decode_int (u8 **src, u8 *end, u8 prefix_len) +{ + uword value, new_value; + u8 *p, shift = 0, byte; + u16 prefix_max; + + ASSERT (*src < end); + ASSERT (prefix_len >= 1 && prefix_len <= 8); + + p = *src; + prefix_max = (1 << prefix_len) - 1; + value = *p & (u8) prefix_max; + p++; + /* if integer value is less than 2^prefix_len-1 it's encoded within prefix */ + if (value != prefix_max) + { + *src = p; + return value; + } + + while (p != end) + { + byte = *p; + p++; + new_value = value + ((uword) (byte & 0x7F) << shift); + shift += 7; + /* check for overflow */ + if (new_value < value) + return HPACK_INVALID_INT; + value = new_value; + /* MSB of the last byte is zero */ + if ((byte & 0x80) == 0) + { + *src = p; + return value; + } + } + + return HPACK_INVALID_INT; +} + +http2_error_t +hpack_decode_huffman (u8 **src, u8 *end, u8 **buf, uword *buf_len) +{ + u64 accumulator = 0; + u8 accumulator_len = 0; + u8 *p; + hpack_huffman_code_t *code; + + p = *src; + while (1) + { + /* out of space? 
*/ + if (*buf_len == 0) + return HTTP2_ERROR_INTERNAL_ERROR; + /* refill */ + while (p < end && accumulator_len <= 56) + { + accumulator <<= 8; + accumulator_len += 8; + accumulator |= (u64) *p++; + } + /* first try short codes (5 - 8 bits) */ + code = + &huff_code_table_fast[(u8) (accumulator >> (accumulator_len - 8))]; + /* zero code length mean no luck */ + if (PREDICT_TRUE (code->code_len)) + { + **buf = code->symbol; + (*buf)++; + (*buf_len)--; + accumulator_len -= code->code_len; + } + else + { + /* slow path / long codes (10 - 30 bits) */ + u32 tmp; + /* group boundaries are aligned to 32 bits */ + if (accumulator_len < 32) + tmp = accumulator << (32 - accumulator_len); + else + tmp = accumulator >> (accumulator_len - 32); + /* figure out which interval code falls into, this is possible + * because HPACK use canonical Huffman codes + * see Schwartz, E. and B. Kallick, “Generating a canonical prefix + * encoding” + */ + hpack_huffman_group_t *hg = hpack_huffman_get_group (tmp); + /* trim code to correct length */ + u32 code = (accumulator >> (accumulator_len - hg->code_len)) & + ((1 << hg->code_len) - 1); + /* find symbol in the list */ + **buf = hg->symbols[code - hg->first_code]; + (*buf)++; + (*buf_len)--; + accumulator_len -= hg->code_len; + } + /* all done */ + if (p == end && accumulator_len < 8) + { + /* there might be one more symbol encoded with short code */ + if (accumulator_len >= 5) + { + /* first check EOF case */ + if (((1 << accumulator_len) - 1) == + (accumulator & ((1 << accumulator_len) - 1))) + break; + + /* out of space? */ + if (*buf_len == 0) + return HTTP2_ERROR_INTERNAL_ERROR; + + /* if bogus EOF check bellow will fail */ + code = &huff_code_table_fast[(u8) (accumulator + << (8 - accumulator_len))]; + **buf = code->symbol; + (*buf)++; + (*buf_len)--; + accumulator_len -= code->code_len; + /* end at byte boundary? 
*/ + if (accumulator_len == 0) + break; + } + /* we must end with EOF here */ + if (((1 << accumulator_len) - 1) != + (accumulator & ((1 << accumulator_len) - 1))) + return HTTP2_ERROR_COMPRESSION_ERROR; + break; + } + } + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export http2_error_t +hpack_decode_string (u8 **src, u8 *end, u8 **buf, uword *buf_len) +{ + u8 *p, is_huffman; + uword len; + + ASSERT (*src < end); + + p = *src; + /* H flag in first bit */ + is_huffman = *p & 0x80; + + /* length is integer with 7 bit prefix */ + len = hpack_decode_int (&p, end, 7); + if (PREDICT_FALSE (len == HPACK_INVALID_INT)) + return HTTP2_ERROR_COMPRESSION_ERROR; + + /* do we have everything? */ + if (len > (end - p)) + return HTTP2_ERROR_COMPRESSION_ERROR; + + if (is_huffman) + { + *src = (p + len); + return hpack_decode_huffman (&p, p + len, buf, buf_len); + } + else + { + /* enough space? */ + if (len > *buf_len) + return HTTP2_ERROR_INTERNAL_ERROR; + + clib_memcpy (*buf, p, len); + *buf_len -= len; + *buf += len; + *src = (p + len); + return HTTP2_ERROR_NO_ERROR; + } +} + +__clib_export u8 * +hpack_encode_int (u8 *dst, uword value, u8 prefix_len) +{ + u16 prefix_max; + + ASSERT (prefix_len >= 1 && prefix_len <= 8); + + prefix_max = (1 << prefix_len) - 1; + + /* if integer value is less than 2^prefix_len-1 it's encoded within prefix */ + if (value < prefix_max) + { + *dst++ |= (u8) value; + return dst; + } + + /* otherwise all bits of the prefix are set to 1 */ + *dst++ |= (u8) prefix_max; + /* and the value is decreased by 2^prefix_len-1 */ + value -= prefix_max; + /* MSB of each byte is used as continuation flag */ + for (; value >= 0x80; value >>= 7) + *dst++ = 0x80 | (value & 0x7F); + /* except for the last byte */ + *dst++ = (u8) value; + + return dst; +} + +uword +hpack_huffman_encoded_len (const u8 *value, uword value_len) +{ + uword len = 0; + u8 *end; + hpack_huffman_symbol_t *sym; + + end = (u8 *) value + value_len; + while (value != end) + { + sym = 
&huff_sym_table[*value++]; + len += sym->code_len; + } + /* round up to byte boundary */ + return (len + 7) / 8; +} + +u8 * +hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len) +{ + u8 *end; + hpack_huffman_symbol_t *sym; + u8 accumulator_len = 40; /* leftover (1 byte) + max code_len (4 bytes) */ + u64 accumulator = 0; /* to fit leftover and current code */ + + end = (u8 *) value + value_len; + + while (value != end) + { + sym = &huff_sym_table[*value++]; + /* add current code to leftover of previous one */ + accumulator |= (u64) sym->code << (accumulator_len - sym->code_len); + accumulator_len -= sym->code_len; + /* write only fully occupied bytes (max 4) */ + switch (accumulator_len) + { + case 1 ... 8: +#define WRITE_BYTE() \ + *dst = (u8) (accumulator >> 32); \ + accumulator_len += 8; \ + accumulator <<= 8; \ + dst++; + WRITE_BYTE (); + case 9 ... 16: + WRITE_BYTE (); + case 17 ... 24: + WRITE_BYTE (); + case 25 ... 32: + WRITE_BYTE (); + default: + break; + } + } + + /* padding (0-7 bits)*/ + ASSERT (accumulator_len > 32 && accumulator_len <= 40); + if (accumulator_len != 40) + { + accumulator |= (u64) 0x7F << (accumulator_len - 7); + *dst = (u8) (accumulator >> 32); + dst++; + } + return dst; +} + +__clib_export u8 * +hpack_encode_string (u8 *dst, const u8 *value, uword value_len) +{ + uword huff_len; + + huff_len = hpack_huffman_encoded_len (value, value_len); + /* raw bytes might take fewer bytes */ + if (huff_len >= value_len) + { + *dst = 0; /* clear H flag */ + dst = hpack_encode_int (dst, value_len, 7); + clib_memcpy (dst, value, value_len); + return dst + value_len; + } + + *dst = 0x80; /* set H flag */ + dst = hpack_encode_int (dst, huff_len, 7); + dst = hpack_encode_huffman (dst, value, value_len); + + return dst; +} + +__clib_export void +hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size) +{ + table->max_size = max_size; + table->size = max_size; + table->used = 0; + clib_ring_new (table->entries, + max_size / 
HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD); +} + +__clib_export void +hpack_dynamic_table_free (hpack_dynamic_table_t *table) +{ + hpack_dynamic_table_entry_t *e; + + while ((e = clib_ring_deq (table->entries)) != 0) + vec_free (e->buf); + + clib_ring_free (table->entries); +} + +#define hpack_dynamic_table_entry_value_base(e) \ + ((char *) ((e)->buf + (e)->name_len)) +#define hpack_dynamic_table_entry_value_len(e) \ + (vec_len ((e)->buf) - (e)->name_len) + +always_inline hpack_dynamic_table_entry_t * +hpack_dynamic_table_get (hpack_dynamic_table_t *table, uword index) +{ + if (index > clib_ring_n_enq (table->entries)) + return 0; + + hpack_dynamic_table_entry_t *first = clib_ring_get_first (table->entries); + u32 first_index = first - table->entries; + u32 entry_index = + (first_index + (clib_ring_n_enq (table->entries) - 1 - (u32) index)) % + vec_len (table->entries); + return table->entries + entry_index; +} + +__clib_export u8 * +format_hpack_dynamic_table (u8 *s, va_list *args) +{ + hpack_dynamic_table_t *table = va_arg (*args, hpack_dynamic_table_t *); + u32 i; + hpack_dynamic_table_entry_t *e; + + s = format (s, "HPACK dynamic table:\n"); + for (i = 0; i < clib_ring_n_enq (table->entries); i++) + { + e = hpack_dynamic_table_get (table, i); + s = format (s, "\t[%u] %U: %U\n", i, format_http_bytes, e->buf, + e->name_len, format_http_bytes, + hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); + } + return s; +} + +static inline void +hpack_dynamic_table_evict_one (hpack_dynamic_table_t *table) +{ + u32 entry_size; + hpack_dynamic_table_entry_t *e; + + e = clib_ring_deq (table->entries); + ASSERT (e); + HTTP_DBG (2, "%U: %U", format_http_bytes, e->buf, e->name_len, + format_http_bytes, hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); + entry_size = vec_len (e->buf) + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD; + table->used -= entry_size; + vec_reset_length (e->buf); +} + +static void 
+hpack_dynamic_table_add (hpack_dynamic_table_t *table, http_token_t *name, + http_token_t *value) +{ + u32 entry_size; + hpack_dynamic_table_entry_t *e; + + entry_size = name->len + value->len + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD; + + /* make space or evict all */ + while (clib_ring_n_enq (table->entries) && + (table->used + entry_size > table->size)) + hpack_dynamic_table_evict_one (table); + + /* attempt to add entry larger than the maximum size is not error */ + if (entry_size > table->size) + return; + + e = clib_ring_enq (table->entries); + ASSERT (e); + vec_validate (e->buf, name->len + value->len - 1); + clib_memcpy (e->buf, name->base, name->len); + clib_memcpy (e->buf + name->len, value->base, value->len); + e->name_len = name->len; + table->used += entry_size; + + HTTP_DBG (2, "%U: %U", format_http_bytes, e->buf, e->name_len, + format_http_bytes, hpack_dynamic_table_entry_value_base (e), + hpack_dynamic_table_entry_value_len (e)); +} + +static http2_error_t +hpack_get_table_entry (uword index, http_token_t *name, http_token_t *value, + u8 value_is_indexed, hpack_dynamic_table_t *dt) +{ + if (index <= HPACK_STATIC_TABLE_SIZE) + { + hpack_static_table_entry_t *e = &hpack_static_table[index - 1]; + name->base = e->name; + name->len = e->name_len; + if (value_is_indexed) + { + if (PREDICT_FALSE (e->value_len == 0)) + { + HTTP_DBG (1, "static table entry [%llu] without value", index); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + value->base = e->value; + value->len = e->value_len; + } + HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, e->name, + e->name_len, format_http_bytes, e->value, e->value_len); + return HTTP2_ERROR_NO_ERROR; + } + else + { + hpack_dynamic_table_entry_t *e = + hpack_dynamic_table_get (dt, index - HPACK_STATIC_TABLE_SIZE - 1); + if (PREDICT_FALSE (!e)) + { + HTTP_DBG (1, "index %llu not in dynamic table", index); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + name->base = (char *) e->buf; + name->len = e->name_len; + value->base = 
hpack_dynamic_table_entry_value_base (e); + value->len = hpack_dynamic_table_entry_value_len (e); + HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, name->base, + name->len, format_http_bytes, value->base, value->len); + return HTTP2_ERROR_NO_ERROR; + } +} + +__clib_export http2_error_t +hpack_decode_header (u8 **src, u8 *end, u8 **buf, uword *buf_len, + u32 *name_len, u32 *value_len, hpack_dynamic_table_t *dt) +{ + u8 *p; + u8 value_is_indexed = 0, add_new_entry = 0; + uword old_len, new_max, index = 0; + http_token_t name, value; + http2_error_t rv; + + ASSERT (*src < end); + p = *src; + + /* dynamic table size update */ + while ((*p & 0xE0) == 0x20) + { + new_max = hpack_decode_int (&p, end, 5); + if (p == end || new_max > (uword) dt->max_size) + { + HTTP_DBG (1, "invalid dynamic table size update"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + while (clib_ring_n_enq (dt->entries) && new_max > dt->used) + hpack_dynamic_table_evict_one (dt); + dt->size = (u32) new_max; + } + + if (*p & 0x80) /* indexed header field */ + { + index = hpack_decode_int (&p, end, 7); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + value_is_indexed = 1; + } + else if (*p > 0x40) /* incremental indexing - indexed name */ + { + index = hpack_decode_int (&p, end, 6); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return HTTP2_ERROR_COMPRESSION_ERROR; + } + add_new_entry = 1; + } + else if (*p == 0x40) /* incremental indexing - new name */ + { + add_new_entry = 1; + p++; + } + else /* without indexing / never indexed */ + { + if ((*p & 0x0F) == 0) /* new name */ + p++; + else /* indexed name */ + { + index = hpack_decode_int (&p, end, 4); + /* index value of 0 is not used */ + if (index == 0 || index == HPACK_INVALID_INT) + { + HTTP_DBG (1, "invalid index"); + return 
HTTP2_ERROR_COMPRESSION_ERROR; + } + } + } + + if (index) + { + rv = hpack_get_table_entry (index, &name, &value, value_is_indexed, dt); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "entry index %llu error", index); + return rv; + } + if (name.len > *buf_len) + { + HTTP_DBG (1, "not enough space"); + return HTTP2_ERROR_INTERNAL_ERROR; + } + clib_memcpy (*buf, name.base, name.len); + *buf_len -= name.len; + *buf += name.len; + *name_len = name.len; + if (value_is_indexed) + { + if (value.len > *buf_len) + { + HTTP_DBG (1, "not enough space"); + return HTTP2_ERROR_INTERNAL_ERROR; + } + clib_memcpy (*buf, value.base, value.len); + *buf_len -= value.len; + *buf += value.len; + *value_len = value.len; + } + } + else + { + old_len = *buf_len; + name.base = (char *) *buf; + rv = hpack_decode_string (&p, end, buf, buf_len); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "invalid header name"); + return rv; + } + *name_len = old_len - *buf_len; + name.len = *name_len; + } + + if (!value_is_indexed) + { + old_len = *buf_len; + value.base = (char *) *buf; + rv = hpack_decode_string (&p, end, buf, buf_len); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "invalid header value"); + return rv; + } + *value_len = old_len - *buf_len; + value.len = *value_len; + } + + if (add_new_entry) + hpack_dynamic_table_add (dt, &name, &value); + + *src = p; + return HTTP2_ERROR_NO_ERROR; +} + +static inline u8 +hpack_header_name_is_valid (u8 *name, u32 name_len) +{ + u32 i; + static uword tchar[4] = { + /* !#$%'*+-.0123456789 */ + 0x03ff6cba00000000, + /* ^_`abcdefghijklmnopqrstuvwxyz|~ */ + 0x57ffffffc0000000, + 0x0000000000000000, + 0x0000000000000000, + }; + for (i = 0; i < name_len; i++) + { + if (!clib_bitmap_get_no_check (tchar, name[i])) + return 0; + } + return 1; +} + +static inline u8 +hpack_header_value_is_valid (u8 *value, u32 value_len) +{ + u32 i; + /* VCHAR / SP / HTAB / %x80-FF */ + static uword tchar[4] = { + 0xffffffff00000200, + 0x7fffffffffffffff, + 
0xffffffffffffffff, + 0xffffffffffffffff, + }; + + /* must not start or end with SP or HTAB */ + if ((value[0] == 0x20 || value[0] == 0x09 || value[value_len - 1] == 0x20 || + value[value_len - 1] == 0x09)) + return 0; + + for (i = 0; i < value_len; i++) + { + if (!clib_bitmap_get_no_check (tchar, value[i])) + return 0; + } + return 1; +} + +static inline http_req_method_t +hpack_parse_method (u8 *value, u32 value_len) +{ + switch (value_len) + { + case 3: + if (!memcmp (value, "GET", 3)) + return HTTP_REQ_GET; + break; + case 4: + if (!memcmp (value, "POST", 4)) + return HTTP_REQ_POST; + break; + case 7: + if (!memcmp (value, "CONNECT", 7)) + return HTTP_REQ_CONNECT; + break; + default: + break; + } + /* HPACK should return only connection errors, this one is stream error */ + return HTTP_REQ_UNKNOWN; +} + +static inline http_url_scheme_t +hpack_parse_scheme (u8 *value, u32 value_len) +{ + switch (value_len) + { + case 4: + if (!memcmp (value, "http", 4)) + return HTTP_URL_SCHEME_HTTP; + break; + case 5: + if (!memcmp (value, "https", 5)) + return HTTP_URL_SCHEME_HTTPS; + break; + default: + break; + } + /* HPACK should return only connection errors, this one is stream error */ + return HTTP_URL_SCHEME_UNKNOWN; +} + +static http2_error_t +hpack_parse_req_pseudo_header (u8 *name, u32 name_len, u8 *value, + u32 value_len, + hpack_request_control_data_t *control_data) +{ + HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len, format_http_bytes, + value, value_len); + switch (name_len) + { + case 5: + if (!memcmp (name + 1, "path", 4)) + { + if (control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_PATH_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_PATH_PARSED; + control_data->path = value; + control_data->path_len = value_len; + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + case 7: + switch (name[1]) + { + case 'm': + if (!memcmp (name + 2, "ethod", 5)) + { + if (control_data->parsed_bitmap & + 
HPACK_PSEUDO_HEADER_METHOD_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_METHOD_PARSED; + control_data->method = hpack_parse_method (value, value_len); + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + case 's': + if (!memcmp (name + 2, "cheme", 5)) + { + if (control_data->parsed_bitmap & + HPACK_PSEUDO_HEADER_SCHEME_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_SCHEME_PARSED; + control_data->scheme = hpack_parse_scheme (value, value_len); + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + default: + return HTTP2_ERROR_PROTOCOL_ERROR; + } + break; + case 10: + if (!memcmp (name + 1, "authority", 9)) + { + if (control_data->parsed_bitmap & + HPACK_PSEUDO_HEADER_AUTHORITY_PARSED) + return HTTP2_ERROR_PROTOCOL_ERROR; + control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_AUTHORITY_PARSED; + control_data->authority = value; + control_data->authority_len = value_len; + break; + } + return HTTP2_ERROR_PROTOCOL_ERROR; + default: + return HTTP2_ERROR_PROTOCOL_ERROR; + } + + return HTTP2_ERROR_NO_ERROR; +} + +__clib_export http2_error_t +hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len, + hpack_request_control_data_t *control_data, + http_field_line_t **headers, + hpack_dynamic_table_t *dynamic_table) +{ + u8 *p, *end, *b, *name, *value; + u8 regular_header_parsed = 0; + u32 name_len, value_len; + uword b_left; + http_field_line_t *header; + http2_error_t rv; + + p = src; + end = src + src_len; + b = dst; + b_left = dst_len; + control_data->parsed_bitmap = 0; + control_data->headers_len = 0; + + while (p != end) + { + name = b; + rv = hpack_decode_header (&p, end, &b, &b_left, &name_len, &value_len, + dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "hpack_decode_header: %U", format_http2_error, rv); + return rv; + } + value = name + name_len; + + /* pseudo header */ + if (name[0] == ':') + { + /* all pseudo-headers must be before 
regular headers */ + if (regular_header_parsed) + { + HTTP_DBG (1, "pseudo-headers after regular header"); + return HTTP2_ERROR_PROTOCOL_ERROR; + } + rv = hpack_parse_req_pseudo_header (name, name_len, value, value_len, + control_data); + if (rv != HTTP2_ERROR_NO_ERROR) + { + HTTP_DBG (1, "hpack_parse_req_pseudo_header: %U", + format_http2_error, rv); + return rv; + } + continue; + } + else + { + if (!hpack_header_name_is_valid (name, name_len)) + return HTTP2_ERROR_PROTOCOL_ERROR; + if (!regular_header_parsed) + { + regular_header_parsed = 1; + control_data->headers = name; + } + } + if (!hpack_header_value_is_valid (value, value_len)) + return HTTP2_ERROR_PROTOCOL_ERROR; + vec_add2 (*headers, header, 1); + HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len, + format_http_bytes, value, value_len); + header->name_offset = name - control_data->headers; + header->name_len = name_len; + header->value_offset = value - control_data->headers; + header->value_len = value_len; + control_data->headers_len += name_len; + control_data->headers_len += value_len; + } + + HTTP_DBG (2, "%U", format_hpack_dynamic_table, dynamic_table); + return HTTP2_ERROR_NO_ERROR; +} + +static inline u8 * +hpack_encode_header (u8 *dst, http_header_name_t name, const u8 *value, + u32 value_len) +{ + hpack_token_t *name_token; + u8 *a, *b; + u32 orig_len, actual_size; + + orig_len = vec_len (dst); + name_token = &hpack_headers[name]; + if (name_token->static_table_index) + { + /* static table index with 4 bit prefix is max 2 bytes */ + vec_add2 (dst, a, 2 + value_len + HPACK_ENCODED_INT_MAX_LEN); + /* Literal Header Field without Indexing — Indexed Name */ + *a = 0x00; /* zero first 4 bits */ + b = hpack_encode_int (a, name_token->static_table_index, 4); + } + else + { + /* one extra byte for 4 bit prefix */ + vec_add2 (dst, a, + name_token->len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + + 1); + b = a; + /* Literal Header Field without Indexing — New Name */ + *b++ = 0x00; + b = 
hpack_encode_string (b, (const u8 *) name_token->base, + name_token->len); + } + b = hpack_encode_string (b, value, value_len); + + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +static inline u8 * +hpack_encode_custom_header (u8 *dst, const u8 *name, u32 name_len, + const u8 *value, u32 value_len) +{ + u32 orig_len, actual_size; + u8 *a, *b; + + orig_len = vec_len (dst); + /* one extra byte for 4 bit prefix */ + vec_add2 (dst, a, name_len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + 1); + b = a; + /* Literal Header Field without Indexing — New Name */ + *b++ = 0x00; + b = hpack_encode_string (b, name, name_len); + b = hpack_encode_string (b, value, value_len); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +static inline u8 * +hpack_encode_status_code (u8 *dst, http_status_code_t sc) +{ + u32 orig_len, actual_size; + u8 *a, *b; + +#define encode_common_sc(_index) \ + vec_add2 (dst, a, 1); \ + *a++ = 0x80 | _index; + + switch (sc) + { + case HTTP_STATUS_OK: + encode_common_sc (8); + break; + case HTTP_STATUS_NO_CONTENT: + encode_common_sc (9); + break; + case HTTP_STATUS_PARTIAL_CONTENT: + encode_common_sc (10); + break; + case HTTP_STATUS_NOT_MODIFIED: + encode_common_sc (11); + break; + case HTTP_STATUS_BAD_REQUEST: + encode_common_sc (12); + break; + case HTTP_STATUS_NOT_FOUND: + encode_common_sc (13); + break; + case HTTP_STATUS_INTERNAL_ERROR: + encode_common_sc (14); + break; + default: + orig_len = vec_len (dst); + vec_add2 (dst, a, 5); + b = a; + /* Literal Header Field without Indexing — Indexed Name */ + *b++ = 8; + b = hpack_encode_string (b, (const u8 *) http_status_code_str[sc], 3); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + break; + } + return dst; +} + +static inline u8 * +hpack_encode_content_len (u8 *dst, u64 content_len) +{ + u8 digit_buffer[20]; + u8 *d = digit_buffer + sizeof (digit_buffer); + u32 orig_len, actual_size; + u8 *a, *b; + + 
orig_len = vec_len (dst); + vec_add2 (dst, a, 3 + sizeof (digit_buffer)); + b = a; + + /* static table index 28 */ + *b++ = 0x0F; + *b++ = 0x0D; + do + { + *--d = '0' + content_len % 10; + content_len /= 10; + } + while (content_len); + + b = hpack_encode_string (b, d, digit_buffer + sizeof (digit_buffer) - d); + actual_size = b - a; + vec_set_len (dst, orig_len + actual_size); + return dst; +} + +__clib_export void +hpack_serialize_response (u8 *app_headers, u32 app_headers_len, + hpack_response_control_data_t *control_data, + u8 **dst) +{ + u8 *p, *end; + + p = *dst; + + /* status code must be first since it is pseudo-header */ + p = hpack_encode_status_code (p, control_data->sc); + + /* server name */ + p = hpack_encode_header (p, HTTP_HEADER_SERVER, control_data->server_name, + control_data->server_name_len); + + /* date */ + p = hpack_encode_header (p, HTTP_HEADER_DATE, control_data->date, + control_data->date_len); + + /* content length if any */ + if (control_data->content_len != HPACK_ENCODER_SKIP_CONTENT_LEN) + p = hpack_encode_content_len (p, control_data->content_len); + + if (!app_headers_len) + { + *dst = p; + return; + } + + end = app_headers + app_headers_len; + while (app_headers < end) + { + /* custom header name? 
*/ + u32 *tmp = (u32 *) app_headers; + if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) + { + http_custom_token_t *name, *value; + name = (http_custom_token_t *) app_headers; + u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; + app_headers += sizeof (http_custom_token_t) + name_len; + value = (http_custom_token_t *) app_headers; + app_headers += sizeof (http_custom_token_t) + value->len; + p = hpack_encode_custom_header (p, name->token, name_len, + value->token, value->len); + } + else + { + http_app_header_t *header; + header = (http_app_header_t *) app_headers; + app_headers += sizeof (http_app_header_t) + header->value.len; + p = hpack_encode_header (p, header->name, header->value.token, + header->value.len); + } + } + + *dst = p; +} diff --git a/src/plugins/http/http2/hpack.h b/src/plugins/http/http2/hpack.h new file mode 100644 index 00000000000..2a2936b7611 --- /dev/null +++ b/src/plugins/http/http2/hpack.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef SRC_PLUGINS_HTTP_HPACK_H_ +#define SRC_PLUGINS_HTTP_HPACK_H_ + +#include <vppinfra/types.h> +#include <http/http2/http2.h> + +#define HPACK_INVALID_INT CLIB_UWORD_MAX +#if uword_bits == 64 +#define HPACK_ENCODED_INT_MAX_LEN 10 +#else +#define HPACK_ENCODED_INT_MAX_LEN 6 +#endif + +#define HPACK_DEFAULT_HEADER_TABLE_SIZE 4096 +#define HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD 32 +#define HPACK_ENCODER_SKIP_CONTENT_LEN ((u64) ~0) + +typedef struct +{ + u8 *buf; + uword name_len; +} hpack_dynamic_table_entry_t; + +typedef struct +{ + /* SETTINGS_HEADER_TABLE_SIZE */ + u32 max_size; + /* dynamic table size update */ + u32 size; + /* current usage (each entry = 32 + name len + value len) */ + u32 used; + /* ring buffer */ + hpack_dynamic_table_entry_t *entries; +} hpack_dynamic_table_t; + +enum +{ +#define _(bit, name, str) HPACK_PSEUDO_HEADER_##name##_PARSED = (1 << bit), + foreach_http2_pseudo_header +#undef _ +}; + +typedef struct +{ + http_req_method_t method; + http_url_scheme_t scheme; + u8 *authority; + u32 authority_len; + u8 *path; + u32 path_len; + u8 *headers; + u32 headers_len; + u16 parsed_bitmap; +} hpack_request_control_data_t; + +typedef struct +{ + http_status_code_t sc; + u64 content_len; + u8 *server_name; + u32 server_name_len; + u8 *date; + u32 date_len; +} hpack_response_control_data_t; + +/** + * Decode unsigned variable-length integer (RFC7541 section 5.1) + * + * @param src Pointer to source buffer which will be advanced + * @param end End of the source buffer + * @param prefix_len Number of bits of the prefix (between 1 and 8) + * + * @return Decoded integer or @c HPACK_INVALID_INT in case of error + */ +uword hpack_decode_int (u8 **src, u8 *end, u8 prefix_len); + +/** + * Encode given value as unsigned variable-length integer (RFC7541 section 5.1) + * + * @param dst Pointer to destination buffer, should have enough space + * @param value Integer value to encode (up to @c CLIB_WORD_MAX) + * @param prefix_len Number of bits of the 
prefix (between 1 and 8) + * + * @return Advanced pointer to the destination buffer + * + * @note Encoded integer will take maximum @c HPACK_ENCODED_INT_MAX_LEN bytes + */ +u8 *hpack_encode_int (u8 *dst, uword value, u8 prefix_len); + +/** + * Decode Huffman-encoded string. + * + * @param src Pointer to source buffer which will be advanced + * @param end End of the source buffer + * @param buf Pointer to the buffer where string is decoded which will be + * advanced by number of written bytes + * @param buf_len Length of the buffer, will be decreased + * + * @return @c HTTP2_ERROR_NO_ERROR on success + * + * @note Caller is responsible to check if there is something left in source + * buffer first + */ +http2_error_t hpack_decode_huffman (u8 **src, u8 *end, u8 **buf, + uword *buf_len); + +/** + * Encode given string in Huffman codes. + * + * @param dst Pointer to destination buffer, should have enough space + * @param value String to encode + * @param value_len Length of the string + * + * @return Advanced pointer to the destination buffer + */ +u8 *hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len); + +/** + * Number of bytes required to encode given string in Huffman codes + * + * @param value Pointer to buffer with string to encode + * @param value_len Length of the string + * + * @return number of bytes required to encode string in Huffman codes, round up + * to byte boundary + */ +uword hpack_huffman_encoded_len (const u8 *value, uword value_len); + +/** + * Initialize HPACK dynamic table + * + * @param table Dynamic table to initialize + * @param max_size Maximum table size (SETTINGS_HEADER_TABLE_SIZE) + */ +void hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size); + +/** + * Free HPACK dynamic table + * + * @param table Dynamic table to free + */ +void hpack_dynamic_table_free (hpack_dynamic_table_t *table); + +u8 *format_hpack_dynamic_table (u8 *s, va_list *args); + +/** + * Request parser + * + * @param src Header block to parse + * @param src_len 
Length of header block + * @param dst Buffer where headers will be decoded + * @param dst_len Length of buffer for decoded headers + * @param control_data Preparsed pseudo-headers + * @param headers List of regular headers + * @param dynamic_table Decoder dynamic table + * + * @return @c HTTP2_ERROR_NO_ERROR on success, connection error otherwise + */ +http2_error_t hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len, + hpack_request_control_data_t *control_data, + http_field_line_t **headers, + hpack_dynamic_table_t *dynamic_table); + +/** + * Serialize response + * + * @param app_headers App header list + * @param app_headers_len App header list length + * @param control_data Header values set by protocol layer + * @param dst Vector where serialized headers will be added + */ +void hpack_serialize_response (u8 *app_headers, u32 app_headers_len, + hpack_response_control_data_t *control_data, + u8 **dst); + +#endif /* SRC_PLUGINS_HTTP_HPACK_H_ */ diff --git a/src/plugins/http/http2/http2.h b/src/plugins/http/http2/http2.h new file mode 100644 index 00000000000..9fc95344771 --- /dev/null +++ b/src/plugins/http/http2/http2.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef SRC_PLUGINS_HTTP_HTTP2_H_ +#define SRC_PLUGINS_HTTP_HTTP2_H_ + +#include <vppinfra/format.h> +#include <vppinfra/types.h> + +/* RFC9113 section 7 */ +#define foreach_http2_error \ + _ (NO_ERROR, "NO_ERROR") \ + _ (PROTOCOL_ERROR, "PROTOCOL_ERROR") \ + _ (INTERNAL_ERROR, "INTERNAL_ERROR") \ + _ (FLOW_CONTROL_ERROR, "FLOW_CONTROL_ERROR") \ + _ (SETTINGS_TIMEOUT, "SETTINGS_TIMEOUT") \ + _ (STREAM_CLOSED, "STREAM_CLOSED") \ + _ (FRAME_SIZE_ERROR, "FRAME_SIZE_ERROR") \ + _ (REFUSED_STREAM, "REFUSED_STREAM") \ + _ (CANCEL, "CANCEL") \ + _ (COMPRESSION_ERROR, "COMPRESSION_ERROR") \ + _ (CONNECT_ERROR, "CONNECT_ERROR") \ + _ (ENHANCE_YOUR_CALM, "ENHANCE_YOUR_CALM") \ + _ (INADEQUATE_SECURITY, "INADEQUATE_SECURITY") \ + _ (HTTP_1_1_REQUIRED, "HTTP_1_1_REQUIRED") + +typedef enum http2_error_ +{ +#define _(s, str) HTTP2_ERROR_##s, + foreach_http2_error +#undef _ +} http2_error_t; + +static inline u8 * +format_http2_error (u8 *s, va_list *va) +{ + http2_error_t e = va_arg (*va, http2_error_t); + u8 *t = 0; + + switch (e) + { +#define _(s, str) \ + case HTTP2_ERROR_##s: \ + t = (u8 *) str; \ + break; + foreach_http2_error +#undef _ + default : return format (s, "BUG: unknown"); + } + return format (s, "%s", t); +} + +#define foreach_http2_pseudo_header \ + _ (0, METHOD, "method") \ + _ (1, SCHEME, "scheme") \ + _ (2, AUTHORITY, "authority") \ + _ (3, PATH, "path") \ + _ (4, STATUS, "status") + +/* value, label, member, min, max, default_value, err_code */ +#define foreach_http2_settings \ + _ (1, HEADER_TABLE_SIZE, header_table_size, 0, CLIB_U32_MAX, 4096, \ + HTTP2_ERROR_NO_ERROR) \ + _ (2, ENABLE_PUSH, enable_push, 0, 1, 1, HTTP2_ERROR_PROTOCOL_ERROR) \ + _ (3, MAX_CONCURRENT_STREAMS, max_concurrent_streams, 0, CLIB_U32_MAX, \ + CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR) \ + _ (4, INITIAL_WINDOW_SIZE, initial_window_size, 0, 0x7FFFFFFF, 65535, \ + HTTP2_ERROR_FLOW_CONTROL_ERROR) \ + _ (5, MAX_FRAME_SIZE, max_frame_size, 16384, 16777215, 16384, \ + 
HTTP2_ERROR_PROTOCOL_ERROR) \ + _ (6, MAX_HEADER_LIST_SIZE, max_header_list_size, 0, CLIB_U32_MAX, \ + CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR) + +typedef enum +{ +#define _(value, label, member, min, max, default_value, err_code) \ + HTTP2_SETTINGS_##label = value, + foreach_http2_settings +#undef _ +} http_settings_t; + +typedef struct +{ +#define _(value, label, member, min, max, default_value, err_code) u32 member; + foreach_http2_settings +#undef _ +} http2_conn_settings_t; + +static const http2_conn_settings_t http2_default_conn_settings = { +#define _(value, label, member, min, max, default_value, err_code) \ + default_value, + foreach_http2_settings +#undef _ +}; + +#endif /* SRC_PLUGINS_HTTP_HTTP2_H_ */ diff --git a/src/plugins/http/http2/huffman_table.h b/src/plugins/http/http2/huffman_table.h new file mode 100644 index 00000000000..66afffbc54a --- /dev/null +++ b/src/plugins/http/http2/huffman_table.h @@ -0,0 +1,319 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +/* generated by mk_huffman_table.py */ + +#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ +#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ + +#include <vppinfra/types.h> + +typedef struct +{ + u8 code_len; + u32 code; +} hpack_huffman_symbol_t; + +static hpack_huffman_symbol_t huff_sym_table[] = { + { 13, 0x1ff8 }, { 23, 0x7fffd8 }, { 28, 0xfffffe2 }, { 28, 0xfffffe3 }, + { 28, 0xfffffe4 }, { 28, 0xfffffe5 }, { 28, 0xfffffe6 }, { 28, 0xfffffe7 }, + { 28, 0xfffffe8 }, { 24, 0xffffea }, { 30, 0x3ffffffc }, { 28, 0xfffffe9 }, + { 28, 0xfffffea }, { 30, 0x3ffffffd }, { 28, 0xfffffeb }, { 28, 0xfffffec }, + { 28, 0xfffffed }, { 28, 0xfffffee }, { 28, 0xfffffef }, { 28, 0xffffff0 }, + { 28, 0xffffff1 }, { 28, 0xffffff2 }, { 30, 0x3ffffffe }, { 28, 0xffffff3 }, + { 28, 0xffffff4 }, { 28, 0xffffff5 }, { 28, 0xffffff6 }, { 28, 0xffffff7 }, + { 28, 0xffffff8 }, { 28, 0xffffff9 }, { 28, 0xffffffa }, { 28, 0xffffffb }, + { 6, 0x14 }, { 10, 0x3f8 }, { 10, 0x3f9 }, { 12, 0xffa }, + { 13, 0x1ff9 }, { 6, 0x15 }, { 8, 0xf8 }, { 11, 0x7fa }, + { 10, 0x3fa }, { 10, 0x3fb }, { 8, 0xf9 }, { 11, 0x7fb }, + { 8, 0xfa }, { 6, 0x16 }, { 6, 0x17 }, { 6, 0x18 }, + { 5, 0x0 }, { 5, 0x1 }, { 5, 0x2 }, { 6, 0x19 }, + { 6, 0x1a }, { 6, 0x1b }, { 6, 0x1c }, { 6, 0x1d }, + { 6, 0x1e }, { 6, 0x1f }, { 7, 0x5c }, { 8, 0xfb }, + { 15, 0x7ffc }, { 6, 0x20 }, { 12, 0xffb }, { 10, 0x3fc }, + { 13, 0x1ffa }, { 6, 0x21 }, { 7, 0x5d }, { 7, 0x5e }, + { 7, 0x5f }, { 7, 0x60 }, { 7, 0x61 }, { 7, 0x62 }, + { 7, 0x63 }, { 7, 0x64 }, { 7, 0x65 }, { 7, 0x66 }, + { 7, 0x67 }, { 7, 0x68 }, { 7, 0x69 }, { 7, 0x6a }, + { 7, 0x6b }, { 7, 0x6c }, { 7, 0x6d }, { 7, 0x6e }, + { 7, 0x6f }, { 7, 0x70 }, { 7, 0x71 }, { 7, 0x72 }, + { 8, 0xfc }, { 7, 0x73 }, { 8, 0xfd }, { 13, 0x1ffb }, + { 19, 0x7fff0 }, { 13, 0x1ffc }, { 14, 0x3ffc }, { 6, 0x22 }, + { 15, 0x7ffd }, { 5, 0x3 }, { 6, 0x23 }, { 5, 0x4 }, + { 6, 0x24 }, { 5, 0x5 }, { 6, 0x25 }, { 6, 0x26 }, + { 6, 0x27 }, { 5, 0x6 }, { 7, 0x74 }, { 7, 0x75 }, + { 6, 0x28 }, { 6, 
0x29 }, { 6, 0x2a }, { 5, 0x7 }, + { 6, 0x2b }, { 7, 0x76 }, { 6, 0x2c }, { 5, 0x8 }, + { 5, 0x9 }, { 6, 0x2d }, { 7, 0x77 }, { 7, 0x78 }, + { 7, 0x79 }, { 7, 0x7a }, { 7, 0x7b }, { 15, 0x7ffe }, + { 11, 0x7fc }, { 14, 0x3ffd }, { 13, 0x1ffd }, { 28, 0xffffffc }, + { 20, 0xfffe6 }, { 22, 0x3fffd2 }, { 20, 0xfffe7 }, { 20, 0xfffe8 }, + { 22, 0x3fffd3 }, { 22, 0x3fffd4 }, { 22, 0x3fffd5 }, { 23, 0x7fffd9 }, + { 22, 0x3fffd6 }, { 23, 0x7fffda }, { 23, 0x7fffdb }, { 23, 0x7fffdc }, + { 23, 0x7fffdd }, { 23, 0x7fffde }, { 24, 0xffffeb }, { 23, 0x7fffdf }, + { 24, 0xffffec }, { 24, 0xffffed }, { 22, 0x3fffd7 }, { 23, 0x7fffe0 }, + { 24, 0xffffee }, { 23, 0x7fffe1 }, { 23, 0x7fffe2 }, { 23, 0x7fffe3 }, + { 23, 0x7fffe4 }, { 21, 0x1fffdc }, { 22, 0x3fffd8 }, { 23, 0x7fffe5 }, + { 22, 0x3fffd9 }, { 23, 0x7fffe6 }, { 23, 0x7fffe7 }, { 24, 0xffffef }, + { 22, 0x3fffda }, { 21, 0x1fffdd }, { 20, 0xfffe9 }, { 22, 0x3fffdb }, + { 22, 0x3fffdc }, { 23, 0x7fffe8 }, { 23, 0x7fffe9 }, { 21, 0x1fffde }, + { 23, 0x7fffea }, { 22, 0x3fffdd }, { 22, 0x3fffde }, { 24, 0xfffff0 }, + { 21, 0x1fffdf }, { 22, 0x3fffdf }, { 23, 0x7fffeb }, { 23, 0x7fffec }, + { 21, 0x1fffe0 }, { 21, 0x1fffe1 }, { 22, 0x3fffe0 }, { 21, 0x1fffe2 }, + { 23, 0x7fffed }, { 22, 0x3fffe1 }, { 23, 0x7fffee }, { 23, 0x7fffef }, + { 20, 0xfffea }, { 22, 0x3fffe2 }, { 22, 0x3fffe3 }, { 22, 0x3fffe4 }, + { 23, 0x7ffff0 }, { 22, 0x3fffe5 }, { 22, 0x3fffe6 }, { 23, 0x7ffff1 }, + { 26, 0x3ffffe0 }, { 26, 0x3ffffe1 }, { 20, 0xfffeb }, { 19, 0x7fff1 }, + { 22, 0x3fffe7 }, { 23, 0x7ffff2 }, { 22, 0x3fffe8 }, { 25, 0x1ffffec }, + { 26, 0x3ffffe2 }, { 26, 0x3ffffe3 }, { 26, 0x3ffffe4 }, { 27, 0x7ffffde }, + { 27, 0x7ffffdf }, { 26, 0x3ffffe5 }, { 24, 0xfffff1 }, { 25, 0x1ffffed }, + { 19, 0x7fff2 }, { 21, 0x1fffe3 }, { 26, 0x3ffffe6 }, { 27, 0x7ffffe0 }, + { 27, 0x7ffffe1 }, { 26, 0x3ffffe7 }, { 27, 0x7ffffe2 }, { 24, 0xfffff2 }, + { 21, 0x1fffe4 }, { 21, 0x1fffe5 }, { 26, 0x3ffffe8 }, { 26, 0x3ffffe9 }, + { 28, 0xffffffd }, { 
27, 0x7ffffe3 }, { 27, 0x7ffffe4 }, { 27, 0x7ffffe5 }, + { 20, 0xfffec }, { 24, 0xfffff3 }, { 20, 0xfffed }, { 21, 0x1fffe6 }, + { 22, 0x3fffe9 }, { 21, 0x1fffe7 }, { 21, 0x1fffe8 }, { 23, 0x7ffff3 }, + { 22, 0x3fffea }, { 22, 0x3fffeb }, { 25, 0x1ffffee }, { 25, 0x1ffffef }, + { 24, 0xfffff4 }, { 24, 0xfffff5 }, { 26, 0x3ffffea }, { 23, 0x7ffff4 }, + { 26, 0x3ffffeb }, { 27, 0x7ffffe6 }, { 26, 0x3ffffec }, { 26, 0x3ffffed }, + { 27, 0x7ffffe7 }, { 27, 0x7ffffe8 }, { 27, 0x7ffffe9 }, { 27, 0x7ffffea }, + { 27, 0x7ffffeb }, { 28, 0xffffffe }, { 27, 0x7ffffec }, { 27, 0x7ffffed }, + { 27, 0x7ffffee }, { 27, 0x7ffffef }, { 27, 0x7fffff0 }, { 26, 0x3ffffee }, +}; + +typedef struct +{ + u8 symbol; + u8 code_len; +} hpack_huffman_code_t; + +static hpack_huffman_code_t huff_code_table_fast[] = { + { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, + { 0x30, 5 }, { 0x30, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, + { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x32, 5 }, { 0x32, 5 }, + { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, + { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, + { 0x61, 5 }, { 0x61, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, + { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x65, 5 }, { 0x65, 5 }, + { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, + { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, + { 0x69, 5 }, { 0x69, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, + { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x73, 5 }, { 0x73, 5 }, + { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, + { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, + { 0x74, 5 }, { 0x74, 5 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 }, + { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x2D, 6 }, { 0x2D, 6 }, 
+ { 0x2D, 6 }, { 0x2D, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, + { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x33, 6 }, { 0x33, 6 }, + { 0x33, 6 }, { 0x33, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 }, + { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x36, 6 }, { 0x36, 6 }, + { 0x36, 6 }, { 0x36, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 }, + { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x39, 6 }, { 0x39, 6 }, + { 0x39, 6 }, { 0x39, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, + { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x5F, 6 }, { 0x5F, 6 }, + { 0x5F, 6 }, { 0x5F, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 }, + { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x66, 6 }, { 0x66, 6 }, + { 0x66, 6 }, { 0x66, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 }, + { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x6C, 6 }, { 0x6C, 6 }, + { 0x6C, 6 }, { 0x6C, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, + { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x70, 6 }, { 0x70, 6 }, + { 0x70, 6 }, { 0x70, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 }, + { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x3A, 7 }, { 0x3A, 7 }, + { 0x42, 7 }, { 0x42, 7 }, { 0x43, 7 }, { 0x43, 7 }, { 0x44, 7 }, { 0x44, 7 }, + { 0x45, 7 }, { 0x45, 7 }, { 0x46, 7 }, { 0x46, 7 }, { 0x47, 7 }, { 0x47, 7 }, + { 0x48, 7 }, { 0x48, 7 }, { 0x49, 7 }, { 0x49, 7 }, { 0x4A, 7 }, { 0x4A, 7 }, + { 0x4B, 7 }, { 0x4B, 7 }, { 0x4C, 7 }, { 0x4C, 7 }, { 0x4D, 7 }, { 0x4D, 7 }, + { 0x4E, 7 }, { 0x4E, 7 }, { 0x4F, 7 }, { 0x4F, 7 }, { 0x50, 7 }, { 0x50, 7 }, + { 0x51, 7 }, { 0x51, 7 }, { 0x52, 7 }, { 0x52, 7 }, { 0x53, 7 }, { 0x53, 7 }, + { 0x54, 7 }, { 0x54, 7 }, { 0x55, 7 }, { 0x55, 7 }, { 0x56, 7 }, { 0x56, 7 }, + { 0x57, 7 }, { 0x57, 7 }, { 0x59, 7 }, { 0x59, 7 }, { 0x6A, 7 }, { 0x6A, 7 }, + { 0x6B, 7 }, { 0x6B, 7 }, { 0x71, 7 }, { 0x71, 7 }, { 0x76, 7 }, { 0x76, 7 }, 
+ { 0x77, 7 }, { 0x77, 7 }, { 0x78, 7 }, { 0x78, 7 }, { 0x79, 7 }, { 0x79, 7 }, + { 0x7A, 7 }, { 0x7A, 7 }, { 0x26, 8 }, { 0x2A, 8 }, { 0x2C, 8 }, { 0x3B, 8 }, + { 0x58, 8 }, { 0x5A, 8 }, { 0x00, 0 }, { 0x00, 0 }, +}; + +typedef struct +{ + u32 first_code; + u8 code_len; + u8 symbols[29]; +} hpack_huffman_group_t; + +/* clang-format off */ + +static hpack_huffman_group_t huff_code_table_slow[] = { + { + 0x3f8, /* first_code */ + 10, /* code_len */ + { + 0x21, 0x22, 0x28, 0x29, 0x3F, + } /* symbols */ + }, + { + 0x7fa, /* first_code */ + 11, /* code_len */ + { + 0x27, 0x2B, 0x7C, + } /* symbols */ + }, + { + 0xffa, /* first_code */ + 12, /* code_len */ + { + 0x23, 0x3E, + } /* symbols */ + }, + { + 0x1ff8, /* first_code */ + 13, /* code_len */ + { + 0x00, 0x24, 0x40, 0x5B, 0x5D, 0x7E, + } /* symbols */ + }, + { + 0x3ffc, /* first_code */ + 14, /* code_len */ + { + 0x5E, 0x7D, + } /* symbols */ + }, + { + 0x7ffc, /* first_code */ + 15, /* code_len */ + { + 0x3C, 0x60, 0x7B, + } /* symbols */ + }, + { + 0x7fff0, /* first_code */ + 19, /* code_len */ + { + 0x5C, 0xC3, 0xD0, + } /* symbols */ + }, + { + 0xfffe6, /* first_code */ + 20, /* code_len */ + { + 0x80, 0x82, 0x83, 0xA2, 0xB8, 0xC2, 0xE0, 0xE2, + } /* symbols */ + }, + { + 0x1fffdc, /* first_code */ + 21, /* code_len */ + { + 0x99, 0xA1, 0xA7, 0xAC, 0xB0, 0xB1, 0xB3, 0xD1, 0xD8, 0xD9, + 0xE3, 0xE5, 0xE6, + } /* symbols */ + }, + { + 0x3fffd2, /* first_code */ + 22, /* code_len */ + { + 0x81, 0x84, 0x85, 0x86, 0x88, 0x92, 0x9A, 0x9C, 0xA0, 0xA3, + 0xA4, 0xA9, 0xAA, 0xAD, 0xB2, 0xB5, 0xB9, 0xBA, 0xBB, 0xBD, + 0xBE, 0xC4, 0xC6, 0xE4, 0xE8, 0xE9, + } /* symbols */ + }, + { + 0x7fffd8, /* first_code */ + 23, /* code_len */ + { + 0x01, 0x87, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8F, 0x93, 0x95, + 0x96, 0x97, 0x98, 0x9B, 0x9D, 0x9E, 0xA5, 0xA6, 0xA8, 0xAE, + 0xAF, 0xB4, 0xB6, 0xB7, 0xBC, 0xBF, 0xC5, 0xE7, 0xEF, + } /* symbols */ + }, + { + 0xffffea, /* first_code */ + 24, /* code_len */ + { + 0x09, 0x8E, 0x90, 0x91, 0x94, 
0x9F, 0xAB, 0xCE, 0xD7, 0xE1, + 0xEC, 0xED, + } /* symbols */ + }, + { + 0x1ffffec, /* first_code */ + 25, /* code_len */ + { + 0xC7, 0xCF, 0xEA, 0xEB, + } /* symbols */ + }, + { + 0x3ffffe0, /* first_code */ + 26, /* code_len */ + { + 0xC0, 0xC1, 0xC8, 0xC9, 0xCA, 0xCD, 0xD2, 0xD5, 0xDA, 0xDB, + 0xEE, 0xF0, 0xF2, 0xF3, 0xFF, + } /* symbols */ + }, + { + 0x7ffffde, /* first_code */ + 27, /* code_len */ + { + 0xCB, 0xCC, 0xD3, 0xD4, 0xD6, 0xDD, 0xDE, 0xDF, 0xF1, 0xF4, + 0xF5, 0xF6, 0xF7, 0xF8, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, + } /* symbols */ + }, + { + 0xfffffe2, /* first_code */ + 28, /* code_len */ + { + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, + 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x17, 0x18, 0x19, + 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F, 0xDC, 0xF9, + } /* symbols */ + }, + { + 0x3ffffffc, /* first_code */ + 30, /* code_len */ + { + 0x0A, 0x0D, 0x16, + } /* symbols */ + }, +}; + +/* clang-format on */ + +always_inline hpack_huffman_group_t * +hpack_huffman_get_group (u32 value) +{ + if (value < 0xFF400000) + return &huff_code_table_slow[0]; + else if (value < 0xFFA00000) + return &huff_code_table_slow[1]; + else if (value < 0xFFC00000) + return &huff_code_table_slow[2]; + else if (value < 0xFFF00000) + return &huff_code_table_slow[3]; + else if (value < 0xFFF80000) + return &huff_code_table_slow[4]; + else if (value < 0xFFFE0000) + return &huff_code_table_slow[5]; + else if (value < 0xFFFE6000) + return &huff_code_table_slow[6]; + else if (value < 0xFFFEE000) + return &huff_code_table_slow[7]; + else if (value < 0xFFFF4800) + return &huff_code_table_slow[8]; + else if (value < 0xFFFFB000) + return &huff_code_table_slow[9]; + else if (value < 0xFFFFEA00) + return &huff_code_table_slow[10]; + else if (value < 0xFFFFF600) + return &huff_code_table_slow[11]; + else if (value < 0xFFFFF800) + return &huff_code_table_slow[12]; + else if (value < 0xFFFFFBC0) + return &huff_code_table_slow[13]; + else if (value < 0xFFFFFE20) + return 
&huff_code_table_slow[14]; + else if (value < 0xFFFFFFF0) + return &huff_code_table_slow[15]; + else + return &huff_code_table_slow[16]; +} + +#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */ diff --git a/src/plugins/http/http_header_names.h b/src/plugins/http/http_header_names.h index 99acac786db..1778daf10d9 100644 --- a/src/plugins/http/http_header_names.h +++ b/src/plugins/http/http_header_names.h @@ -8,7 +8,8 @@ #include <http/http.h> static http_token_t http_header_names[] = { -#define _(sym, str) { http_token_lit (str) }, +#define _(sym, str_canonical, str_lower, hpack_index) \ + { http_token_lit (str_canonical) }, foreach_http_header_name #undef _ }; diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst index 995e55e6f0f..4e799a57668 100644 --- a/src/plugins/http/http_plugin.rst +++ b/src/plugins/http/http_plugin.rst @@ -15,7 +15,7 @@ Usage ----- The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``, -``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_build_header_table``, ``http_get_header``, +``http_percent_decode``, ``http_path_sanitize``, ``http_build_header_table``, ``http_get_header``, ``http_reset_header_table``, ``http_free_header_table``, ``http_init_headers_ctx``, ``http_add_header``, ``http_add_custom_header``, ``http_validate_target_syntax``, ``http_parse_authority``, ``http_serialize_authority``, ``http_parse_masque_host_port``, ``http_decap_udp_payload_datagram``, ``http_encap_udp_payload_datagram``, diff --git a/src/plugins/http/http_private.h b/src/plugins/http/http_private.h new file mode 100644 index 00000000000..154a63d2402 --- /dev/null +++ b/src/plugins/http/http_private.h @@ -0,0 +1,693 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ +#define SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ + +#include <vppinfra/time_range.h> +#include <vnet/session/application.h> +#include <vnet/session/session.h> +#include <vnet/session/transport.h> +#include <http/http.h> +#include <http/http_buffer.h> + +#define HTTP_FIFO_THRESH (16 << 10) + +typedef u32 http_conn_handle_t; + +typedef struct http_conn_id_ +{ + union + { + session_handle_t app_session_handle; + u32 parent_app_api_ctx; + }; + session_handle_t tc_session_handle; + u32 parent_app_wrk_index; +} http_conn_id_t; + +STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN, + "ctx id must be less than TRANSPORT_CONN_ID_LEN"); + +#define foreach_http_conn_state \ + _ (LISTEN, "LISTEN") \ + _ (CONNECTING, "CONNECTING") \ + _ (ESTABLISHED, "ESTABLISHED") \ + _ (TRANSPORT_CLOSED, "TRANSPORT-CLOSED") \ + _ (APP_CLOSED, "APP-CLOSED") \ + _ (CLOSED, "CLOSED") + +typedef enum http_conn_state_ +{ +#define _(s, str) HTTP_CONN_STATE_##s, + foreach_http_conn_state +#undef _ +} http_conn_state_t; + +#define foreach_http_req_state \ + _ (0, IDLE, "idle") \ + _ (1, WAIT_APP_METHOD, "wait app method") \ + _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \ + _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \ + _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ + _ (5, WAIT_APP_REPLY, "wait app reply") \ + _ (6, APP_IO_MORE_DATA, "app io more data") \ + _ (7, TUNNEL, "tunnel") \ + _ (8, UDP_TUNNEL, "udp tunnel") + +typedef enum http_req_state_ +{ +#define _(n, s, str) HTTP_REQ_STATE_##s = n, + foreach_http_req_state +#undef _ + HTTP_REQ_N_STATES +} http_req_state_t; + +typedef enum http_target_form_ +{ + HTTP_TARGET_ORIGIN_FORM, + HTTP_TARGET_ABSOLUTE_FORM, + HTTP_TARGET_AUTHORITY_FORM, + HTTP_TARGET_ASTERISK_FORM +} http_target_form_t; + +typedef enum http_version_ +{ + HTTP_VERSION_1, + HTTP_VERSION_2, + HTTP_VERSION_3, + HTTP_VERSION_NA = 7, +} http_version_t; + +typedef struct http_req_ +{ + /* in case of 
multiplexing we have app session for each stream */ + session_handle_t app_session_handle; + u32 as_fifo_offset; /* for peek */ + + http_req_state_t state; /* state-machine state */ + + http_buffer_t tx_buf; /* message body from app to be sent */ + + /* + * for parsing of incoming message from transport + */ + u32 rx_buf_offset; /* current offset during parsing */ + u32 control_data_len; /* start line + headers + empty line */ + + union + { + u64 to_recv; /* remaining bytes of body to receive from transport */ + u64 to_skip; /* remaining bytes of capsule to skip */ + }; + + u8 is_tunnel; + + /* + * parsed metadata for app + */ + union + { + http_status_code_t status_code; + http_req_method_t method; + }; + + http_target_form_t target_form; + u8 *target; + http_url_scheme_t scheme; + u32 target_authority_offset; + u32 target_authority_len; + u32 target_path_offset; + u32 target_path_len; + u32 target_query_offset; + u32 target_query_len; + + u32 headers_offset; + u32 headers_len; + + u32 body_offset; + u64 body_len; + + http_field_line_t *headers; + uword content_len_header_index; + uword connection_header_index; + uword upgrade_header_index; + uword host_header_index; + + http_upgrade_proto_t upgrade_proto; +} http_req_t; + +#define foreach_http_conn_flags \ + _ (HO_DONE, "ho-done") \ + _ (NO_APP_SESSION, "no-app-session") \ + _ (PENDING_TIMER, "pending-timer") \ + _ (IS_SERVER, "is-server") + +typedef enum http_conn_flags_bit_ +{ +#define _(sym, str) HTTP_CONN_F_BIT_##sym, + foreach_http_conn_flags +#undef _ +} http_conn_flags_bit_t; + +typedef enum http_conn_flags_ +{ +#define _(sym, str) HTTP_CONN_F_##sym = 1 << HTTP_CONN_F_BIT_##sym, + foreach_http_conn_flags +#undef _ +} __clib_packed http_conn_flags_t; + +typedef struct http_tc_ +{ + union + { + transport_connection_t connection; + http_conn_id_t c_http_conn_id; + }; +#define h_tc_session_handle c_http_conn_id.tc_session_handle +#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index +#define 
h_pa_session_handle c_http_conn_id.app_session_handle +#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx +#define h_hc_index connection.c_index + + http_version_t version; + http_conn_state_t state; + u32 timer_handle; + u32 timeout; + u8 *app_name; + u8 *host; + http_conn_flags_t flags; + http_udp_tunnel_mode_t udp_tunnel_mode; + + http_req_t *req_pool; /* multiplexing => request per stream */ +} http_conn_t; + +typedef struct http_worker_ +{ + http_conn_t *conn_pool; +} http_worker_t; + +typedef struct http_main_ +{ + http_worker_t *wrk; + http_conn_t *listener_pool; + http_conn_t *ho_conn_pool; + u32 *postponed_ho_free; + u32 *ho_free_list; + u32 app_index; + + u8 **rx_bufs; + u8 **tx_bufs; + u8 **app_header_lists; + + clib_timebase_t timebase; + + http_status_code_t *sc_by_u16; + /* + * Runtime config + */ + u8 is_init; + + /* + * Config + */ + u64 first_seg_size; + u64 add_seg_size; + u32 fifo_size; +} http_main_t; + +typedef struct http_engine_vft_ +{ + void (*app_tx_callback) (http_conn_t *hc, transport_send_params_t *sp); + void (*app_rx_evt_callback) (http_conn_t *hc); + void (*app_close_callback) (http_conn_t *hc); + void (*app_reset_callback) (http_conn_t *hc); + void (*transport_rx_callback) (http_conn_t *hc); + void (*transport_close_callback) (http_conn_t *hc); +} http_engine_vft_t; + +void http_register_engine (const http_engine_vft_t *vft, + http_version_t version); + +/* HTTP state machine result */ +typedef enum http_sm_result_t_ +{ + HTTP_SM_STOP = 0, + HTTP_SM_CONTINUE = 1, + HTTP_SM_ERROR = -1, +} http_sm_result_t; + +typedef http_sm_result_t (*http_sm_handler) (http_conn_t *hc, http_req_t *req, + transport_send_params_t *sp); + +#define expect_char(c) \ + if (*p++ != c) \ + { \ + clib_warning ("unexpected character"); \ + return -1; \ + } + +#define parse_int(val, mul) \ + do \ + { \ + if (!isdigit (*p)) \ + { \ + clib_warning ("expected digit"); \ + return -1; \ + } \ + val += mul * (*p++ - '0'); \ + } \ + while (0) + +#define 
http_field_line_value_token(_fl, _req, _rx_buf) \ + (const char *) ((_rx_buf) + (_req)->headers_offset + (_fl)->value_offset), \ + (_fl)->value_len + +u8 *format_http_req_state (u8 *s, va_list *va); +u8 *format_http_conn_state (u8 *s, va_list *args); +u8 *format_http_time_now (u8 *s, va_list *args); + +/** + * @brief Find the first occurrence of the string in the vector. + * + * @param vec The vector to be scanned. + * @param offset Search offset in the vector. + * @param num Maximum number of characters to be searched if non-zero. + * @param str The string to be searched. + * + * @return @c -1 if the string is not found within the vector; index otherwise. + */ +int http_v_find_index (u8 *vec, u32 offset, u32 num, char *str); + +/** + * Disconnect HTTP connection. + * + * @param hc HTTP connection to disconnect. + */ +void http_disconnect_transport (http_conn_t *hc); + +/** + * Convert numeric representation of status code to @c http_status_code_t. + * + * @param status_code Status code within the range of 100 to 599, inclusive. + * + * @return Registered status code or in case of unrecognized status code as + * equivalent to the x00 status code of that class. + */ +http_status_code_t http_sc_by_u16 (u16 status_code); + +/** + * Read header list sent by app. + * + * @param hc HTTP connection. + * @param msg HTTP msg sent by app. + * + * @return Pointer to the header list. + * + * @note For immediate processing, not for buffering. + */ +u8 *http_get_app_header_list (http_conn_t *hc, http_msg_t *msg); + +/** + * Get pre-allocated TX buffer/vector. + * + * @param hc HTTP connection. + * + * @return Pointer to the vector. + * + * @note Vector length is reset to zero, use as temporary storage. + */ +u8 *http_get_tx_buf (http_conn_t *hc); + +/** + * Get pre-allocated RX buffer/vector. + * + * @param hc HTTP connection. + * + * @return Pointer to the vector. + * + * @note Vector length is reset to zero, use as temporary storage. 
+ */ +u8 *http_get_rx_buf (http_conn_t *hc); + +/** + * Read request target path sent by app. + * + * @param hc HTTP connection. + * @param msg HTTP msg sent by app. + * + * @return Pointer to the target path. + * + * @note Valid only with request lifetime. + */ +u8 *http_get_app_target (http_req_t *req, http_msg_t *msg); + +/** + * Initialize per-request HTTP TX buffer. + * + * @param req HTTP request. + * @param msg HTTP msg sent by app. + * + * @note Use for streaming of body sent by app. + */ +void http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg); + +/** + * Allocate new request within given HTTP connection. + * + * @param hc HTTP connection. + * + * @return Request index in per-connection pool. + */ +always_inline u32 +http_alloc_req (http_conn_t *hc) +{ + http_req_t *req; + pool_get_zero (hc->req_pool, req); + req->app_session_handle = SESSION_INVALID_HANDLE; + return (req - hc->req_pool); +} + +/** + * Get request in per-connection pool. + * + * @param hc HTTP connection. + * @param req_index Request index. + * + * @return Pointer to the request data. + */ +always_inline http_req_t * +http_get_req (http_conn_t *hc, u32 req_index) +{ + return pool_elt_at_index (hc->req_pool, req_index); +} + +/** + * Get request in per-connection pool if valid. + * + * @param hc HTTP connection. + * @param req_index Request index. + * + * @return Pointer to the request data or @c 0 if not valid. + */ +always_inline http_req_t * +http_get_req_if_valid (http_conn_t *hc, u32 req_index) +{ + if (pool_is_free_index (hc->req_pool, req_index)) + return 0; + return pool_elt_at_index (hc->req_pool, req_index); +} + +/** + * Free request in per-connection pool. + * + * @param hc HTTP connection. + * @param req Pointer to the request. 
+ */ +always_inline void +http_req_free (http_conn_t *hc, http_req_t *req) +{ + vec_free (req->headers); + vec_free (req->target); + http_buffer_free (&req->tx_buf); + if (CLIB_DEBUG) + memset (req, 0xba, sizeof (*req)); + pool_put (hc->req_pool, req); +} + +/** + * Change state of given HTTP request. + * + * @param req HTTP request. + * @param state New state. + */ +always_inline void +http_req_state_change (http_req_t *req, http_req_state_t state) +{ + HTTP_DBG (1, "changing http req state: %U -> %U", format_http_req_state, + req->state, format_http_req_state, state); + ASSERT (req->state != HTTP_REQ_STATE_TUNNEL); + req->state = state; +} + +/** + * Send RX event to the app worker. + * + * @param req HTTP request. + */ +always_inline void +http_app_worker_rx_notify (http_req_t *req) +{ + session_t *as; + app_worker_t *app_wrk; + + as = session_get_from_handle (req->app_session_handle); + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); +} + +/** + * Get underlying transport protocol of the HTTP connection. + * + * @param hc HTTP connection. + * + * @return Transport protocol, @ref transport_proto_t. + */ +always_inline transport_proto_t +http_get_transport_proto (http_conn_t *hc) +{ + return session_get_transport_proto ( + session_get_from_handle (hc->h_tc_session_handle)); +} + +/** + * Read HTTP msg sent by app. + * + * @param req HTTP request. + * @param msg HTTP msq will be stored here. 
+ */ +always_inline void +http_get_app_msg (http_req_t *req, http_msg_t *msg) +{ + session_t *as; + int rv; + + as = session_get_from_handle (req->app_session_handle); + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (*msg), (u8 *) msg); + ASSERT (rv == sizeof (*msg)); +} + +/* Abstraction of app session fifo operations */ + +always_inline void +http_io_as_want_deq_ntf (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); +} + +always_inline u32 +http_io_as_max_write (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + return svm_fifo_max_enqueue_prod (as->rx_fifo); +} + +always_inline u32 +http_io_as_max_read (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + return svm_fifo_max_dequeue_cons (as->tx_fifo); +} + +always_inline u32 +http_io_as_write_segs (http_req_t *req, const svm_fifo_seg_t segs[], + u32 n_segs) +{ + int n_written; + session_t *as = session_get_from_handle (req->app_session_handle); + n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + return (u32) n_written; +} + +always_inline u32 +http_io_as_read (http_req_t *req, u8 *buf, u32 len, u8 peek) +{ + int n_read; + session_t *as = session_get_from_handle (req->app_session_handle); + + if (peek) + { + n_read = svm_fifo_peek (as->tx_fifo, req->as_fifo_offset, len, buf); + ASSERT (n_read > 0); + req->as_fifo_offset += len; + return (u32) n_read; + } + + n_read = svm_fifo_dequeue (as->tx_fifo, len, buf); + ASSERT (n_read == len); + return (u32) n_read; +} + +always_inline void +http_io_as_read_segs (http_req_t *req, svm_fifo_seg_t *segs, u32 *n_segs, + u32 max_bytes) +{ + int n_read; + session_t *as = session_get_from_handle (req->app_session_handle); + n_read = svm_fifo_segments (as->tx_fifo, 0, segs, n_segs, max_bytes); + ASSERT (n_read > 0); +} + +always_inline void 
+http_io_as_drain (http_req_t *req, u32 len) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + svm_fifo_dequeue_drop (as->tx_fifo, len); + req->as_fifo_offset = 0; +} + +always_inline void +http_io_as_drain_all (http_req_t *req) +{ + session_t *as = session_get_from_handle (req->app_session_handle); + svm_fifo_dequeue_drop_all (as->tx_fifo); + req->as_fifo_offset = 0; +} + +/* Abstraction of transport session fifo operations */ + +always_inline u32 +http_io_ts_max_read (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + return svm_fifo_max_dequeue_cons (ts->rx_fifo); +} + +always_inline u32 +http_io_ts_max_write (http_conn_t *hc, transport_send_params_t *sp) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + return clib_min (svm_fifo_max_enqueue_prod (ts->tx_fifo), + sp->max_burst_size); +} + +always_inline u32 +http_io_ts_read (http_conn_t *hc, u8 *buf, u32 len, u8 peek) +{ + int n_read; + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + + if (peek) + { + n_read = svm_fifo_peek (ts->rx_fifo, 0, len, buf); + ASSERT (n_read > 0); + return (u32) n_read; + } + + n_read = svm_fifo_dequeue (ts->rx_fifo, len, buf); + ASSERT (n_read == len); + return (u32) n_read; +} + +always_inline void +http_io_ts_read_segs (http_conn_t *hc, svm_fifo_seg_t *segs, u32 *n_segs, + u32 max_bytes) +{ + int n_read; + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + n_read = svm_fifo_segments (ts->rx_fifo, 0, segs, n_segs, max_bytes); + ASSERT (n_read > 0); +} + +always_inline void +http_io_ts_drain (http_conn_t *hc, u32 len) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + svm_fifo_dequeue_drop (ts->rx_fifo, len); +} + +always_inline void +http_io_ts_drain_all (http_conn_t *hc) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + svm_fifo_dequeue_drop_all (ts->rx_fifo); +} + +always_inline void +http_io_ts_after_read 
(http_conn_t *hc, u8 clear_evt) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + if (clear_evt) + { + if (svm_fifo_is_empty_cons (ts->rx_fifo)) + svm_fifo_unset_event (ts->rx_fifo); + } + else + { + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_program_rx_io_evt (hc->h_tc_session_handle); + } +} + +always_inline void +http_io_ts_write (http_conn_t *hc, u8 *data, u32 len, + transport_send_params_t *sp) +{ + int n_written; + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + + n_written = svm_fifo_enqueue (ts->tx_fifo, len, data); + ASSERT (n_written == len); + if (sp) + { + ASSERT (sp->max_burst_size >= len); + sp->bytes_dequeued += len; + sp->max_burst_size -= len; + } +} + +always_inline u32 +http_io_ts_write_segs (http_conn_t *hc, const svm_fifo_seg_t segs[], + u32 n_segs, transport_send_params_t *sp) +{ + int n_written; + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0); + ASSERT (n_written > 0); + sp->bytes_dequeued += n_written; + sp->max_burst_size -= n_written; + return (u32) n_written; +} + +always_inline void +http_io_ts_after_write (http_conn_t *hc, transport_send_params_t *sp, u8 flush, + u8 written) +{ + session_t *ts = session_get_from_handle (hc->h_tc_session_handle); + + if (!flush) + { + if (written && svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + + if (sp && (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)) + { + /* Deschedule http session and wait for deq notification if + * underlying ts tx fifo almost full */ + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&hc->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + } + else + { + if (written && svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH); + } +} + +#endif /* 
SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ */ diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h index 43d20d004d8..50f634c5397 100644 --- a/src/plugins/http/http_timer.h +++ b/src/plugins/http/http_timer.h @@ -16,7 +16,7 @@ #ifndef SRC_PLUGINS_HTTP_HTTP_TIMER_H_ #define SRC_PLUGINS_HTTP_HTTP_TIMER_H_ -#include <http/http.h> +#include <http/http_private.h> #include <vppinfra/tw_timer_2t_1w_2048sl.h> #define HTTP_CONN_TIMEOUT 60 @@ -45,7 +45,8 @@ http_conn_timer_start (http_conn_t *hc) u32 hs_handle; ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID); - hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + ASSERT (hc->h_hc_index <= 0x00FFFFFF); + hs_handle = hc->c_thread_index << 24 | hc->h_hc_index; clib_spinlock_lock (&twc->tw_lock); hc->timer_handle = @@ -58,7 +59,7 @@ http_conn_timer_stop (http_conn_t *hc) { http_tw_ctx_t *twc = &http_tw_ctx; - hc->pending_timer = 0; + hc->flags &= ~HTTP_CONN_F_PENDING_TIMER; if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID) return; @@ -79,7 +80,8 @@ http_conn_timer_update (http_conn_t *hc) tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout); else { - hs_handle = hc->c_thread_index << 24 | hc->c_c_index; + ASSERT (hc->h_hc_index <= 0x00FFFFFF); + hs_handle = hc->c_thread_index << 24 | hc->h_hc_index; hc->timer_handle = tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout); } diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c index bfaa285eb35..f44d3cbd31b 100644 --- a/src/plugins/http/test/http_test.c +++ b/src/plugins/http/test/http_test.c @@ -6,6 +6,8 @@ #include <vpp/app/version.h> #include <http/http.h> #include <http/http_header_names.h> +#include <http/http2/hpack.h> +#include <http/http2/frame.h> #define HTTP_TEST_I(_cond, _comment, _args...) 
\ ({ \ @@ -533,6 +535,771 @@ http_test_http_header_table (vlib_main_t *vm) return 0; } +static int +http_test_parse_request (const char *first_req, uword first_req_len, + const char *second_req, uword second_req_len, + const char *third_req, uword third_req_len, + hpack_dynamic_table_t *dynamic_table) +{ + http2_error_t rv; + u8 *buf = 0; + hpack_request_control_data_t control_data; + http_field_line_t *headers = 0; + u16 parsed_bitmap = 0; + + static http2_error_t (*_hpack_parse_request) ( + u8 * src, u32 src_len, u8 * dst, u32 dst_len, + hpack_request_control_data_t * control_data, http_field_line_t * *headers, + hpack_dynamic_table_t * dynamic_table); + + _hpack_parse_request = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_parse_request"); + + parsed_bitmap = + HPACK_PSEUDO_HEADER_METHOD_PARSED | HPACK_PSEUDO_HEADER_SCHEME_PARSED | + HPACK_PSEUDO_HEADER_PATH_PARSED | HPACK_PSEUDO_HEADER_AUTHORITY_PARSED; + + /* first request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) first_req, (u32) first_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTP || + control_data.path_len != 1 || control_data.authority_len != 15 || + dynamic_table->used != 57 || vec_len (headers) != 0) + return 1; + if (memcmp (control_data.path, "/", 1)) + return 1; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 1; + vec_free (headers); + vec_free (buf); + + /* second request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) second_req, (u32) second_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != 
HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTP || + control_data.path_len != 1 || control_data.authority_len != 15 || + dynamic_table->used != 110 || vec_len (headers) != 1 || + control_data.headers_len != 21) + return 2; + if (memcmp (control_data.path, "/", 1)) + return 2; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 2; + if (headers[0].name_len != 13 || headers[0].value_len != 8) + return 2; + if (memcmp (control_data.headers + headers[0].name_offset, "cache-control", + 13)) + return 2; + if (memcmp (control_data.headers + headers[0].value_offset, "no-cache", 8)) + return 2; + vec_free (headers); + vec_free (buf); + + /* third request */ + vec_validate_init_empty (buf, 254, 0); + memset (&control_data, 0, sizeof (control_data)); + rv = _hpack_parse_request ((u8 *) third_req, (u32) third_req_len, buf, 254, + &control_data, &headers, dynamic_table); + if (rv != HTTP2_ERROR_NO_ERROR || + control_data.parsed_bitmap != parsed_bitmap || + control_data.method != HTTP_REQ_GET || + control_data.scheme != HTTP_URL_SCHEME_HTTPS || + control_data.path_len != 11 || control_data.authority_len != 15 || + dynamic_table->used != 164 || vec_len (headers) != 1 || + control_data.headers_len != 22) + return 3; + if (memcmp (control_data.path, "/index.html", 11)) + return 3; + if (memcmp (control_data.authority, "www.example.com", 15)) + return 3; + if (headers[0].name_len != 10 || headers[0].value_len != 12) + return 3; + if (memcmp (control_data.headers + headers[0].name_offset, "custom-key", 10)) + return 3; + if (memcmp (control_data.headers + headers[0].value_offset, "custom-value", + 12)) + return 3; + vec_free (headers); + vec_free (buf); + + return 0; +} + +static int +http_test_hpack (vlib_main_t *vm) +{ + vlib_cli_output (vm, "hpack_decode_int"); + + static uword (*_hpack_decode_int) (u8 * *pos, u8 * end, u8 prefix_len); + _hpack_decode_int = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_int"); + + u8 *pos, *end, *input = 
0; + uword value; +#define TEST(i, pl, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + end = vec_end (input); \ + value = _hpack_decode_int (&pos, end, (u8) pl); \ + HTTP_TEST ((value == (uword) e && pos == end), \ + "%U with prefix length %u is %llu", format_hex_bytes, input, \ + vec_len (input), (u8) pl, value); \ + vec_free (input); + + TEST ("\x00", 8, 0); + TEST ("\x2A", 8, 42); + TEST ("\x72", 4, 2); + TEST ("\x7F\x00", 7, 127); + TEST ("\x7F\x01", 7, 128); + TEST ("\x9F\x9A\x0A", 5, 1337); + TEST ("\xFF\x80\x01", 7, 255); + /* max value to decode is CLIB_WORD_MAX, CLIB_UWORD_MAX is error */ + TEST ("\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 7, CLIB_WORD_MAX); + +#undef TEST + +#define N_TEST(i, pl) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + end = vec_end (input); \ + value = _hpack_decode_int (&pos, end, (u8) pl); \ + HTTP_TEST ((value == HPACK_INVALID_INT), \ + "%U with prefix length %u should be invalid", format_hex_bytes, \ + input, vec_len (input), (u8) pl); \ + vec_free (input); + + /* incomplete */ + N_TEST ("\x7F", 7); + N_TEST ("\x0F\xFF\xFF", 4); + /* overflow */ + N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4); + N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4); + +#undef N_TEST + + vlib_cli_output (vm, "hpack_encode_int"); + + static u8 *(*_hpack_encode_int) (u8 * dst, uword value, u8 prefix_len); + _hpack_encode_int = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_int"); + + u8 *buf = 0; + u8 *p; + +#define TEST(v, pl, e) \ + vec_validate_init_empty (buf, 15, 0); \ + p = _hpack_encode_int (buf, v, (u8) pl); \ + HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \ + "%llu with prefix length %u is encoded as %U", v, (u8) pl, \ + format_hex_bytes, buf, p - buf); \ + vec_free (buf); + + TEST (0, 8, "\x00"); + TEST (2, 4, "\x02"); + TEST (42, 8, "\x2A"); + TEST (127, 
7, "\x7F\x00"); + TEST (128, 7, "\x7F\x01"); + TEST (255, 7, "\x7F\x80\x01"); + TEST (1337, 5, "\x1F\x9A\x0A"); + TEST (CLIB_WORD_MAX, 7, "\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"); +#undef TEST + + vlib_cli_output (vm, "hpack_decode_string"); + + static http2_error_t (*_hpack_decode_string) (u8 * *src, u8 * end, u8 * *buf, + uword * buf_len); + _hpack_decode_string = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_string"); + + u8 *bp; + uword blen, len; + http2_error_t rv; + +#define TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \ + len = vec_len (buf) - blen; \ + HTTP_TEST ((len == strlen (e) && !memcmp (buf, e, len) && \ + pos == vec_end (input) && bp == buf + len && \ + rv == HTTP2_ERROR_NO_ERROR), \ + "%U is decoded as %U", format_hex_bytes, input, vec_len (input), \ + format_http_bytes, buf, len); \ + vec_free (input); \ + vec_free (buf); + + /* raw coding */ + TEST ("\x07private", "private"); + /* Huffman coding */ + TEST ("\x85\xAE\xC3\x77\x1A\x4B", "private"); + TEST ("\x86\xA8\xEB\x10\x64\x9C\xBF", "no-cache"); + TEST ("\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF", + "www.example.com"); + TEST ("\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF", + "Mon, 21 Oct 2013 20:13:21 GMT") + TEST ("\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60" + "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31" + "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07", + "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1"); + TEST ("\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F", "hello world!") + TEST ("\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7", "\\aaaaaaaaaaaa"); + TEST ("\x8C\x1F\xFF\xF0\x18\xC6\x31\x80\x03\x18\xC6\x31\x8F", + "a\\aaaaa00aaaaaaa"); + 
TEST ("\x87\x1F\xFF\xF0\xFF\xFE\x11\xFF", "a\\\\b"); + TEST ("\x84\x1F\xF9\xFE\xA3", "a?'b"); + TEST ("\x84\x1F\xFA\xFF\x23", "a'?b"); + TEST ("\x8D\x1F\xFF\xFF\xFF\x0C\x63\x18\xC0\x01\x8C\x63\x18\xC7", + "\x61\xF9\x61\x61\x61\x61\x61\x30\x30\x61\x61\x61\x61\x61\x61\x61") +#undef TEST + +#define N_TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 15, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \ + HTTP_TEST ((rv == e), "%U should be invalid (%U)", format_hex_bytes, input, \ + vec_len (input), format_http2_error, rv); \ + vec_free (input); \ + vec_free (buf); + + /* incomplete */ + N_TEST ("\x87", HTTP2_ERROR_COMPRESSION_ERROR); + N_TEST ("\x07priv", HTTP2_ERROR_COMPRESSION_ERROR); + /* invalid length */ + N_TEST ("\x7Fprivate", HTTP2_ERROR_COMPRESSION_ERROR); + /* invalid EOF */ + N_TEST ("\x81\x8C", HTTP2_ERROR_COMPRESSION_ERROR); + /* not enough space for decoding */ + N_TEST ( + "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF", + HTTP2_ERROR_INTERNAL_ERROR); +#undef N_TEST + + vlib_cli_output (vm, "hpack_encode_string"); + + static u8 *(*_hpack_encode_string) (u8 * dst, const u8 *value, + uword value_len); + _hpack_encode_string = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_string"); + +#define TEST(i, e) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + p = _hpack_encode_string (buf, input, vec_len (input)); \ + HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \ + "%v is encoded as %U", input, format_hex_bytes, buf, p - buf); \ + vec_free (input); \ + vec_free (buf); + + /* Huffman coding */ + TEST ("private", "\x85\xAE\xC3\x77\x1A\x4B"); + TEST ("no-cache", "\x86\xA8\xEB\x10\x64\x9C\xBF"); + TEST ("www.example.com", + 
"\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF"); + TEST ("Mon, 21 Oct 2013 20:13:21 GMT", + "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66" + "\xE0\x82\xA6\x2D\x1B\xFF") + TEST ("foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1", + "\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60" + "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31" + "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07"); + TEST ("hello world!", "\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F") + TEST ("\\aaaaaaaaaaaa", "\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7"); + /* raw coding */ + TEST ("[XZ]", "\x4[XZ]"); +#undef TEST + + vlib_cli_output (vm, "hpack_decode_header"); + + static http2_error_t (*_hpack_decode_header) ( + u8 * *src, u8 * end, u8 * *buf, uword * buf_len, u32 * name_len, + u32 * value_len, hpack_dynamic_table_t * dt); + + _hpack_decode_header = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_header"); + + static void (*_hpack_dynamic_table_init) (hpack_dynamic_table_t * table, + u32 max_size); + + _hpack_dynamic_table_init = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_init"); + + static void (*_hpack_dynamic_table_free) (hpack_dynamic_table_t * table); + + _hpack_dynamic_table_free = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_free"); + + u32 name_len, value_len; + hpack_dynamic_table_t table; + + _hpack_dynamic_table_init (&table, 128); + +#define TEST(i, e_name, e_value, dt_size) \ + vec_validate (input, sizeof (i) - 2); \ + memcpy (input, i, sizeof (i) - 1); \ + pos = input; \ + vec_validate_init_empty (buf, 63, 0); \ + bp = buf; \ + blen = vec_len (buf); \ + rv = _hpack_decode_header (&pos, vec_end (input), &bp, &blen, &name_len, \ + &value_len, &table); \ + len = vec_len (buf) - blen; \ + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && table.used == dt_size && \ + name_len == strlen (e_name) && value_len == strlen (e_value) && \ + !memcmp (buf, e_name, 
name_len) && \ + !memcmp (buf + name_len, e_value, value_len) && \ + vec_len (buf) == (blen + name_len + value_len) && \ + pos == vec_end (input) && bp == buf + name_len + value_len), \ + "%U is decoded as '%U: %U'", format_hex_bytes, input, \ + vec_len (input), format_http_bytes, buf, name_len, \ + format_http_bytes, buf + name_len, value_len); \ + vec_free (input); \ + vec_free (buf); + + /* C.2.1. Literal Header Field with Indexing */ + TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74" + "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72", + "custom-key", "custom-header", 55); + /* C.2.2. Literal Header Field without Indexing */ + TEST ("\x04\x0C\x2F\x73\x61\x6D\x70\x6C\x65\x2F\x70\x61\x74\x68", ":path", + "/sample/path", 55); + /* C.2.3. Literal Header Field Never Indexed */ + TEST ("\x10\x08\x70\x61\x73\x73\x77\x6F\x72\x64\x06\x73\x65\x63\x72\x65\x74", + "password", "secret", 55); + /* C.2.4. Indexed Header Field */ + TEST ("\x82", ":method", "GET", 55); + TEST ("\xBE", "custom-key", "custom-header", 55); + /* Literal Header Field with Indexing - enough space in dynamic table */ + TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D", + ":authority", "www.example.com", 112); + /* verification */ + TEST ("\xBE", ":authority", "www.example.com", 112); + TEST ("\xBF", "custom-key", "custom-header", 112); + /* Literal Header Field with Indexing - eviction */ + TEST ("\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65", "cache-control", + "no-cache", 110); + /* verification */ + TEST ("\xBE", "cache-control", "no-cache", 110); + TEST ("\xBF", ":authority", "www.example.com", 110); + /* Literal Header Field with Indexing - eviction */ + TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74" + "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72", + "custom-key", "custom-header", 108); + /* verification */ + TEST ("\xBE", "custom-key", "custom-header", 108); + TEST ("\xBF", "cache-control", "no-cache", 108); + /* Literal Header Field 
with Indexing - eviction */ + TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D", + ":authority", "www.example.com", 112); + /* verification */ + TEST ("\xBE", ":authority", "www.example.com", 112); + TEST ("\xBF", "custom-key", "custom-header", 112); + /* Literal Header Field with Indexing - eviction with reference */ + TEST ("\x7F\x00\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65", + "custom-key", "custom-value", 111); + /* verification */ + TEST ("\xBE", "custom-key", "custom-value", 111); + TEST ("\xBF", ":authority", "www.example.com", 111); +#undef TEST + + _hpack_dynamic_table_free (&table); + + vlib_cli_output (vm, "hpack_parse_request"); + + int result; + /* C.3. Request Examples without Huffman Coding */ + _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE); + result = http_test_parse_request ( + http_token_lit ("\x82\x86\x84\x41\x0F\x77\x77\x77\x2E\x65\x78\x61" + "\x6D\x70\x6C\x65\x2E\x63\x6F\x6D"), + http_token_lit ( + "\x82\x86\x84\xBE\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65"), + http_token_lit ( + "\x82\x87\x85\xBF\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B" + "\x65\x79\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65"), + &table); + _hpack_dynamic_table_free (&table); + HTTP_TEST ((result == 0), "request without Huffman Coding (result=%d)", + result); + /* C.4. 
Request Examples with Huffman Coding */ + _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE); + result = http_test_parse_request ( + http_token_lit ( + "\x82\x86\x84\x41\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF"), + http_token_lit ("\x82\x86\x84\xBE\x58\x86\xA8\xEB\x10\x64\x9C\xBF"), + http_token_lit ("\x82\x87\x85\xBF\x40\x88\x25\xA8\x49\xE9\x5B\xA9\x7D\x7F" + "\x89\x25\xA8\x49\xE9\x5B\xB8\xE8\xB4\xBF"), + &table); + _hpack_dynamic_table_free (&table); + HTTP_TEST ((result == 0), "request with Huffman Coding (result=%d)", result); + + vlib_cli_output (vm, "hpack_serialize_response"); + + hpack_response_control_data_t resp_cd; + u8 *server_name; + u8 *date; + + static void (*_hpack_serialize_response) ( + u8 * app_headers, u32 app_headers_len, + hpack_response_control_data_t * control_data, u8 * *dst); + + _hpack_serialize_response = + vlib_get_plugin_symbol ("http_plugin.so", "hpack_serialize_response"); + + server_name = format (0, "http unit tests"); + date = format (0, "Mon, 21 Oct 2013 20:13:21 GMT"); + + vec_validate (buf, 127); + vec_reset_length (buf); + resp_cd.sc = HTTP_STATUS_GATEWAY_TIMEOUT; + resp_cd.content_len = HPACK_ENCODER_SKIP_CONTENT_LEN; + resp_cd.server_name = server_name; + resp_cd.server_name_len = vec_len (server_name); + resp_cd.date = date; + resp_cd.date_len = vec_len (date); + u8 expected1[] = + "\x08\x03\x35\x30\x34\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94" + "\x7F\x0F\x12\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B" + "\x81\x66\xE0\x82\xA6\x2D\x1B\xFF"; + _hpack_serialize_response (0, 0, &resp_cd, &buf); + HTTP_TEST ((vec_len (buf) == (sizeof (expected1) - 1) && + !memcmp (buf, expected1, sizeof (expected1) - 1)), + "response encoded as %U", format_hex_bytes, buf, vec_len (buf)); + vec_reset_length (buf); + + resp_cd.sc = HTTP_STATUS_OK; + resp_cd.content_len = 1024; + http_headers_ctx_t headers; + u8 *headers_buf = 0; + vec_validate (headers_buf, 127); + http_init_headers_ctx 
(&headers, headers_buf, vec_len (headers_buf)); + http_add_header (&headers, HTTP_HEADER_CONTENT_TYPE, + http_token_lit ("text/plain")); + http_add_header (&headers, HTTP_HEADER_CACHE_STATUS, + http_token_lit ("ExampleCache; hit")); + http_add_custom_header (&headers, http_token_lit ("sandwich"), + http_token_lit ("spam")); + u8 expected2[] = + "\x88\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94\x7F\x0F\x12\x96" + "\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66\xE0\x82" + "\xA6\x2D\x1B\xFF\x0F\x0D\x83\x08\x04\xD7\x0F\x10\x87\x49\x7C\xA5\x8A\xE8" + "\x19\xAA\x00\x88\x20\xC9\x39\x56\x42\x46\x9B\x51\x8D\xC1\xE4\x74\xD7\x41" + "\x6F\x0C\x93\x97\xED\x49\xCC\x9F\x00\x86\x40\xEA\x93\xC1\x89\x3F\x83\x45" + "\x63\xA7"; + _hpack_serialize_response (headers_buf, headers.tail_offset, &resp_cd, &buf); + HTTP_TEST ((vec_len (buf) == (sizeof (expected2) - 1) && + !memcmp (buf, expected2, sizeof (expected2) - 1)), + "response encoded as %U", format_hex_bytes, buf, vec_len (buf)); + vec_free (buf); + vec_free (headers_buf); + vec_free (server_name); + vec_free (date); + + return 0; +} + +static int +http_test_h2_frame (vlib_main_t *vm) +{ + static void (*_http2_frame_header_read) (u8 * src, + http2_frame_header_t * fh); + + _http2_frame_header_read = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_header_read"); + + vlib_cli_output (vm, "http2_frame_read_settings"); + + static http2_error_t (*_http2_frame_read_settings) ( + http2_conn_settings_t * settings, u8 * payload, u32 payload_len); + + _http2_frame_read_settings = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_settings"); + + http2_error_t rv; + http2_frame_header_t fh = { 0 }; + http2_conn_settings_t conn_settings = http2_default_conn_settings; + + u8 settings[] = { 0x0, 0x0, 0x12, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x3, 0x0, 0x0, 0x0, 0x64, 0x0, 0x4, 0x40, + 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0 }; + _http2_frame_header_read (settings, &fh); + HTTP_TEST ((fh.flags == 
0 && fh.type == HTTP2_FRAME_TYPE_SETTINGS && + fh.stream_id == 0 && fh.length == 18), + "frame identified as SETTINGS"); + + rv = _http2_frame_read_settings ( + &conn_settings, settings + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && + conn_settings.max_concurrent_streams == 100 && + conn_settings.initial_window_size == 1073741824 && + conn_settings.enable_push == 0), + "SETTINGS frame payload parsed") + + u8 settings_ack[] = { 0x0, 0x0, 0x0, 0x4, 0x1, 0x0, 0x0, 0x0, 0x0 }; + _http2_frame_header_read (settings_ack, &fh); + HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_ACK && + fh.type == HTTP2_FRAME_TYPE_SETTINGS && fh.stream_id == 0 && + fh.length == 0), + "frame identified as SETTINGS ACK"); + + vlib_cli_output (vm, "http2_frame_write_settings_ack"); + + static void (*_http2_frame_write_settings_ack) (u8 * *dst); + + _http2_frame_write_settings_ack = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_settings_ack"); + + u8 *buf = 0; + + _http2_frame_write_settings_ack (&buf); + HTTP_TEST ((vec_len (buf) == sizeof (settings_ack)) && + !memcmp (buf, settings_ack, sizeof (settings_ack)), + "SETTINGS ACK frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_write_settings"); + + static void (*_http2_frame_write_settings) ( + http2_settings_entry_t * settings, u8 * *dst); + + _http2_frame_write_settings = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_settings"); + + http2_settings_entry_t *settings_list = 0; + vec_validate (settings_list, 2); + settings_list[0].identifier = HTTP2_SETTINGS_MAX_CONCURRENT_STREAMS; + settings_list[0].value = 100; + settings_list[1].identifier = HTTP2_SETTINGS_INITIAL_WINDOW_SIZE; + settings_list[1].value = 1073741824; + settings_list[2].identifier = HTTP2_SETTINGS_ENABLE_PUSH; + settings_list[2].value = 0; + + _http2_frame_write_settings (settings_list, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (settings) && + !memcmp (buf, settings, sizeof (settings))), + 
"SETTINGS frame written"); + vec_free (settings_list); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_window_update"); + + static http2_error_t (*_http2_frame_read_window_update) ( + u32 * increment, u8 * payload, u32 payload_len); + + _http2_frame_read_window_update = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_read_window_update"); + + u32 win_increment; + u8 win_update[] = { 0x0, 0x0, 0x4, 0x8, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x3f, 0xff, 0x0, 0x1 }; + _http2_frame_header_read (win_update, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_WINDOW_UPDATE && + fh.stream_id == 0 && fh.length == 4), + "frame identified as WINDOW_UPDATE"); + + rv = _http2_frame_read_window_update ( + &win_increment, win_update + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && win_increment == 1073676289), + "WINDOW_UPDATE frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_window_update"); + + static void (*_http2_frame_write_window_update) (u32 increment, + u32 stream_id, u8 * *dst); + + _http2_frame_write_window_update = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_window_update"); + + _http2_frame_write_window_update (1073676289, 0, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (win_update) && + !memcmp (buf, win_update, sizeof (win_update))), + "WINDOW_UPDATE frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_rst_stream"); + + static http2_error_t (*_http2_frame_read_rst_stream) ( + u32 * error_code, u8 * payload, u32 payload_len); + + _http2_frame_read_rst_stream = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_rst_stream"); + + u32 error_code; + u8 rst_stream[] = { 0x0, 0x0, 0x4, 0x3, 0x0, 0x0, 0x0, + 0x0, 0x5, 0x0, 0x0, 0x0, 0x01 }; + _http2_frame_header_read (rst_stream, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_RST_STREAM && + fh.stream_id == 5 && fh.length == 4), + "frame identified as RST_STREAM"); + + rv = 
_http2_frame_read_rst_stream ( + &error_code, rst_stream + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ( + (rv == HTTP2_ERROR_NO_ERROR && error_code == HTTP2_ERROR_PROTOCOL_ERROR), + "RST_STREAM frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_rst_stream"); + + static void (*_http2_frame_write_rst_stream) (u32 increment, u32 stream_id, + u8 * *dst); + + _http2_frame_write_rst_stream = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_rst_stream"); + + _http2_frame_write_rst_stream (HTTP2_ERROR_PROTOCOL_ERROR, 5, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (rst_stream) && + !memcmp (buf, rst_stream, sizeof (rst_stream))), + "RST_STREAM frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_goaway"); + + static http2_error_t (*_http2_frame_read_goaway) ( + u32 * error_code, u32 * last_stream_id, u8 * payload, u32 payload_len); + + _http2_frame_read_goaway = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_goaway"); + + u32 last_stream_id; + u8 goaway[] = { 0x0, 0x0, 0x8, 0x7, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2 }; + + _http2_frame_header_read (goaway, &fh); + HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_GOAWAY && + fh.stream_id == 0 && fh.length == 8), + "frame identified as GOAWAY"); + + rv = _http2_frame_read_goaway (&error_code, &last_stream_id, + goaway + HTTP2_FRAME_HEADER_SIZE, fh.length); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && + error_code == HTTP2_ERROR_INTERNAL_ERROR && last_stream_id == 5), + "GOAWAY frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_goaway"); + + static void (*_http2_frame_write_goaway) (http2_error_t error_code, + u32 last_stream_id, u8 * *dst); + + _http2_frame_write_goaway = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_goaway"); + + _http2_frame_write_goaway (HTTP2_ERROR_INTERNAL_ERROR, 5, &buf); + HTTP_TEST ((vec_len (buf) == sizeof (goaway) && + !memcmp (buf, goaway, sizeof 
(goaway))), + "GOAWAY frame written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_headers"); + + static http2_error_t (*_http2_frame_read_headers) ( + u8 * *headers, u32 * headers_len, u8 * payload, u32 payload_len, u8 flags); + + _http2_frame_read_headers = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_headers"); + + u8 *h; + u32 h_len; + u8 headers[] = { 0x0, 0x0, 0x28, 0x1, 0x5, 0x0, 0x0, 0x0, 0x3, 0x3f, + 0xe1, 0x1f, 0x82, 0x4, 0x88, 0x62, 0x7b, 0x69, 0x1d, 0x48, + 0x5d, 0x3e, 0x53, 0x86, 0x41, 0x88, 0xaa, 0x69, 0xd2, 0x9a, + 0xc4, 0xb9, 0xec, 0x9b, 0x7a, 0x88, 0x25, 0xb6, 0x50, 0xc3, + 0xab, 0xb8, 0x15, 0xc1, 0x53, 0x3, 0x2a, 0x2f, 0x2a }; + + _http2_frame_header_read (headers, &fh); + HTTP_TEST ((fh.flags == + (HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM) && + fh.type == HTTP2_FRAME_TYPE_HEADERS && fh.stream_id == 3 && + fh.length == 40), + "frame identified as HEADERS"); + + rv = _http2_frame_read_headers ( + &h, &h_len, headers + HTTP2_FRAME_HEADER_SIZE, fh.length, fh.flags); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && h_len == 40 && + *h == headers[HTTP2_FRAME_HEADER_SIZE]), + "HEADERS frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_headers_header"); + + static void (*_http2_frame_write_headers_header) ( + u32 headers_len, u32 stream_id, u8 flags, u8 * dst); + + _http2_frame_write_headers_header = vlib_get_plugin_symbol ( + "http_plugin.so", "http2_frame_write_headers_header"); + + u8 *p = http2_frame_header_alloc (&buf); + _http2_frame_write_headers_header ( + 40, 3, HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM, p); + HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE && + !memcmp (buf, headers, HTTP2_FRAME_HEADER_SIZE)), + "HEADERS frame header written"); + vec_free (buf); + + vlib_cli_output (vm, "http2_frame_read_data"); + + static http2_error_t (*_http2_frame_read_data) ( + u8 * *data, u32 * data_len, u8 * payload, u32 payload_len, u8 flags); + + _http2_frame_read_data 
= + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_data"); + + u8 *d; + u32 d_len; + u8 data[] = { 0x0, 0x0, 0x9, 0x0, 0x1, 0x0, 0x0, 0x0, 0x3, + 0x6e, 0x6f, 0x74, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64 }; + + _http2_frame_header_read (data, &fh); + HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_END_STREAM && + fh.type == HTTP2_FRAME_TYPE_DATA && fh.stream_id == 3 && + fh.length == 9), + "frame identified as DATA"); + + rv = _http2_frame_read_data (&d, &d_len, data + HTTP2_FRAME_HEADER_SIZE, + fh.length, fh.flags); + HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && d_len == 9 && + *d == data[HTTP2_FRAME_HEADER_SIZE]), + "DATA frame payload parsed") + + vlib_cli_output (vm, "http2_frame_write_data_header"); + + static void (*_http2_frame_write_data_header) ( + u32 headers_len, u32 stream_id, u8 flags, u8 * dst); + + _http2_frame_write_data_header = + vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_data_header"); + + p = http2_frame_header_alloc (&buf); + _http2_frame_write_data_header (9, 3, HTTP2_FRAME_FLAG_END_STREAM, p); + HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE && + !memcmp (buf, data, HTTP2_FRAME_HEADER_SIZE)), + "DATA frame header written"); + vec_free (buf); + + return 0; +} + static clib_error_t * test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) @@ -550,6 +1317,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, res = http_test_http_token_is_case (vm); else if (unformat (input, "header-table")) res = http_test_http_header_table (vm); + else if (unformat (input, "hpack")) + res = http_test_hpack (vm); + else if (unformat (input, "h2-frame")) + res = http_test_h2_frame (vm); else if (unformat (input, "all")) { if ((res = http_test_parse_authority (vm))) @@ -562,6 +1333,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; if ((res = http_test_http_header_table (vm))) goto done; + if ((res = http_test_hpack (vm))) + goto done; + if ((res = 
http_test_h2_frame (vm))) + goto done; } else break; diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 074416873e3..d7958fd3f1f 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -27,9 +27,41 @@ /*? %%clicmd:group_label Static HTTP Server %% ?*/ #define HSS_FIFO_THRESH (16 << 10) - +#define HSS_HEADER_BUF_MAX_SIZE 16192 hss_main_t hss_main; +static int +hss_add_header (hss_session_t *hs, http_header_name_t name, const char *value, + uword value_len) +{ + u32 needed_size = 0; + while (http_add_header (&hs->resp_headers, name, value, value_len) == -1) + { + if (needed_size) + { + http_truncate_headers_list (&hs->resp_headers); + hs->data_len = 0; + return -1; + } + else + needed_size = hs->resp_headers.tail_offset + + sizeof (http_app_header_t) + value_len; + if (needed_size < HSS_HEADER_BUF_MAX_SIZE) + { + vec_resize (hs->headers_buf, sizeof (http_app_header_t) + value_len); + hs->resp_headers.len = needed_size; + hs->resp_headers.buf = hs->headers_buf; + } + else + { + http_truncate_headers_list (&hs->resp_headers); + hs->data_len = 0; + return -1; + } + } + return 0; +} + static hss_session_t * hss_session_alloc (u32 thread_index) { @@ -175,8 +207,9 @@ hss_session_send_data (hss_url_handler_args_t *args) /* Set content type only if we have some response data */ if (hs->data_len) - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (args->ct)); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (args->ct))) + args->sc = HTTP_STATUS_INTERNAL_ERROR; start_send_data (hs, args->sc); } @@ -305,8 +338,9 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, /* Set content type only if we have some response data */ if (hs->data_len) - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (args.ct)); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + 
http_content_type_token (args.ct))) + sc = HTTP_STATUS_INTERNAL_ERROR; start_send_data (hs, sc); @@ -383,8 +417,10 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) vec_free (port_str); - http_add_header (&hs->resp_headers, HTTP_HEADER_LOCATION, - (const char *) redirect, vec_len (redirect)); + if (hss_add_header (hs, HTTP_HEADER_LOCATION, (const char *) redirect, + vec_len (redirect))) + return HTTP_STATUS_INTERNAL_ERROR; + vec_free (redirect); hs->data_len = 0; hs->free_data = 1; @@ -406,8 +442,8 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, if (!hsm->www_root) return -1; - /* Remove dot segments to prevent path traversal */ - sanitized_path = http_path_remove_dot_segments (target); + /* Sanitize received path */ + sanitized_path = http_path_sanitize (target); /* * Construct the file to open @@ -463,13 +499,16 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, * Last-Modified */ type = content_type_from_request (target); - http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, - http_content_type_token (type)); - http_add_header (&hs->resp_headers, HTTP_HEADER_CACHE_CONTROL, - (const char *) hsm->max_age_formatted, - vec_len (hsm->max_age_formatted)); - http_add_header (&hs->resp_headers, HTTP_HEADER_LAST_MODIFIED, - (const char *) last_modified, vec_len (last_modified)); + if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (type)) || + hss_add_header (hs, HTTP_HEADER_CACHE_CONTROL, + (const char *) hsm->max_age_formatted, + vec_len (hsm->max_age_formatted)) || + hss_add_header (hs, HTTP_HEADER_LAST_MODIFIED, + (const char *) last_modified, vec_len (last_modified))) + { + sc = HTTP_STATUS_INTERNAL_ERROR; + } done: vec_free (sanitized_path); @@ -510,6 +549,7 @@ hss_ts_rx_callback (session_t *ts) if (hs->free_data) vec_free (hs->data); hs->data = 0; + hs->data_len = 0; http_init_headers_ctx (&hs->resp_headers, hs->headers_buf, vec_len (hs->headers_buf)); @@ 
-520,9 +560,10 @@ hss_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST)) { - http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW, - http_token_lit ("GET, POST")); - start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); + if (hss_add_header (hs, HTTP_HEADER_ALLOW, http_token_lit ("GET, POST"))) + start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); + else + start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); goto err_done; } diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c index 1606f72224f..ca6483b3329 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c @@ -21,7 +21,7 @@ #include <vnet/vnet.h> #include <vnet/plugin/plugin.h> #include <ioam/export-common/ioam_export.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vlibapi/api.h> #include <vlibmemory/api.h> diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c index 839fd80b443..17084767c1e 100644 --- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c @@ -16,8 +16,8 @@ #include <vnet/vnet.h> #include <vppinfra/error.h> #include <vnet/ip/ip.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <ioam/export-common/ioam_export.h> typedef struct diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c index 801faa98066..d8d52e9f0a1 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c @@ -17,8 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include 
<vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> @@ -68,8 +67,8 @@ vxlan_gpe_decap_ioam (vlib_main_t * vm, vlib_frame_t * from_frame, u8 is_ipv6) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = hm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c index de375df4f7c..9c742d8c293 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c @@ -17,7 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> @@ -71,7 +71,8 @@ vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm, vlib_frame_t * from_frame) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = sm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c index 2fa0aa29450..a80662b9d12 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c @@ -17,7 +17,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include 
<plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> /* Statistics (not really errors) */ @@ -231,7 +231,8 @@ vxlan_gpe_pop_ioam (vlib_main_t * vm, vlib_frame_t * from_frame, u8 is_ipv6) { u32 n_left_from, next_index, *from, *to_next; - vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *ngm = sm->gpe_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c index e3c82725e26..02233cf9841 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -18,7 +18,7 @@ #include <vnet/ip/ip.h> #include <vnet/udp/udp_local.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c index d61832d975a..6de1760b6b7 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c @@ -80,9 +80,9 @@ static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler clib_error_t *error; vxlan4_gpe_tunnel_key_t key4; uword *p = NULL; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; vxlan_gpe_tunnel_t *t = 0; vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *gm = hm->gpe_main; u32 vni; @@ -130,7 +130,8 @@ static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler clib_error_t *error; vxlan4_gpe_tunnel_key_t key4; uword *p = NULL; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_main_t *gm = hm->gpe_main; vxlan_gpe_tunnel_t *t = 0; u32 vni; @@ -214,6 +215,13 @@ ioam_vxlan_gpe_init (vlib_main_t * 
vm) vlib_node_t *vxlan_gpe_decap_node = NULL; uword next_node = 0; + sm->gpe_main = + vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main"); + if (sm->gpe_main == 0) + { + return clib_error_return (0, "vxlan-gpe_plugin.so is not loaded"); + } + sm->vlib_main = vm; sm->vnet_main = vnet_get_main (); sm->unix_time_0 = (u32) time (0); /* Store starting time */ @@ -231,7 +239,7 @@ ioam_vxlan_gpe_init (vlib_main_t * vm) vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input"); next_node = vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index); - vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node); + sm->gpe_main->register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node); vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces)); sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword)); @@ -243,7 +251,9 @@ ioam_vxlan_gpe_init (vlib_main_t * vm) return 0; } -VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init); +VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init) = { + .runs_after = VLIB_INITS ("vxlan_gpe_init"), +}; /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c index 327afc3fb61..f83c6e1ecc3 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -12,8 +12,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/format.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> #include <vnet/dpo/load_balance.h> @@ -423,7 +423,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t * vxlan4_gpe_tunnel_key_t key4; vxlan6_gpe_tunnel_key_t key6; uword *p; - vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_main_t *gm = hm->gpe_main; vxlan_gpe_tunnel_t *t = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h index 0711b87abbe..f9374c9bb95 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h @@ -15,12 +15,11 @@ #ifndef __included_vxlan_gpe_ioam_h__ #define __included_vxlan_gpe_ioam_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> #include <vnet/ip/ip.h> - typedef struct vxlan_gpe_sw_interface_ { u32 sw_if_index; @@ -100,7 +99,8 @@ typedef struct vxlan_gpe_ioam_main_ vlib_main_t *vlib_main; /** State convenience vnet_main_t */ vnet_main_t *vnet_main; - + /** State convenience vxlan_gpe_main_t */ + vxlan_gpe_main_t *gpe_main; } vxlan_gpe_ioam_main_t; extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main; diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h index a7ef859ec58..515529ce794 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h @@ -15,8 +15,8 @@ #ifndef __included_vxlan_gpe_ioam_packet_h__ #define __included_vxlan_gpe_ioam_packet_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> 
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c index 9c783c747d0..9b1b8b824ff 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c @@ -16,8 +16,8 @@ #include <vnet/vnet.h> #include <vppinfra/error.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vppinfra/hash.h> #include <vppinfra/error.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h index c0ad8d9d03a..db7fd5651b1 100644 --- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h @@ -15,8 +15,8 @@ #ifndef __included_vxlan_gpe_ioam_util_h__ #define __included_vxlan_gpe_ioam_util_h__ -#include <vnet/vxlan-gpe/vxlan_gpe.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip.h> diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api index e7eaa5a3669..8b0fdb5eb53 100644 --- a/src/plugins/linux-cp/lcp.api +++ b/src/plugins/linux-cp/lcp.api @@ -177,6 +177,42 @@ autoendian define lcp_itf_pair_details option in_progress; }; +/** \brief Enable linux-cp-punt-xc for a given ethertype + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ethertype - the ethertype to enable +*/ +autoreply define lcp_ethertype_enable +{ + u32 client_index; + u32 context; + u16 ethertype; +}; + +/** \brief Get the enabled ethertypes for linux-cp-punt-xc + @param client_index - 
opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define lcp_ethertype_get +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply to get the enabled ethertypes for linux-cp-punt-xc + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param count - number of enabled ethertypes + @param ethertypes - array of enabled ethertypes +*/ +define lcp_ethertype_get_reply +{ + u32 context; + i32 retval; + u16 count; + u16 ethertypes[count]; +}; + service { rpc lcp_itf_pair_get returns lcp_itf_pair_get_reply stream lcp_itf_pair_details; diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c index 74421230e9d..0db502988d7 100644 --- a/src/plugins/linux-cp/lcp_api.c +++ b/src/plugins/linux-cp/lcp_api.c @@ -280,6 +280,40 @@ vl_api_lcp_itf_pair_replace_end_t_handler ( REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_END_REPLY); } +static void +vl_api_lcp_ethertype_enable_t_handler (vl_api_lcp_ethertype_enable_t *mp) +{ + vl_api_lcp_ethertype_enable_reply_t *rmp; + int rv; + + rv = lcp_ethertype_enable (mp->ethertype); + + REPLY_MACRO (VL_API_LCP_ETHERTYPE_ENABLE_REPLY); +} + +static void +vl_api_lcp_ethertype_get_t_handler (vl_api_lcp_ethertype_get_t *mp) +{ + vl_api_lcp_ethertype_get_reply_t *rmp; + ethernet_type_t *ethertypes = vec_new (ethernet_type_t, 0); + u16 count = 0; + int rv = 0; + + rv = lcp_ethertype_get_enabled (ðertypes); + if (!rv) + count = vec_len (ethertypes); + + REPLY_MACRO3 (VL_API_LCP_ETHERTYPE_GET_REPLY, sizeof (u16) * count, ({ + rmp->count = htons (count); + for (int i = 0; i < count; i++) + { + rmp->ethertypes[i] = htons (ethertypes[i]); + } + })); + + vec_free (ethertypes); +} + /* * Set up the API message handling tables */ diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c index 0dcf600b301..e89afd2a753 100644 --- a/src/plugins/linux-cp/lcp_cli.c +++ b/src/plugins/linux-cp/lcp_cli.c @@ -337,6 
+337,62 @@ VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = { .is_mp_safe = 1, }; +static clib_error_t * +lcp_ethertype_enable_cmd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + ethernet_type_t ethertype; + int rv; + + if (!unformat (input, "%U", unformat_ethernet_type_host_byte_order, + ðertype)) + return clib_error_return (0, "Invalid ethertype"); + + rv = lcp_ethertype_enable (ethertype); + if (rv) + return clib_error_return (0, "Failed to enable ethertype (%d)", rv); + + return 0; +} + +VLIB_CLI_COMMAND (lcp_ethertype_enable_command, static) = { + .path = "lcp ethertype enable", + .short_help = + "lcp ethertype enable (<hex_ethertype_num>|<uc_ethertype_name>)", + .function = lcp_ethertype_enable_cmd, +}; + +static clib_error_t * +lcp_ethertype_show_cmd (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + ethernet_type_t *ethertypes = vec_new (ethernet_type_t, 0); + ethernet_type_t *etype; + int rv; + + rv = lcp_ethertype_get_enabled (ðertypes); + if (rv) + { + vec_free (ethertypes); + return clib_error_return (0, "Failed to get enabled ethertypes (%d)", + rv); + } + + vec_foreach (etype, ethertypes) + { + vlib_cli_output (vm, "0x%04x", *etype); + } + + vec_free (ethertypes); + return 0; +} + +VLIB_CLI_COMMAND (lcp_ethertype_show_command, static) = { + .path = "show lcp ethertype", + .short_help = "show lcp ethertype", + .function = lcp_ethertype_show_cmd, +}; + clib_error_t * lcp_cli_init (vlib_main_t *vm) { diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c index 9a6b9b11be5..31864f791af 100644 --- a/src/plugins/linux-cp/lcp_interface.c +++ b/src/plugins/linux-cp/lcp_interface.c @@ -1230,6 +1230,53 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags) return 0; } +int +lcp_ethertype_enable (ethernet_type_t ethertype) +{ + ethernet_main_t *em = ðernet_main; + ethernet_type_info_t *eti; + vlib_main_t *vm = vlib_get_main (); + vlib_node_t *node = 
vlib_get_node_by_name (vm, (u8 *) "linux-cp-punt-xc"); + + if (!node) + return VNET_API_ERROR_UNIMPLEMENTED; + + eti = ethernet_get_type_info (em, ethertype); + if (!eti) + return VNET_API_ERROR_INVALID_VALUE; + + if (eti->node_index != ~0 && eti->node_index != node->index) + return VNET_API_ERROR_INVALID_REGISTRATION; + + ethernet_register_input_type (vm, ethertype, node->index); + return 0; +} + +int +lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec) +{ + ethernet_main_t *em = ðernet_main; + ethernet_type_info_t *eti; + vlib_main_t *vm = vlib_get_main (); + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "linux-cp-punt-xc"); + + if (!ethertypes_vec) + return VNET_API_ERROR_INVALID_ARGUMENT; + + if (!node) + return VNET_API_ERROR_UNIMPLEMENTED; + + vec_foreach (eti, em->type_infos) + { + if (eti->node_index == node->index) + { + vec_add1 (*ethertypes_vec, eti->type); + } + } + + return 0; +} + VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down); static clib_error_t * diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h index cfcd3925a15..8cf6d3f4da1 100644 --- a/src/plugins/linux-cp/lcp_interface.h +++ b/src/plugins/linux-cp/lcp_interface.h @@ -18,6 +18,7 @@ #include <vnet/dpo/dpo.h> #include <vnet/adj/adj.h> #include <vnet/ip/ip_types.h> +#include <vnet/ethernet/ethernet.h> #include <plugins/linux-cp/lcp.h> @@ -198,6 +199,18 @@ void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip); void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi); void lcp_itf_pair_sync_state_all (); +/** + * Enable linux-cp-punt-xc for a given ethertype. + * @param ethertype - ethertype to enable + */ +int lcp_ethertype_enable (ethernet_type_t ethertype); + +/** + * Get the list of ethertypes enabled for linux-cp-punt-xc. 
+ * @param ethertypes_vec - pointer to a vector to store the list of ethertypes + */ +int lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c index 241cc5e4bff..9fa1aa5bd66 100644 --- a/src/plugins/linux-cp/lcp_node.c +++ b/src/plugins/linux-cp/lcp_node.c @@ -39,40 +39,51 @@ typedef enum { -#define _(sym, str) LIP_PUNT_NEXT_##sym, +#define _(sym, str) LIP_PUNT_XC_NEXT_##sym, foreach_lip_punt #undef _ - LIP_PUNT_N_NEXT, -} lip_punt_next_t; + LIP_PUNT_XC_N_NEXT, +} lip_punt_xc_next_t; -typedef struct lip_punt_trace_t_ +typedef struct lip_punt_xc_trace_t_ { + bool is_xc; u32 phy_sw_if_index; u32 host_sw_if_index; -} lip_punt_trace_t; +} lip_punt_xc_trace_t; /* packet trace format function */ static u8 * -format_lip_punt_trace (u8 *s, va_list *args) +format_lip_punt_xc_trace (u8 *s, va_list *args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - lip_punt_trace_t *t = va_arg (*args, lip_punt_trace_t *); + lip_punt_xc_trace_t *t = va_arg (*args, lip_punt_xc_trace_t *); - s = - format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, t->host_sw_if_index); + if (t->is_xc) + { + s = format (s, "lip-xc: %u -> %u", t->host_sw_if_index, + t->phy_sw_if_index); + } + else + { + s = format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, + t->host_sw_if_index); + } return s; } /** * Pass punted packets from the PHY to the HOST. + * Conditionally x-connect packets from the HOST to the PHY. 
*/ -VLIB_NODE_FN (lip_punt_node) -(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +static_always_inline u32 +lip_punt_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool check_xc) { u32 n_left_from, *from, *to_next, n_left_to_next; - lip_punt_next_t next_index; + lip_punt_xc_next_t next_index; next_index = node->cached_next_index; n_left_from = frame->n_vectors; @@ -89,6 +100,7 @@ VLIB_NODE_FN (lip_punt_node) u32 next0 = ~0; u32 bi0, lipi0; u32 sw_if_index0; + bool is_xc0 = 0; u8 len0; bi0 = to_next[0] = from[0]; @@ -97,18 +109,33 @@ VLIB_NODE_FN (lip_punt_node) to_next += 1; n_left_from -= 1; n_left_to_next -= 1; - next0 = LIP_PUNT_NEXT_DROP; + next0 = LIP_PUNT_XC_NEXT_DROP; b0 = vlib_get_buffer (vm, bi0); sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; lipi0 = lcp_itf_pair_find_by_phy (sw_if_index0); - if (PREDICT_FALSE (lipi0 == INDEX_INVALID)) - goto trace0; + + /* + * lip_punt_node: expect sw_if_index0 is phy in an itf pair + * lip_punt_xc_node: if sw_if_index0 is not phy, expect it is host + */ + if (!check_xc && (PREDICT_FALSE (lipi0 == INDEX_INVALID))) + { + goto trace0; + } + else if (check_xc && (lipi0 == INDEX_INVALID)) + { + is_xc0 = 1; + lipi0 = lcp_itf_pair_find_by_host (sw_if_index0); + if (PREDICT_FALSE (lipi0 == INDEX_INVALID)) + goto trace0; + } lip0 = lcp_itf_pair_get (lipi0); - next0 = LIP_PUNT_NEXT_IO; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_host_sw_if_index; + next0 = LIP_PUNT_XC_NEXT_IO; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + is_xc0 ? lip0->lip_phy_sw_if_index : lip0->lip_host_sw_if_index; if (PREDICT_TRUE (lip0->lip_host_type == LCP_ITF_HOST_TAP)) { @@ -129,10 +156,22 @@ VLIB_NODE_FN (lip_punt_node) trace0: if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) { - lip_punt_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); - t->phy_sw_if_index = sw_if_index0; - t->host_sw_if_index = - (lipi0 == INDEX_INVALID) ? 
~0 : lip0->lip_host_sw_if_index; + lip_punt_xc_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + + t->is_xc = is_xc0; + if (is_xc0) + { + t->phy_sw_if_index = + (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_phy_sw_if_index; + t->host_sw_if_index = sw_if_index0; + } + else + { + t->phy_sw_if_index = sw_if_index0; + t->host_sw_if_index = + (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, @@ -145,16 +184,41 @@ VLIB_NODE_FN (lip_punt_node) return frame->n_vectors; } +VLIB_NODE_FN (lip_punt_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lip_punt_xc_inline (vm, node, frame, false /* xc */)); +} + +VLIB_NODE_FN (lip_punt_xc_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return (lip_punt_xc_inline (vm, node, frame, true /* xc */)); +} + VLIB_REGISTER_NODE (lip_punt_node) = { .name = "linux-cp-punt", .vector_size = sizeof (u32), - .format_trace = format_lip_punt_trace, + .format_trace = format_lip_punt_xc_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = LIP_PUNT_XC_N_NEXT, + .next_nodes = { + [LIP_PUNT_XC_NEXT_DROP] = "error-drop", + [LIP_PUNT_XC_NEXT_IO] = "interface-output", + }, +}; + +VLIB_REGISTER_NODE (lip_punt_xc_node) = { + .name = "linux-cp-punt-xc", + .vector_size = sizeof (u32), + .format_trace = format_lip_punt_xc_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_next_nodes = LIP_PUNT_N_NEXT, + .n_next_nodes = LIP_PUNT_XC_N_NEXT, .next_nodes = { - [LIP_PUNT_NEXT_DROP] = "error-drop", - [LIP_PUNT_NEXT_IO] = "interface-output", + [LIP_PUNT_XC_NEXT_DROP] = "error-drop", + [LIP_PUNT_XC_NEXT_IO] = "interface-output", }, }; @@ -190,7 +254,7 @@ VLIB_NODE_FN (lcp_punt_l3_node) (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) { u32 n_left_from, *from, *to_next, n_left_to_next; - lip_punt_next_t next_index; + lip_punt_xc_next_t next_index; next_index = node->cached_next_index; n_left_from = 
frame->n_vectors; diff --git a/src/plugins/nsh/nsh.c b/src/plugins/nsh/nsh.c index a2c24e27b26..06dd45be944 100644 --- a/src/plugins/nsh/nsh.c +++ b/src/plugins/nsh/nsh.c @@ -20,7 +20,7 @@ #include <nsh/nsh.h> #include <gre/gre.h> #include <vxlan/vxlan.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vnet/l2/l2_classify.h> #include <vnet/adj/adj.h> #include <vpp/app/version.h> @@ -182,7 +182,8 @@ nsh_md2_set_next_ioam_export_override (uword next) clib_error_t * nsh_init (vlib_main_t * vm) { - vlib_node_t *node, *gre4_input, *gre6_input; + vlib_node_t *node, *gre4_input, *gre6_input, *vxlan4_gpe_input, + *vxlan6_gpe_input; nsh_main_t *nm = &nsh_main; clib_error_t *error = 0; uword next_node; @@ -222,20 +223,24 @@ nsh_init (vlib_main_t * vm) /* Add dispositions to nodes that feed nsh-input */ //alagalah - validate we don't really need to use the node value + vxlan4_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input"); + vxlan6_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan6-gpe-input"); + nm->vgm = vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main"); + if (vxlan4_gpe_input == 0 || vxlan6_gpe_input == 0 || nm->vgm == 0) + { + error = clib_error_return (0, "vxlan_gpe_plugin.so is not loaded"); + return error; + } next_node = - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, - nm->nsh_input_node_index); - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, - nm->nsh_proxy_node_index); - vlib_node_add_next (vm, vxlan4_gpe_input_node.index, + vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_input_node_index); + vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_proxy_node_index); + vlib_node_add_next (vm, vxlan4_gpe_input->index, nsh_aware_vnf_proxy_node.index); - vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node); + nm->vgm->register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node); - vlib_node_add_next (vm, vxlan6_gpe_input_node.index, - 
nm->nsh_input_node_index); - vlib_node_add_next (vm, vxlan6_gpe_input_node.index, - nm->nsh_proxy_node_index); - vlib_node_add_next (vm, vxlan6_gpe_input_node.index, + vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_input_node_index); + vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_proxy_node_index); + vlib_node_add_next (vm, vxlan6_gpe_input->index, nsh_aware_vnf_proxy_node.index); gre4_input = vlib_get_node_by_name (vm, (u8 *) "gre4-input"); @@ -280,7 +285,9 @@ nsh_init (vlib_main_t * vm) return error; } -VLIB_INIT_FUNCTION (nsh_init); +VLIB_INIT_FUNCTION (nsh_init) = { + .runs_after = VLIB_INITS ("vxlan_gpe_init"), +}; VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, diff --git a/src/plugins/nsh/nsh.h b/src/plugins/nsh/nsh.h index 86a9a7e95c3..c408ddb99a2 100644 --- a/src/plugins/nsh/nsh.h +++ b/src/plugins/nsh/nsh.h @@ -18,6 +18,7 @@ #include <vnet/vnet.h> #include <nsh/nsh_packet.h> #include <vnet/ip/ip4_packet.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> typedef struct { u16 class; @@ -166,6 +167,10 @@ typedef struct { /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; + + /* vxlan gpe plugin */ + vxlan_gpe_main_t *vgm; + } nsh_main_t; extern nsh_main_t nsh_main; diff --git a/src/plugins/nsh/nsh_pop.c b/src/plugins/nsh/nsh_pop.c index 8de319e158b..d66cfc9de27 100644 --- a/src/plugins/nsh/nsh_pop.c +++ b/src/plugins/nsh/nsh_pop.c @@ -19,7 +19,7 @@ #include <vnet/plugin/plugin.h> #include <nsh/nsh.h> #include <vnet/gre/packet.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <plugins/vxlan-gpe/vxlan_gpe.h> #include <vnet/l2/l2_classify.h> #include <vlibapi/api.h> diff --git a/src/plugins/ping/ping_api.c b/src/plugins/ping/ping_api.c index 5578fa560f2..a5af1033d0e 100644 --- a/src/plugins/ping/ping_api.c +++ b/src/plugins/ping/ping_api.c @@ -122,16 +122,22 @@ vl_api_want_ping_finished_events_t_handler ( while ((sleep_interval = time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0) { - uword event_type; + uword 
event_count; vlib_process_wait_for_event_or_clock (vm, sleep_interval); - event_type = vlib_process_get_events (vm, 0); - if (event_type == ~0) + if (dst_addr.version == AF_IP4) + event_count = + vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP4); + else if (dst_addr.version == AF_IP6) + event_count = + vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP6); + else break; - if (event_type == PING_RESPONSE_IP4 || - event_type == PING_RESPONSE_IP6) - reply_count += 1; + if (event_count == 0) + break; + + reply_count += 1; } } diff --git a/src/plugins/quic/quic_crypto.c b/src/plugins/quic/quic_crypto.c index 9e2c915daaa..4e11eff2431 100644 --- a/src/plugins/quic/quic_crypto.c +++ b/src/plugins/quic/quic_crypto.c @@ -248,8 +248,7 @@ quic_crypto_decrypt_packet (quic_ctx_t *qctx, quic_rx_packet_ctx_t *pctx) pctx->packet.octets.len - aead_off, pn, pctx->packet.octets.base, aead_off)) == SIZE_MAX) { - fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __FUNCTION__, - pn); + fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __func__, pn); return; } @@ -349,8 +348,7 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc, } else { - QUIC_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__, - _ctx->algo->name); + QUIC_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name); assert (0); } @@ -405,8 +403,7 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc, } else { - QUIC_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__, - _ctx->algo->name); + QUIC_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name); assert (0); } diff --git a/src/plugins/tlspicotls/pico_vpp_crypto.c b/src/plugins/tlspicotls/pico_vpp_crypto.c index 3d28d50b352..e8e4a875e33 100644 --- a/src/plugins/tlspicotls/pico_vpp_crypto.c +++ b/src/plugins/tlspicotls/pico_vpp_crypto.c @@ -107,8 +107,7 @@ ptls_vpp_crypto_cipher_setup_crypto (ptls_cipher_context_t * _ctx, int is_enc, } else { - TLS_DBG (1, "%s, Invalid 
crypto cipher : ", __FUNCTION__, - _ctx->algo->name); + TLS_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name); assert (0); } @@ -226,8 +225,7 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc, } else { - TLS_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__, - _ctx->algo->name); + TLS_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name); return -1; } diff --git a/src/plugins/unittest/ipsec_test.c b/src/plugins/unittest/ipsec_test.c index b505c58de3f..869d53367b6 100644 --- a/src/plugins/unittest/ipsec_test.c +++ b/src/plugins/unittest/ipsec_test.c @@ -54,14 +54,11 @@ test_ipsec_command_fn (vlib_main_t *vm, unformat_input_t *input, if (irt) { - irt->seq = seq_num & 0xffffffff; - irt->seq_hi = seq_num >> 32; + irt->seq64 = seq_num; /* clear the window */ - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - clib_bitmap_zero (irt->replay_window_huge); - else - irt->replay_window = 0; + uword_bitmap_clear (irt->replay_window, + irt->anti_replay_window_size / uword_bits); } ipsec_sa_unlock (sa_index); diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index 993f1be41a9..f0e5d4b4f3d 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -16,6 +16,7 @@ #include <arpa/inet.h> #include <vnet/session/application.h> #include <vnet/session/session.h> +#include <vnet/session/transport.h> #include <sys/epoll.h> #include <vnet/session/session_rules_table.h> @@ -50,6 +51,11 @@ placeholder_session_reset_callback (session_t * s) volatile u32 connected_session_index = ~0; volatile u32 connected_session_thread = ~0; +static u32 placeholder_accept; +volatile u32 accepted_session_index; +volatile u32 accepted_session_thread; +volatile int app_session_error = 0; + int placeholder_session_connected_callback (u32 app_index, u32 api_context, session_t * s, session_error_t err) @@ -81,13 +87,22 @@ placeholder_del_segment_callback (u32 client_index, u64 
segment_handle) void placeholder_session_disconnect_callback (session_t * s) { - clib_warning ("called..."); + if (!(s->session_index == connected_session_index && + s->thread_index == connected_session_thread) && + !(s->session_index == accepted_session_index && + s->thread_index == accepted_session_thread)) + { + clib_warning (0, "unexpected disconnect s %u thread %u", + s->session_index, s->thread_index); + app_session_error = 1; + } + vnet_disconnect_args_t da = { + .handle = session_handle (s), + .app_index = app_worker_get (s->app_wrk_index)->app_index + }; + vnet_disconnect_session (&da); } -static u32 placeholder_accept; -volatile u32 accepted_session_index; -volatile u32 accepted_session_thread; - int placeholder_session_accept_callback (session_t * s) { @@ -105,12 +120,39 @@ placeholder_server_rx_callback (session_t * s) return -1; } +void +placeholder_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf) +{ + if (ntf == SESSION_CLEANUP_TRANSPORT) + return; + + if (s->session_index == connected_session_index && + s->thread_index == connected_session_thread) + { + connected_session_index = ~0; + connected_session_thread = ~0; + } + else if (s->session_index == accepted_session_index && + s->thread_index == accepted_session_thread) + { + accepted_session_index = ~0; + accepted_session_thread = ~0; + } + else + { + clib_warning (0, "unexpected cleanup s %u thread %u", s->session_index, + s->thread_index); + app_session_error = 1; + } +} + static session_cb_vft_t placeholder_session_cbs = { .session_reset_callback = placeholder_session_reset_callback, .session_connected_callback = placeholder_session_connected_callback, .session_accept_callback = placeholder_session_accept_callback, .session_disconnect_callback = placeholder_session_disconnect_callback, .builtin_app_rx_callback = placeholder_server_rx_callback, + .session_cleanup_callback = placeholder_cleanup_callback, .add_segment_callback = placeholder_add_segment_callback, .del_segment_callback = 
placeholder_del_segment_callback, }; @@ -278,6 +320,7 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) u64 options[APP_OPTIONS_N_OPTIONS], placeholder_secret = 1234; u16 placeholder_server_port = 1234, placeholder_client_port = 5678; session_endpoint_cfg_t server_sep = SESSION_ENDPOINT_CFG_NULL; + u32 client_vrf = 0, server_vrf = 1; ip4_address_t intf_addr[3]; transport_connection_t *tc; session_t *s; @@ -288,25 +331,25 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) * Create the loopbacks */ intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101); - session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]); + session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]); intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202); - session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]); + session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]); - session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32, - 1 /* is_add */ ); - session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32, - 1 /* is_add */ ); + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */); /* * Insert namespace */ - appns_id = format (0, "appns1"); + appns_id = format (0, "appns_server"); vnet_app_namespace_add_del_args_t ns_args = { .ns_id = appns_id, .secret = placeholder_secret, - .sw_if_index = sw_if_index[1], - .ip4_fib_id = 0, + .sw_if_index = sw_if_index[1], /* server interface*/ + .ip4_fib_id = 0, /* sw_if_index takes precedence */ .is_add = 1 }; error = vnet_app_namespace_add_del (&ns_args); @@ -357,10 +400,10 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) * Connect and force lcl ip */ client_sep.is_ip4 = 1; - client_sep.ip.ip4.as_u32 = clib_host_to_net_u32 (0x02020202); + client_sep.ip.ip4.as_u32 = 
intf_addr[1].as_u32; client_sep.port = placeholder_server_port; client_sep.peer.is_ip4 = 1; - client_sep.peer.ip.ip4.as_u32 = clib_host_to_net_u32 (0x01010101); + client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32; client_sep.peer.port = placeholder_client_port; client_sep.transport_proto = TRANSPORT_PROTO_TCP; @@ -401,6 +444,35 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) SESSION_TEST ((tc->lcl_port == placeholder_client_port), "ports should be equal"); + /* Disconnect server session, should lead to faster port cleanup on client */ + vnet_disconnect_args_t disconnect_args = { + .handle = + session_make_handle (accepted_session_index, accepted_session_thread), + .app_index = server_index, + }; + + error = vnet_disconnect_session (&disconnect_args); + SESSION_TEST ((error == 0), "disconnect should work"); + + /* wait for stuff to happen */ + tries = 0; + while (connected_session_index != ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + /* Active closes take longer to cleanup, don't wait */ + + clib_warning ("waited %.1f seconds for disconnect", tries / 10.0); + SESSION_TEST ((connected_session_index == ~0), "session should not exist"); + SESSION_TEST ((connected_session_thread == ~0), "thread should not exist"); + SESSION_TEST (transport_port_local_in_use () == 0, + "port should be cleaned up"); + SESSION_TEST ((app_session_error == 0), "no app session errors"); + + /* Start cleanup by detaching apps */ vnet_app_detach_args_t detach_args = { .app_index = server_index, .api_client_index = ~0, @@ -416,13 +488,167 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input) /* Allow the disconnects to finish before removing the routes. 
*/ vlib_process_suspend (vm, 10e-3); - session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32, - 0 /* is_add */ ); - session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32, - 0 /* is_add */ ); + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */); session_delete_loopback (sw_if_index[0]); session_delete_loopback (sw_if_index[1]); + + /* + * Redo the test but with client in the non-default namespace + */ + + /* Create the loopbacks */ + client_vrf = 1; + server_vrf = 0; + session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]); + session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]); + + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */); + + /* Insert new client namespace */ + vec_free (appns_id); + appns_id = format (0, "appns_client"); + ns_args.ns_id = appns_id; + ns_args.sw_if_index = sw_if_index[0]; /* client interface*/ + ns_args.is_add = 1; + + error = vnet_app_namespace_add_del (&ns_args); + SESSION_TEST ((error == 0), "app ns insertion should succeed: %U", + format_session_error, error); + + /* Attach client */ + attach_args.name = format (0, "session_test_client"); + attach_args.namespace_id = appns_id; + attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 0; + attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = placeholder_secret; + attach_args.api_client_index = ~0; + + error = vnet_application_attach (&attach_args); + SESSION_TEST ((error == 0), "client app attached: %U", format_session_error, + error); + client_index = attach_args.app_index; + vec_free (attach_args.name); + + /* Attach server */ + attach_args.name = format (0, "session_test_server"); + attach_args.namespace_id = 0; 
+ attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 32 << 20; + attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = 0; + attach_args.api_client_index = ~0; + error = vnet_application_attach (&attach_args); + SESSION_TEST ((error == 0), "server app attached: %U", format_session_error, + error); + vec_free (attach_args.name); + server_index = attach_args.app_index; + + /* Bind server */ + clib_memset (&server_sep, 0, sizeof (server_sep)); + server_sep.is_ip4 = 1; + server_sep.port = placeholder_server_port; + bind_args.sep_ext = server_sep; + bind_args.app_index = server_index; + error = vnet_listen (&bind_args); + SESSION_TEST ((error == 0), "server bind should work: %U", + format_session_error, error); + + /* Connect client */ + connected_session_index = connected_session_thread = ~0; + accepted_session_index = accepted_session_thread = ~0; + clib_memset (&client_sep, 0, sizeof (client_sep)); + client_sep.is_ip4 = 1; + client_sep.ip.ip4.as_u32 = intf_addr[1].as_u32; + client_sep.port = placeholder_server_port; + client_sep.peer.is_ip4 = 1; + client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32; + client_sep.peer.port = placeholder_client_port; + client_sep.transport_proto = TRANSPORT_PROTO_TCP; + + connect_args.sep_ext = client_sep; + connect_args.app_index = client_index; + error = vnet_connect (&connect_args); + SESSION_TEST ((error == 0), "connect should work"); + + /* wait for stuff to happen */ + while (connected_session_index == ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + while (accepted_session_index == ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + clib_warning ("waited %.1f seconds for connections", tries / 10.0); + SESSION_TEST ((connected_session_index != ~0), "session should exist"); + SESSION_TEST ((connected_session_thread != ~0), "thread 
should exist"); + SESSION_TEST ((accepted_session_index != ~0), "session should exist"); + SESSION_TEST ((accepted_session_thread != ~0), "thread should exist"); + s = session_get (connected_session_index, connected_session_thread); + tc = session_get_transport (s); + SESSION_TEST ((tc != 0), "transport should exist"); + SESSION_TEST ( + (memcmp (&tc->lcl_ip, &client_sep.peer.ip, sizeof (tc->lcl_ip)) == 0), + "ips should be equal"); + SESSION_TEST ((tc->lcl_port == placeholder_client_port), + "ports should be equal"); + + /* Disconnect server session, for faster port cleanup on client */ + disconnect_args.app_index = server_index; + disconnect_args.handle = + session_make_handle (accepted_session_index, accepted_session_thread); + + error = vnet_disconnect_session (&disconnect_args); + SESSION_TEST ((error == 0), "disconnect should work"); + + /* wait for stuff to happen */ + tries = 0; + while (connected_session_index != ~0 && ++tries < 100) + { + vlib_worker_thread_barrier_release (vm); + vlib_process_suspend (vm, 100e-3); + vlib_worker_thread_barrier_sync (vm); + } + + /* Active closes take longer to cleanup, don't wait */ + + clib_warning ("waited %.1f seconds for disconnect", tries / 10.0); + SESSION_TEST ((connected_session_index == ~0), "session should not exist"); + SESSION_TEST ((connected_session_thread == ~0), "thread should not exist"); + SESSION_TEST ((app_session_error == 0), "no app session errors"); + SESSION_TEST (transport_port_local_in_use () == 0, + "port should be cleaned up"); + + /* Start cleanup by detaching apps */ + detach_args.app_index = server_index; + vnet_application_detach (&detach_args); + detach_args.app_index = client_index; + vnet_application_detach (&detach_args); + + ns_args.is_add = 0; + error = vnet_app_namespace_add_del (&ns_args); + SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error); + + /* Allow the disconnects to finish before removing the routes. 
*/ + vlib_process_suspend (vm, 10e-3); + + session_add_del_route_via_lookup_in_table ( + client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */); + session_add_del_route_via_lookup_in_table ( + server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */); + + session_delete_loopback (sw_if_index[0]); + session_delete_loopback (sw_if_index[1]); + return 0; } @@ -1781,6 +2007,11 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input) unformat_free (&tmp_input); vec_free (attach_args.name); session_delete_loopback (sw_if_index); + + /* Revert default appns sw_if_index */ + app_ns = app_namespace_get_default (); + app_ns->sw_if_index = ~0; + return 0; } diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c index bd39474ce93..4b53bc18906 100644 --- a/src/plugins/unittest/tcp_test.c +++ b/src/plugins/unittest/tcp_test.c @@ -1005,7 +1005,7 @@ static void tcp_test_set_time (u32 thread_index, u32 val) { session_main.wrk[thread_index].last_vlib_time = val; - tcp_set_time_now (&tcp_main.wrk_ctx[thread_index], val); + tcp_set_time_now (&tcp_main.wrk[thread_index], val); } static int diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h index b17fed7e04b..edb4ec79171 100644 --- a/src/plugins/urpf/urpf_dp.h +++ b/src/plugins/urpf/urpf_dp.h @@ -98,8 +98,8 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address); /* Pass multicast. 
*/ - lpass = (ip4_address_is_multicast (&ip->src_address) || - ip4_address_is_global_broadcast (&ip->src_address)); + lpass = (ip4_address_is_multicast (&ip->dst_address) || + ip4_address_is_global_broadcast (&ip->dst_address)); } else { @@ -108,7 +108,7 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir, ip = (ip6_header_t *) h; lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address); - lpass = ip6_address_is_multicast (&ip->src_address); + lpass = ip6_address_is_multicast (&ip->dst_address); } llb = load_balance_get (lb_index); @@ -157,10 +157,10 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address, &ip1->src_address, &lb_index0, &lb_index1); /* Pass multicast. */ - lpass0 = (ip4_address_is_multicast (&ip0->src_address) || - ip4_address_is_global_broadcast (&ip0->src_address)); - lpass1 = (ip4_address_is_multicast (&ip1->src_address) || - ip4_address_is_global_broadcast (&ip1->src_address)); + lpass0 = (ip4_address_is_multicast (&ip0->dst_address) || + ip4_address_is_global_broadcast (&ip0->dst_address)); + lpass1 = (ip4_address_is_multicast (&ip1->dst_address) || + ip4_address_is_global_broadcast (&ip1->dst_address)); } else { @@ -171,8 +171,8 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir, lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address); lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address); - lpass0 = ip6_address_is_multicast (&ip0->src_address); - lpass1 = ip6_address_is_multicast (&ip1->src_address); + lpass0 = ip6_address_is_multicast (&ip0->dst_address); + lpass1 = ip6_address_is_multicast (&ip1->dst_address); } llb0 = load_balance_get (lb_index0); diff --git a/src/plugins/vrrp/vrrp_periodic.c b/src/plugins/vrrp/vrrp_periodic.c index 5f9d7ae938e..e3a374a112d 100644 --- a/src/plugins/vrrp/vrrp_periodic.c +++ b/src/plugins/vrrp/vrrp_periodic.c @@ -187,7 +187,19 @@ vrrp_periodic_process 
(vlib_main_t * vm, timer = pool_elt_at_index (pm->vr_timers, next_timer); timeout = timer->expire_time - now; - vlib_process_wait_for_event_or_clock (vm, timeout); + /* + * Adding a virtual MAC to some NICs can take a significant amount + * of time (~1s). If a lot of VRs enter the master state around the + * same time, the process node can stay active for a very long time + * processing all of the transitions. + * + * Try to force a 10us sleep between processing events to ensure + * that the process node does not prevent API messages and RPCs + * from being handled for an extended period. This prevents + * vlib_process_wait_for_event_or_clock() from returning + * immediately. + */ + vlib_process_wait_for_event_or_clock (vm, clib_max (timeout, 10e-6)); } event_type = vlib_process_get_events (vm, (uword **) & event_data); diff --git a/src/plugins/vxlan-gpe/CMakeLists.txt b/src/plugins/vxlan-gpe/CMakeLists.txt new file mode 100644 index 00000000000..987ebcc2df9 --- /dev/null +++ b/src/plugins/vxlan-gpe/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright (c) 2024 OpenInfra Foundation Europe +# Copyright (c) 2025 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_vpp_plugin(vxlan-gpe + SOURCES + encap.c + decap.c + vxlan_gpe.c + vxlan_gpe_api.c + vxlan_gpe_packet.h + plugin.c + + INSTALL_HEADERS + vxlan_gpe.h + + MULTIARCH_SOURCES + decap.c + + API_FILES + vxlan_gpe.api +) diff --git a/src/vnet/vxlan-gpe/FEATURE.yaml b/src/plugins/vxlan-gpe/FEATURE.yaml index f4ec2f4c517..f4ec2f4c517 100644 --- a/src/vnet/vxlan-gpe/FEATURE.yaml +++ b/src/plugins/vxlan-gpe/FEATURE.yaml diff --git a/src/vnet/vxlan-gpe/decap.c b/src/plugins/vxlan-gpe/decap.c index d4c7424630d..29f03e3b380 100644 --- a/src/vnet/vxlan-gpe/decap.c +++ b/src/plugins/vxlan-gpe/decap.c @@ -22,7 +22,7 @@ #include <vlib/vlib.h> #include <vnet/udp/udp_local.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> /** * @brief Struct for VXLAN GPE decap packet tracing @@ -617,7 +617,7 @@ VLIB_NODE_FN (vxlan6_gpe_input_node) (vlib_main_t * vm, */ static char *vxlan_gpe_error_strings[] = { #define vxlan_gpe_error(n,s) s, -#include <vnet/vxlan-gpe/vxlan_gpe_error.def> +#include <vxlan-gpe/vxlan_gpe_error.def> #undef vxlan_gpe_error #undef _ }; diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/plugins/vxlan-gpe/dir.dox index c154733b21f..c154733b21f 100644 --- a/src/vnet/vxlan-gpe/dir.dox +++ b/src/plugins/vxlan-gpe/dir.dox diff --git a/src/vnet/vxlan-gpe/encap.c b/src/plugins/vxlan-gpe/encap.c index a769861577d..d8bab921493 100644 --- a/src/vnet/vxlan-gpe/encap.c +++ b/src/plugins/vxlan-gpe/encap.c @@ -23,7 +23,7 @@ #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> #include <vnet/udp/udp_inlines.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> /** Statistics (not really errors) */ #define foreach_vxlan_gpe_encap_error \ diff --git a/src/plugins/vxlan-gpe/plugin.c b/src/plugins/vxlan-gpe/plugin.c new file mode 100644 index 00000000000..5a711a39d78 --- /dev/null +++ b/src/plugins/vxlan-gpe/plugin.c @@ -0,0 +1,26 @@ +/* + * plugin.c: vxlan-gpe + * + * Copyright (c) OpenInfra Foundation Europe. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> +// register a plugin + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "VxLan GPE Tunnels", +}; diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt index 35cee50f573..35cee50f573 100644 --- a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt +++ b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/plugins/vxlan-gpe/vxlan_gpe.api index 3cbd7ab7f71..3cbd7ab7f71 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.api +++ b/src/plugins/vxlan-gpe/vxlan_gpe.api diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/plugins/vxlan-gpe/vxlan_gpe.c index 5a5262ea9db..abb2049a356 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.c +++ b/src/plugins/vxlan-gpe/vxlan_gpe.c @@ -17,7 +17,7 @@ * @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels * */ -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> #include <vnet/fib/fib.h> #include <vnet/ip/format.h> #include <vnet/fib/fib_entry.h> @@ -44,7 +44,7 @@ * You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment. 
*/ -vxlan_gpe_main_t vxlan_gpe_main; +vxlan_gpe_main_t vxlan_gpe_main __clib_export; static u8 * format_decap_next (u8 * s, va_list * args) @@ -1212,11 +1212,13 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) = * @return error * */ -clib_error_t * -vxlan_gpe_init (vlib_main_t * vm) +__clib_export clib_error_t * +vxlan_gpe_init (vlib_main_t *vm) { vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + ngm->register_decap_protocol = vxlan_gpe_register_decap_protocol; + ngm->unregister_decap_protocol = vxlan_gpe_unregister_decap_protocol; ngm->vnet_main = vnet_get_main (); ngm->vlib_main = vm; diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/plugins/vxlan-gpe/vxlan_gpe.h index aabaafeee6f..138ae840ef5 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe.h +++ b/src/plugins/vxlan-gpe/vxlan_gpe.h @@ -29,7 +29,7 @@ #include <vnet/l2/l2_output.h> #include <vnet/l2/l2_bd.h> #include <vnet/ethernet/ethernet.h> -#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <vxlan-gpe/vxlan_gpe_packet.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> #include <vnet/udp/udp_packet.h> @@ -196,11 +196,16 @@ typedef enum typedef enum { #define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n, -#include <vnet/vxlan-gpe/vxlan_gpe_error.def> +#include <plugins/vxlan-gpe/vxlan_gpe_error.def> #undef vxlan_gpe_error VXLAN_GPE_N_ERROR, } vxlan_gpe_input_error_t; +typedef void (*vxlan_gpe_register_decap_protocol_callback_t) ( + u8 protocol_id, uword next_node_index); +typedef void (*vxlan_gpe_unregister_decap_protocol_callback_t) ( + u8 protocol_id, uword next_node_index); + /** Struct for VXLAN GPE node state */ typedef struct { @@ -233,6 +238,10 @@ typedef struct /** List of next nodes for the decap indexed on protocol */ uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX]; + + /* export callbacks to register/unregister decapsulation protocol */ + vxlan_gpe_register_decap_protocol_callback_t register_decap_protocol; + vxlan_gpe_unregister_decap_protocol_callback_t unregister_decap_protocol; } 
vxlan_gpe_main_t; extern vxlan_gpe_main_t vxlan_gpe_main; @@ -279,13 +288,10 @@ typedef enum VXLAN_GPE_ENCAP_N_NEXT } vxlan_gpe_encap_next_t; - +void vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index); void vxlan_gpe_unregister_decap_protocol (u8 protocol_id, uword next_node_index); -void vxlan_gpe_register_decap_protocol (u8 protocol_id, - uword next_node_index); - void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable); diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/plugins/vxlan-gpe/vxlan_gpe_api.c index cc74e1f58d4..e82445498e8 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c +++ b/src/plugins/vxlan-gpe/vxlan_gpe_api.c @@ -23,13 +23,13 @@ #include <vnet/interface.h> #include <vnet/api_errno.h> #include <vnet/feature/feature.h> -#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vxlan-gpe/vxlan_gpe.h> #include <vnet/fib/fib_table.h> #include <vnet/format_fns.h> #include <vnet/ip/ip_types_api.h> -#include <vnet/vxlan-gpe/vxlan_gpe.api_enum.h> -#include <vnet/vxlan-gpe/vxlan_gpe.api_types.h> +#include <vxlan-gpe/vxlan_gpe.api_enum.h> +#include <vxlan-gpe/vxlan_gpe.api_types.h> #define REPLY_MSG_ID_BASE msg_id_base #include <vlibapi/api_helper_macros.h> diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/plugins/vxlan-gpe/vxlan_gpe_error.def index 9cf1b1cb656..9cf1b1cb656 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_error.def +++ b/src/plugins/vxlan-gpe/vxlan_gpe_error.def diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h index f5e5ddc2347..f5e5ddc2347 100644 --- a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h +++ b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index f38df8fbf47..7ba9fab25fa 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -743,6 +743,7 @@ vls_listener_wrk_start_listen (vcl_locked_session_t * vls, u32 wrk_index) if (ls->flags & VCL_SESSION_F_PENDING_LISTEN) return; + ls->flags &= 
~VCL_SESSION_F_LISTEN_NO_MQ; vcl_send_session_listen (wrk, ls); vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */); @@ -759,7 +760,7 @@ vls_listener_wrk_stop_listen (vcl_locked_session_t * vls, u32 wrk_index) if (s->session_state != VCL_STATE_LISTEN) return; vcl_send_session_unlisten (wrk, s); - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; vls_listener_wrk_set (vls, wrk_index, 0 /* is_active */ ); } @@ -912,7 +913,7 @@ vls_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls) if (s->session_state == VCL_STATE_LISTEN) { - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; s->rx_fifo = s->tx_fifo = 0; } else if (s->rx_fifo) @@ -1384,36 +1385,41 @@ vls_mp_checks (vcl_locked_session_t * vls, int is_add) switch (s->session_state) { case VCL_STATE_LISTEN: - if (is_add) + if (!(s->flags & VCL_SESSION_F_LISTEN_NO_MQ)) { - vls_listener_wrk_set (vls, vls->vcl_wrk_index, 1 /* is_active */); - break; + if (is_add) + { + vls_listener_wrk_set (vls, vls->vcl_wrk_index, + 1 /* is_active */); + break; + } + /* Although removal from epoll means listener no longer accepts new + * sessions, the accept queue built by vpp cannot be drained by + * stopping the listener. Morover, some applications, e.g., nginx, + * might constantly remove and add listeners to their epfds. Removing + * listeners in such situations causes a lot of churn in vpp as + * segments and segment managers need to be recreated. */ + /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */ + } + else + { + if (!is_add) + break; + + /* Register worker as listener */ + vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index); + + /* If owner worker did not attempt to accept/xpoll on the session, + * force a listen stop for it, since it may not be interested in + * accepting new sessions. 
+ * This is pretty much a hack done to give app workers the illusion + * that it is fine to listen and not accept new sessions for a + * given listener. Without it, we would accumulate unhandled + * accepts on the passive worker message queue. */ + owner_wrk = vls_shared_get_owner (vls); + if (!vls_listener_wrk_is_active (vls, owner_wrk)) + vls_listener_wrk_stop_listen (vls, owner_wrk); } - /* Although removal from epoll means listener no longer accepts new - * sessions, the accept queue built by vpp cannot be drained by stopping - * the listener. Morover, some applications, e.g., nginx, might - * constantly remove and add listeners to their epfds. Removing - * listeners in such situations causes a lot of churn in vpp as segments - * and segment managers need to be recreated. */ - /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */ - break; - case VCL_STATE_LISTEN_NO_MQ: - if (!is_add) - break; - - /* Register worker as listener */ - vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index); - - /* If owner worker did not attempt to accept/xpoll on the session, - * force a listen stop for it, since it may not be interested in - * accepting new sessions. - * This is pretty much a hack done to give app workers the illusion - * that it is fine to listen and not accept new sessions for a - * given listener. Without it, we would accumulate unhandled - * accepts on the passive worker message queue. 
*/ - owner_wrk = vls_shared_get_owner (vls); - if (!vls_listener_wrk_is_active (vls, owner_wrk)) - vls_listener_wrk_stop_listen (vls, owner_wrk); break; default: break; diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index d9814394f0d..6892688da5a 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -201,17 +201,17 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk) { if (s->session_state == VCL_STATE_LISTEN) { - s->session_state = VCL_STATE_LISTEN_NO_MQ; + s->flags |= VCL_SESSION_F_LISTEN_NO_MQ; continue; } if ((s->flags & VCL_SESSION_F_IS_VEP) || - s->session_state == VCL_STATE_LISTEN_NO_MQ || s->session_state == VCL_STATE_CLOSED) continue; hash_set (seg_indices_map, s->tx_fifo->segment_index, 1); s->session_state = VCL_STATE_DETACHED; + s->flags |= VCL_SESSION_F_APP_CLOSING; vec_add2 (wrk->unhandled_evts_vector, e, 1); e->event_type = SESSION_CTRL_EVT_DISCONNECTED; e->session_index = s->session_index; @@ -221,11 +221,22 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk) hash_foreach (seg_index, val, seg_indices_map, ({ vec_add1 (seg_indices, seg_index); })); + /* If multi-threaded apps, wait for all threads to hopefully finish + * their blocking operations */ + if (wrk->pre_wait_fn) + wrk->pre_wait_fn (VCL_INVALID_SESSION_INDEX); + sleep (1); + if (wrk->post_wait_fn) + wrk->post_wait_fn (VCL_INVALID_SESSION_INDEX); + vcl_segment_detach_segments (seg_indices); /* Detach worker's mqs segment */ vcl_segment_detach (vcl_vpp_worker_segment_handle (wrk->wrk_index)); + wrk->app_event_queue = 0; + wrk->ctrl_mq = 0; + vec_free (seg_indices); hash_free (seg_indices_map); } @@ -364,8 +375,8 @@ vcl_session_read_ready (vcl_session_t * s) } else { - return (s->session_state == VCL_STATE_DISCONNECT) ? - VPPCOM_ECONNRESET : VPPCOM_ENOTCONN; + return (s->session_state == VCL_STATE_DISCONNECT) ? 
VPPCOM_ECONNRESET : + VPPCOM_ENOTCONN; } } @@ -773,9 +784,6 @@ vcl_session_state_str (vcl_session_state_t state) case VCL_STATE_UPDATED: st = "STATE_UPDATED"; break; - case VCL_STATE_LISTEN_NO_MQ: - st = "STATE_LISTEN_NO_MQ"; - break; default: st = "UNKNOWN_STATE"; break; diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index c98e1cde9b1..c92bb58169d 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -71,7 +71,6 @@ typedef enum vcl_session_state_ VCL_STATE_DISCONNECT, VCL_STATE_DETACHED, VCL_STATE_UPDATED, - VCL_STATE_LISTEN_NO_MQ, } vcl_session_state_t; typedef struct epoll_event vppcom_epoll_event_t; @@ -144,6 +143,7 @@ typedef enum vcl_session_flags_ VCL_SESSION_F_PENDING_FREE = 1 << 7, VCL_SESSION_F_PENDING_LISTEN = 1 << 8, VCL_SESSION_F_APP_CLOSING = 1 << 9, + VCL_SESSION_F_LISTEN_NO_MQ = 1 << 10, } __clib_packed vcl_session_flags_t; typedef enum vcl_worker_wait_ @@ -563,9 +563,8 @@ vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 handle) return 0; } - ASSERT (s->session_state == VCL_STATE_LISTEN - || s->session_state == VCL_STATE_LISTEN_NO_MQ - || vcl_session_is_connectable_listener (wrk, s)); + ASSERT (s->session_state == VCL_STATE_LISTEN || + vcl_session_is_connectable_listener (wrk, s)); return s; } diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 19d58c349b7..b4f985e5562 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -519,8 +519,7 @@ vcl_session_reset_handler (vcl_worker_t * wrk, } /* Caught a reset before actually accepting the session */ - if (session->session_state == VCL_STATE_LISTEN || - session->session_state == VCL_STATE_LISTEN_NO_MQ) + if (session->session_state == VCL_STATE_LISTEN) { if (!vcl_flag_accepted_session (session, reset_msg->handle, VCL_ACCEPTED_F_RESET)) @@ -712,8 +711,7 @@ vcl_session_disconnected_handler (vcl_worker_t * wrk, return 0; /* Caught a disconnect before actually accepting the session */ - if (session->session_state == VCL_STATE_LISTEN || - session->session_state == 
VCL_STATE_LISTEN_NO_MQ) + if (session->session_state == VCL_STATE_LISTEN) { if (!vcl_flag_accepted_session (session, msg->handle, VCL_ACCEPTED_F_CLOSED)) @@ -1085,8 +1083,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) * VPP_CLOSING state instead can been marked as ACCEPTED_F_CLOSED. */ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) && - !(s->session_state == VCL_STATE_LISTEN || - s->session_state == VCL_STATE_LISTEN_NO_MQ)) + !(s->session_state == VCL_STATE_LISTEN)) { s->session_state = VCL_STATE_VPP_CLOSING; s->flags |= VCL_SESSION_F_PENDING_DISCONNECT; @@ -1114,8 +1111,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e) * DISCONNECT state instead can been marked as ACCEPTED_F_RESET. */ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) && - !(s->session_state == VCL_STATE_LISTEN || - s->session_state == VCL_STATE_LISTEN_NO_MQ)) + !(s->session_state == VCL_STATE_LISTEN)) { s->flags |= VCL_SESSION_F_PENDING_DISCONNECT; s->session_state = VCL_STATE_DISCONNECT; @@ -1331,6 +1327,12 @@ vppcom_session_unbind (u32 session_handle) } clib_fifo_free (session->accept_evts_fifo); + if (session->flags & VCL_SESSION_F_LISTEN_NO_MQ) + { + vcl_session_free (wrk, session); + return VPPCOM_OK; + } + vcl_send_session_unlisten (wrk, session); VDBG (0, "session %u [0x%llx]: sending unbind!", session->session_index, @@ -1425,10 +1427,11 @@ vcl_api_retry_attach (vcl_worker_t *wrk) { if (s->flags & VCL_SESSION_F_IS_VEP) continue; - if (s->session_state == VCL_STATE_LISTEN_NO_MQ) + if (s->session_state == VCL_STATE_LISTEN) vppcom_session_listen (vcl_session_handle (s), 10); else - VDBG (0, "internal error: unexpected state %d", s->session_state); + VDBG (0, "reattach error: %u unexpected state %d", s->session_index, + s->session_state); } } @@ -1769,12 +1772,20 @@ vppcom_session_listen (uint32_t listen_sh, uint32_t q_len) return VPPCOM_EBADFD; listen_vpp_handle = listen_session->vpp_handle; - if (listen_session->session_state == VCL_STATE_LISTEN) 
+ if (listen_session->session_state == VCL_STATE_LISTEN && + !(listen_session->flags & VCL_SESSION_F_LISTEN_NO_MQ)) + { + VDBG (0, "session %u [0x%llx]: already in listen state!", listen_sh, + listen_vpp_handle); + return VPPCOM_OK; + } + if (PREDICT_FALSE (!wrk->ctrl_mq)) { - VDBG (0, "session %u [0x%llx]: already in listen state!", - listen_sh, listen_vpp_handle); + listen_session->session_state = VCL_STATE_LISTEN; + listen_session->flags |= VCL_SESSION_F_LISTEN_NO_MQ; return VPPCOM_OK; } + listen_session->flags &= ~VCL_SESSION_F_LISTEN_NO_MQ; VDBG (0, "session %u: sending vpp listen request...", listen_sh); @@ -1851,7 +1862,6 @@ again: return VPPCOM_EBADFD; if ((ls->session_state != VCL_STATE_LISTEN) && - (ls->session_state != VCL_STATE_LISTEN_NO_MQ) && (!vcl_session_is_connectable_listener (wrk, ls))) { VDBG (0, "ERROR: session [0x%llx]: not in listen state! state (%s)", @@ -2653,6 +2663,9 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e, *bits_set += 1; } break; + case SESSION_CTRL_EVT_BOUND: + vcl_session_bound_handler (wrk, (session_bound_msg_t *) e->data); + break; case SESSION_CTRL_EVT_UNLISTEN_REPLY: vcl_session_unlisten_reply_handler (wrk, e->data); break; diff --git a/src/vlib/main.c b/src/vlib/main.c index a2f833711ab..731e788d30d 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -972,7 +972,7 @@ dispatch_node (vlib_main_t * vm, { ELOG_TYPE_DECLARE (e) = { - .function = (char *) __FUNCTION__, + .function = (char *) __func__, .format = "%s vector length %d, switching to %s", .format_args = "T4i4t4", .n_enum_strings = 2, diff --git a/src/vlib/threads.c b/src/vlib/threads.c index fa8d949d549..9dce1b8c8bd 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -1304,7 +1304,7 @@ vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); 
os_panic (); } CLIB_PAUSE (); @@ -1409,7 +1409,7 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); os_panic (); } } @@ -1485,7 +1485,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) { if ((now = vlib_time_now (vm)) > deadline) { - fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); + fformat (stderr, "%s: worker thread deadlock\n", __func__); os_panic (); } } @@ -1502,7 +1502,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread refork deadlock\n", - __FUNCTION__); + __func__); os_panic (); } } diff --git a/src/vlib/threads.h b/src/vlib/threads.h index c671aa78c39..da2c41fec73 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -166,7 +166,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts); #define BARRIER_SYNC_TIMEOUT (1.0) #endif -#define vlib_worker_thread_barrier_sync(X) {vlib_worker_thread_barrier_sync_int(X, __FUNCTION__);} +#define vlib_worker_thread_barrier_sync(X) \ + { \ + vlib_worker_thread_barrier_sync_int (X, __func__); \ + } void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name); @@ -195,7 +198,7 @@ vlib_smp_unsafe_warning (void) if (CLIB_DEBUG > 0) { if (vlib_get_thread_index ()) - fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__); + fformat (stderr, "%s: SMP unsafe warning...\n", __func__); } } diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index a071709542a..9e7734e20cb 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -750,29 +750,6 @@ list(APPEND VNET_HEADERS list(APPEND VNET_API_FILES mpls/mpls.api) ############################################################################## -# Tunnel protocol: vxlan-gpe 
-############################################################################## - -list(APPEND VNET_SOURCES - vxlan-gpe/vxlan_gpe.c - vxlan-gpe/encap.c - vxlan-gpe/decap.c - vxlan-gpe/vxlan_gpe_api.c -) - -list (APPEND VNET_MULTIARCH_SOURCES - vxlan-gpe/decap.c -) - -list(APPEND VNET_HEADERS - vxlan-gpe/vxlan_gpe.h - vxlan-gpe/vxlan_gpe_packet.h - vxlan-gpe/vxlan_gpe_error.def -) - -list(APPEND VNET_API_FILES vxlan-gpe/vxlan_gpe.api) - -############################################################################## # ipv6 segment routing ############################################################################## diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index 8f2a0de6ea8..f6f9392a42b 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -1030,6 +1030,7 @@ load_balance_module_init (void) * This should never be used, but just in case, stack it on a drop. */ lbi = load_balance_create(1, DPO_PROTO_IP4, 0); + ASSERT(0 == lbi); load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4)); load_balance_logger = @@ -1038,6 +1039,12 @@ load_balance_module_init (void) load_balance_map_module_init(); } +void +load_balance_pool_alloc (uword size) +{ + pool_alloc_aligned(load_balance_pool, size, CLIB_CACHE_LINE_BYTES); +} + static clib_error_t * load_balance_show (vlib_main_t * vm, unformat_input_t * input, diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index eee073f5892..76aa7982401 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -260,5 +260,6 @@ load_balance_get_bucket_i (const load_balance_t *lb, } extern void load_balance_module_init(void); +extern void load_balance_pool_alloc (uword size); #endif diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index adf880b8bbb..c86941fce9a 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -1772,6 +1772,12 @@ fib_entry_module_init (void) fib_entry_track_module_init(); } +void 
+fib_entry_pool_alloc (uword size) +{ + pool_alloc(fib_entry_pool, size); +} + fib_route_path_t * fib_entry_encode (fib_node_index_t fib_entry_index) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 7331f803ec4..2c88d1e5f6a 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -480,6 +480,7 @@ extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index, flow_hash_config_t hash_config); extern void fib_entry_module_init(void); +extern void fib_entry_pool_alloc(uword size); extern u32 fib_entry_get_stats_index(fib_node_index_t fib_entry_index); diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c index 00855f7db43..df70dc9edca 100644 --- a/src/vnet/ip/ip4_mtrie.c +++ b/src/vnet/ip/ip4_mtrie.c @@ -190,7 +190,7 @@ ip4_mtrie_8_init (ip4_mtrie_8_t *m) { ip4_mtrie_8_ply_t *root; - pool_get (ip4_ply_pool, root); + pool_get_aligned (ip4_ply_pool, root, CLIB_CACHE_LINE_BYTES); m->root_ply = root - ip4_ply_pool; ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0); @@ -853,13 +853,19 @@ ip4_mtrie_module_init (vlib_main_t * vm) clib_error_t *error = NULL; /* Burn one ply so index 0 is taken */ - pool_get (ip4_ply_pool, p); + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); return (error); } VLIB_INIT_FUNCTION (ip4_mtrie_module_init); +void +ip4_mtrie_pool_alloc (uword size) +{ + pool_alloc_aligned (ip4_ply_pool, size, CLIB_CACHE_LINE_BYTES); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h index 16c524745be..2631f07eb2b 100644 --- a/src/vnet/ip/ip4_mtrie.h +++ b/src/vnet/ip/ip4_mtrie.h @@ -179,6 +179,11 @@ format_function_t format_ip4_mtrie_8; extern ip4_mtrie_8_ply_t *ip4_ply_pool; /** + * @brief Pre-allocate the pool of plys + */ +extern void ip4_mtrie_pool_alloc (uword size); + +/** * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. 
a PLY index) */ always_inline u32 diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c index c2490f196ef..cfc3644a1bf 100644 --- a/src/vnet/ip/ip_init.c +++ b/src/vnet/ip/ip_init.c @@ -38,6 +38,9 @@ */ #include <vnet/ip/ip.h> +#include <vnet/ip/ip4_mtrie.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/dpo/load_balance.h> ip_main_t ip_main; @@ -112,6 +115,39 @@ VLIB_INIT_FUNCTION (ip_main_init) = { "flow_classify_init"), }; +static clib_error_t * +ip_config_init (vlib_main_t *vm, unformat_input_t *input) +{ + uword lbsz = 0, fibentrysz = 0, mtriesz = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "load-balance-pool-size %U", unformat_memory_size, + &lbsz)) + ; + else if (unformat (input, "fib-entry-pool-size %U", unformat_memory_size, + &fibentrysz)) + ; + else if (unformat (input, "ip4-mtrie-pool-size %U", unformat_memory_size, + &mtriesz)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (lbsz) + load_balance_pool_alloc (lbsz); + if (fibentrysz) + fib_entry_pool_alloc (fibentrysz); + if (mtriesz) + ip4_mtrie_pool_alloc (mtriesz); + + return 0; +} + +VLIB_CONFIG_FUNCTION (ip_config_init, "l3fib"); + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c index 6b62ff7f05c..30ea4429a75 100644 --- a/src/vnet/ipsec/ah_decrypt.c +++ b/src/vnet/ipsec/ah_decrypt.c @@ -202,16 +202,8 @@ ah_decrypt_inline (vlib_main_t * vm, pd->seq = clib_host_to_net_u32 (ah0->seq_no); /* anti-replay check */ - if (PREDICT_FALSE (irt->anti_reply_huge)) - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, ~0, false, &pd->seq_hi, true); - } - else - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, ~0, false, &pd->seq_hi, false); - } + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + irt, pd->seq, ~0, false, &pd->seq_hi); if (anti_replay_result) 
{ ah_decrypt_set_next_index (b[0], node, vm->thread_index, @@ -317,32 +309,16 @@ ah_decrypt_inline (vlib_main_t * vm, if (PREDICT_TRUE (irt->integ_icv_size)) { /* redo the anti-reply check. see esp_decrypt for details */ - if (PREDICT_FALSE (irt->anti_reply_huge)) + if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, + true, NULL)) { - if (ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, pd->seq_hi, true, NULL, true)) - { - ah_decrypt_set_next_index ( - b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, - next, AH_DECRYPT_NEXT_DROP, pd->sa_index); - goto trace; - } - n_lost = ipsec_sa_anti_replay_advance ( - irt, thread_index, pd->seq, pd->seq_hi, true); - } - else - { - if (ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, pd->seq_hi, true, NULL, false)) - { - ah_decrypt_set_next_index ( - b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0, - next, AH_DECRYPT_NEXT_DROP, pd->sa_index); - goto trace; - } - n_lost = ipsec_sa_anti_replay_advance ( - irt, thread_index, pd->seq, pd->seq_hi, false); + ah_decrypt_set_next_index (b[0], node, vm->thread_index, + AH_DECRYPT_ERROR_REPLAY, 0, next, + AH_DECRYPT_NEXT_DROP, pd->sa_index); + goto trace; } + n_lost = ipsec_sa_anti_replay_advance (irt, thread_index, pd->seq, + pd->seq_hi); vlib_prefetch_simple_counter ( &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index, pd->sa_index); diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 345a60a7fdd..928f1b06f9b 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -60,8 +60,7 @@ typedef enum typedef struct { u32 seq; - u32 sa_seq; - u32 sa_seq_hi; + u64 sa_seq64; u32 pkt_seq_hi; ipsec_crypto_alg_t crypto_alg; ipsec_integ_alg_t integ_alg; @@ -81,10 +80,10 @@ format_esp_decrypt_trace (u8 * s, va_list * args) esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *); s = format (s, - "esp: crypto %U integrity %U pkt-seq %d sa-seq %u sa-seq-hi %u " + "esp: crypto %U integrity %U 
pkt-seq %d sa-seq %lu " "pkt-seq-hi %u", format_ipsec_crypto_alg, t->crypto_alg, format_ipsec_integ_alg, - t->integ_alg, t->seq, t->sa_seq, t->sa_seq_hi, t->pkt_seq_hi); + t->integ_alg, t->seq, t->sa_seq64, t->pkt_seq_hi); return s; } @@ -810,32 +809,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node, * a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any * implementation, sequential or batching, from decrypting these. */ - if (PREDICT_FALSE (irt->anti_reply_huge)) + if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true, + NULL)) { - if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true, - NULL, true)) - { - esp_decrypt_set_next_index (b, node, vm->thread_index, - ESP_DECRYPT_ERROR_REPLAY, 0, next, - ESP_DECRYPT_NEXT_DROP, pd->sa_index); - return; - } - n_lost = ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, - pd->seq_hi, true); - } - else - { - if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true, - NULL, false)) - { - esp_decrypt_set_next_index (b, node, vm->thread_index, - ESP_DECRYPT_ERROR_REPLAY, 0, next, - ESP_DECRYPT_NEXT_DROP, pd->sa_index); - return; - } - n_lost = ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, - pd->seq_hi, false); + esp_decrypt_set_next_index (b, node, vm->thread_index, + ESP_DECRYPT_ERROR_REPLAY, 0, next, + ESP_DECRYPT_NEXT_DROP, pd->sa_index); + return; } + n_lost = + ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, pd->seq_hi); vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], vm->thread_index, pd->sa_index); @@ -1205,16 +1188,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, pd->current_length = b[0]->current_length; /* anti-reply check */ - if (PREDICT_FALSE (irt->anti_reply_huge)) - { - anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, ~0, false, &pd->seq_hi, true); - } - else - { - anti_replay_result = 
ipsec_sa_anti_replay_and_sn_advance ( - irt, pd->seq, ~0, false, &pd->seq_hi, false); - } + anti_replay_result = ipsec_sa_anti_replay_and_sn_advance ( + irt, pd->seq, ~0, false, &pd->seq_hi); if (anti_replay_result) { @@ -1393,8 +1368,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, tr->crypto_alg = sa->crypto_alg; tr->integ_alg = sa->integ_alg; tr->seq = pd->seq; - tr->sa_seq = irt->seq; - tr->sa_seq_hi = irt->seq_hi; + tr->sa_seq64 = irt->seq64; tr->pkt_seq_hi = pd->seq_hi; } @@ -1466,8 +1440,7 @@ esp_decrypt_post_inline (vlib_main_t * vm, tr->crypto_alg = sa->crypto_alg; tr->integ_alg = sa->integ_alg; tr->seq = pd->seq; - tr->sa_seq = irt->seq; - tr->sa_seq_hi = irt->seq_hi; + tr->sa_seq64 = irt->seq64; } n_left--; diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c index 262a8cb8c88..d27e9827074 100644 --- a/src/vnet/ipsec/ipsec_api.c +++ b/src/vnet/ipsec/ipsec_api.c @@ -44,11 +44,9 @@ static inline u64 ipsec_sa_get_inb_seq (ipsec_sa_t *sa) { ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); - u64 seq; - - seq = irt->seq; - if (ipsec_sa_is_set_USE_ESN (sa)) - seq |= (u64) irt->seq_hi << 32; + u64 seq = irt->seq64; + if (!ipsec_sa_is_set_USE_ESN (sa)) + seq = (u32) seq; return seq; } @@ -1361,7 +1359,7 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg) mp->replay_window = clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt)); mp->entry.anti_replay_window_size = - clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt)); + clib_host_to_net_u32 (irt->anti_replay_window_size); } if (ort) diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c index 0bbdc85aaed..e27892185e7 100644 --- a/src/vnet/ipsec/ipsec_format.c +++ b/src/vnet/ipsec/ipsec_format.c @@ -474,13 +474,12 @@ format_ipsec_sa (u8 * s, va_list * args) if (ort) s = format (s, "\n outbound thread-index:%d", ort->thread_index); if (irt) - s = format (s, "\n inbound seq %u seq-hi %u", irt->seq, irt->seq_hi); + s = format (s, "\n 
inbound seq %lu", irt->seq64); if (ort) s = format (s, "\n outbound seq %lu", ort->seq64); if (irt) { - s = format (s, "\n window-size: %llu", - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt)); + s = format (s, "\n window-size: %llu", irt->anti_replay_window_size); s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window, ipsec_sa_anti_replay_get_64b_window (irt)); } diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c index 6a25f6c583c..a7c7482a125 100644 --- a/src/vnet/ipsec/ipsec_input.c +++ b/src/vnet/ipsec/ipsec_input.c @@ -211,6 +211,39 @@ ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, return 0; } +always_inline uword +ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua) +{ + if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) && + (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0)) + return 1; + + return 0; +} + +always_inline ipsec_policy_t * +ipsec6_input_policy_match (ipsec_spd_t *spd, ip6_address_t *sa, + ip6_address_t *da, + ipsec_spd_policy_type_t policy_type) +{ + ipsec_main_t *im = &ipsec_main; + ipsec_policy_t *p; + u32 *i; + + vec_foreach (i, spd->policies[policy_type]) + { + p = pool_elt_at_index (im->policies, *i); + + if (!ip6_addr_match_range (sa, &p->raddr.start.ip6, &p->raddr.stop.ip6)) + continue; + + if (!ip6_addr_match_range (da, &p->laddr.start.ip6, &p->laddr.stop.ip6)) + continue; + return p; + } + return 0; +} + always_inline ipsec_policy_t * ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi) { @@ -263,16 +296,6 @@ ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi) return 0; } -always_inline uword -ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la, - ip6_address_t * ua) -{ - if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) && - (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0)) - return 1; - return 0; -} - always_inline void ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t 
*node, vlib_main_t *vm, ip4_header_t *ip0, ah_header_t *ah0, @@ -514,7 +537,7 @@ udp_or_esp: has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) - (clib_address_t) ip0); - if (PREDICT_TRUE ((p0 != NULL) & (has_space0))) + if (PREDICT_TRUE ((p0 != NULL) && (has_space0))) { *ipsec_matched += 1; @@ -740,8 +763,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, spd0, b, node, &ipsec_bypassed, &ipsec_dropped, &ipsec_matched, &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; } } else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) @@ -751,8 +772,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, spd0, b, node, &ipsec_bypassed, &ipsec_dropped, &ipsec_matched, &ipsec_unprocessed, next); - if (ipsec_bypassed > 0) - goto ipsec_bypassed; } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { @@ -764,7 +783,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm, } else { - ipsec_bypassed: ipsec_unprocessed += 1; } n_left_from -= 1; @@ -813,6 +831,142 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = { extern vlib_node_registration_t ipsec6_input_node; +always_inline void +ipsec6_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, + ip6_header_t *ip0, esp_header_t *esp0, + u32 thread_index, ipsec_spd_t *spd0, + vlib_buffer_t **b, vlib_node_runtime_t *node, + u64 *ipsec_bypassed, u64 *ipsec_dropped, + u64 *ipsec_matched, u64 *ipsec_unprocessed, + u32 *next) + +{ + ipsec_policy_t *p0 = NULL; + u32 pi0 = ~0; + u8 has_space0 = 0; + ipsec_policy_t *policies[1]; + ipsec_fp_5tuple_t tuples[1]; + bool ip_v6 = true; + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + ipsec_fp_in_5tuple_from_ip6_range ( + &tuples[0], &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (esp0->spi), IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT); + + if (esp0->spi != 0) + { + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + 
{ + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, + 1); + p0 = policies[0]; + } + else /* linear search if fast path is not enabled */ + { + p0 = ipsec6_input_protect_policy_match ( + spd0, &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (esp0->spi)); + } + has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) - + (clib_address_t) ip0); + + if (PREDICT_TRUE ((p0 != NULL) && (has_space0))) + { + *ipsec_matched += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + + vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index; + next[0] = im->esp6_decrypt_next_index; + vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0)); + goto trace0; + } + } + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS; + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else + { + p0 = + ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address, + IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_bypassed += 1; + + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + goto trace0; + } + else + { + p0 = NULL; + pi0 = ~0; + } + + if (im->fp_spd_ipv6_in_is_enabled && + PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx)) + { + tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD; + ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1); + p0 = policies[0]; + } + else + { + p0 = + ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address, + IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD); + } + + if (PREDICT_TRUE ((p0 != NULL))) + { + *ipsec_dropped += 1; + + pi0 = p0 - 
im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length)); + next[0] = IPSEC_INPUT_NEXT_DROP; + goto trace0; + } + else + { + p0 = 0; + pi0 = ~0; + } + + /* Drop by default if no match on PROTECT, BYPASS or DISCARD */ + *ipsec_unprocessed += 1; + next[0] = IPSEC_INPUT_NEXT_DROP; + +trace0: + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr)); + + tr->proto = ip0->protocol; + tr->sa_id = p0 ? p0->sa_id : ~0; + tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0; + tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0; + tr->spd = spd0->id; + tr->policy_index = pi0; + } +} VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -822,9 +976,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, ipsec_main_t *im = &ipsec_main; u32 ipsec_unprocessed = 0; u32 ipsec_matched = 0; - ipsec_policy_t *policies[1]; - ipsec_fp_5tuple_t tuples[1]; - bool ip_v6 = true; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -843,12 +994,13 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, u32 bi0, next0, pi0 = ~0; vlib_buffer_t *b0; ip6_header_t *ip0; - esp_header_t *esp0; + esp_header_t *esp0 = NULL; ip4_ipsec_config_t *c0; ipsec_spd_t *spd0; ipsec_policy_t *p0 = 0; - ah_header_t *ah0; u32 header_size = sizeof (ip0[0]); + u64 ipsec_unprocessed = 0, ipsec_matched = 0; + u64 ipsec_dropped = 0, ipsec_bypassed = 0; bi0 = to_next[0] = from[0]; from += 1; @@ -864,113 +1016,76 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm, spd0 = pool_elt_at_index (im->spds, c0->spd_index); ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp0 = (udp_header_t *) ((u8 *) ip0 + header_size); + + /* RFC5996 Section 2.23: "Port 4500 is reserved for 
+ * UDP-encapsulated ESP and IKE." + * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or + 4500" + */ + if ((clib_host_to_net_u16 (500) == udp0->dst_port) || + (clib_host_to_net_u16 (4500) == udp0->dst_port)) + esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t)); + } + else if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP) esp0 = (esp_header_t *) ((u8 *) ip0 + header_size); - ah0 = (ah_header_t *) ((u8 *) ip0 + header_size); - if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)) - { -#if 0 - clib_warning - ("packet received from %U to %U spi %u size %u spd_id %u", - format_ip6_address, &ip0->src_address, format_ip6_address, - &ip0->dst_address, clib_net_to_host_u32 (esp0->spi), - clib_net_to_host_u16 (ip0->payload_length) + header_size, - spd0->id); -#endif - if (im->fp_spd_ipv6_in_is_enabled && - PREDICT_TRUE (INDEX_INVALID != - spd0->fp_spd.ip6_in_lookup_hash_idx)) - { - ipsec_fp_in_5tuple_from_ip6_range ( - &tuples[0], &ip0->src_address, &ip0->dst_address, - clib_net_to_host_u32 (esp0->spi), - IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT); - ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, - policies, 1); - p0 = policies[0]; - } - else - p0 = ipsec6_input_protect_policy_match ( - spd0, &ip0->src_address, &ip0->dst_address, - clib_net_to_host_u32 (esp0->spi)); - - if (PREDICT_TRUE (p0 != 0)) - { - ipsec_matched += 1; - - pi0 = p0 - im->policies; - vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->payload_length) + - header_size); - - vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; - next0 = im->esp6_decrypt_next_index; - vlib_buffer_advance (b0, header_size); - /* TODO Add policy matching for bypass and discard policy - * type */ - goto trace0; - } - else - { - pi0 = ~0; - ipsec_unprocessed += 1; - next0 = IPSEC_INPUT_NEXT_DROP; - } - } + if (esp0 != NULL) + { + ipsec6_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, + &b0, node, &ipsec_bypassed, + &ipsec_dropped, 
&ipsec_matched, + &ipsec_unprocessed, &next0); + } else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH) { - p0 = ipsec6_input_protect_policy_match (spd0, - &ip0->src_address, - &ip0->dst_address, - clib_net_to_host_u32 - (ah0->spi)); - - if (PREDICT_TRUE (p0 != 0)) - { - ipsec_matched += 1; - pi0 = p0 - im->policies; - vlib_increment_combined_counter - (&ipsec_spd_policy_counters, - thread_index, pi0, 1, - clib_net_to_host_u16 (ip0->payload_length) + - header_size); - - vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; - next0 = im->ah6_decrypt_next_index; - goto trace0; - } - else - { - pi0 = ~0; - ipsec_unprocessed += 1; - next0 = IPSEC_INPUT_NEXT_DROP; - } + ah_header_t *ah0 = (ah_header_t *) ((u8 *) ip0 + header_size); + + p0 = ipsec6_input_protect_policy_match ( + spd0, &ip0->src_address, &ip0->dst_address, + clib_net_to_host_u32 (ah0->spi)); + + if (PREDICT_TRUE (p0 != 0)) + { + ipsec_matched += 1; + pi0 = p0 - im->policies; + vlib_increment_combined_counter ( + &ipsec_spd_policy_counters, thread_index, pi0, 1, + clib_net_to_host_u16 (ip0->payload_length) + header_size); + + vnet_buffer (b0)->ipsec.sad_index = p0->sa_index; + next0 = im->ah6_decrypt_next_index; + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ipsec_input_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + + if (p0) + { + tr->sa_id = p0->sa_id; + tr->policy_type = p0->type; + } + + tr->proto = ip0->protocol; + tr->spi = clib_net_to_host_u32 (ah0->spi); + tr->spd = spd0->id; + tr->policy_index = pi0; + } + } + else + { + pi0 = ~0; + ipsec_unprocessed += 1; + next0 = IPSEC_INPUT_NEXT_DROP; + } } else { - ipsec_unprocessed += 1; - } - - trace0: - if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && - PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - ipsec_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); - - if (p0) - { - tr->sa_id = p0->sa_id; - tr->policy_type = p0->type; - } - - tr->proto = 
ip0->protocol; - tr->spi = clib_net_to_host_u32 (esp0->spi); - tr->seq = clib_net_to_host_u32 (esp0->seq); - tr->spd = spd0->id; - tr->policy_index = pi0; + ipsec_unprocessed += 1; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c index eb4270ac2b4..ec281cd9a11 100644 --- a/src/vnet/ipsec/ipsec_sa.c +++ b/src/vnet/ipsec/ipsec_sa.c @@ -214,7 +214,6 @@ ipsec_sa_init_runtime (ipsec_sa_t *sa) if (ipsec_sa_get_inb_rt (sa)) { ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa); - irt->anti_reply_huge = ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa); irt->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa); irt->use_esn = ipsec_sa_is_set_USE_ESN (sa); irt->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa); @@ -426,7 +425,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_sa_outb_rt_t *ort; clib_error_t *err; ipsec_sa_t *sa; - u32 sa_index; + u32 sa_index, irt_sz; + u16 thread_index = (vlib_num_workers ()) ? 
~0 : 0; u64 rand[2]; uword *p; int rv; @@ -440,17 +440,35 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, pool_get_aligned_zero (im->sa_pool, sa, CLIB_CACHE_LINE_BYTES); sa_index = sa - im->sa_pool; + sa->flags = flags; + + if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64) + /* window size rounded up to next power of 2 */ + anti_replay_window_size = 1 << max_log2 (anti_replay_window_size); + else + anti_replay_window_size = 64; + vec_validate (im->inb_sa_runtimes, sa_index); vec_validate (im->outb_sa_runtimes, sa_index); - irt = clib_mem_alloc_aligned (sizeof (ipsec_sa_inb_rt_t), - _Alignof (ipsec_sa_inb_rt_t)); + irt_sz = sizeof (ipsec_sa_inb_rt_t); + irt_sz += anti_replay_window_size / 8; + irt_sz = round_pow2 (irt_sz, CLIB_CACHE_LINE_BYTES); + + irt = clib_mem_alloc_aligned (irt_sz, alignof (ipsec_sa_inb_rt_t)); ort = clib_mem_alloc_aligned (sizeof (ipsec_sa_outb_rt_t), - _Alignof (ipsec_sa_outb_rt_t)); + alignof (ipsec_sa_outb_rt_t)); im->inb_sa_runtimes[sa_index] = irt; im->outb_sa_runtimes[sa_index] = ort; - clib_memset (irt, 0, sizeof (ipsec_sa_inb_rt_t)); - clib_memset (ort, 0, sizeof (ipsec_sa_outb_rt_t)); + + *irt = (ipsec_sa_inb_rt_t){ + .thread_index = thread_index, + .anti_replay_window_size = anti_replay_window_size, + }; + + *ort = (ipsec_sa_outb_rt_t){ + .thread_index = thread_index, + }; clib_pcg64i_srandom_r (&ort->iv_prng, rand[0], rand[1]); @@ -470,12 +488,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, sa->spi = spi; sa->stat_index = sa_index; sa->protocol = proto; - sa->flags = flags; sa->salt = salt; - if (irt) - irt->thread_index = (vlib_num_workers ()) ? ~0 : 0; - if (ort) - ort->thread_index = (vlib_num_workers ()) ? 
~0 : 0; if (integ_alg != IPSEC_INTEG_ALG_NONE) { @@ -485,9 +498,6 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_sa_set_crypto_alg (sa, crypto_alg); ipsec_sa_set_async_op_ids (sa); - if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64) - ipsec_sa_set_ANTI_REPLAY_HUGE (sa); - clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); if (crypto_alg != IPSEC_CRYPTO_ALG_NONE) @@ -600,17 +610,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, ipsec_register_udp_port (dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa)); } - /* window size rounded up to next power of 2 */ - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - { - anti_replay_window_size = 1 << max_log2 (anti_replay_window_size); - irt->replay_window_huge = - clib_bitmap_set_region (0, 0, 1, anti_replay_window_size); - } - else - { - irt->replay_window = ~0; - } + for (u32 i = 0; i < anti_replay_window_size / uword_bits; i++) + irt->replay_window[i] = ~0ULL; hash_set (im->sa_index_by_sa_id, sa->id, sa_index); @@ -651,9 +652,6 @@ ipsec_sa_del (ipsec_sa_t * sa) vnet_crypto_key_del (vm, sa->crypto_sync_key_index); if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) vnet_crypto_key_del (vm, sa->integ_sync_key_index); - if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) - if (irt && irt->replay_window_huge) - clib_bitmap_free (irt->replay_window_huge); foreach_pointer (p, irt, ort) if (p) clib_mem_free (p); diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h index ce2964a9493..9d8c90bab41 100644 --- a/src/vnet/ipsec/ipsec_sa.h +++ b/src/vnet/ipsec/ipsec_sa.h @@ -100,8 +100,7 @@ typedef struct ipsec_key_t_ _ (32, IS_PROTECT, "Protect") \ _ (64, IS_INBOUND, "inbound") \ _ (512, IS_ASYNC, "async") \ - _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \ - _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge") + _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") typedef enum ipsec_sad_flags_t_ { @@ -149,7 +148,6 @@ typedef struct u16 is_null_gmac : 1; u16 use_esn : 1; u16 
use_anti_replay : 1; - u16 anti_reply_huge : 1; u16 is_protect : 1; u16 is_tunnel : 1; u16 is_transport : 1; @@ -161,16 +159,12 @@ typedef struct u8 udp_sz; u16 thread_index; u32 salt; - u32 seq; - u32 seq_hi; + u64 seq64; u16 async_op_id; vnet_crypto_key_index_t cipher_key_index; vnet_crypto_key_index_t integ_key_index; - union - { - u64 replay_window; - clib_bitmap_t *replay_window_huge; - }; + u32 anti_replay_window_size; + uword replay_window[]; } ipsec_sa_inb_rt_t; typedef struct @@ -186,20 +180,20 @@ typedef struct u16 use_anti_replay : 1; u16 drop_no_crypto : 1; u16 is_async : 1; - clib_pcg64i_random_t iv_prng; u16 cipher_op_id; u16 integ_op_id; u8 cipher_iv_size; u8 esp_block_align; u8 integ_icv_size; + ip_dscp_t t_dscp; + tunnel_encap_decap_flags_t tunnel_flags; u16 thread_index; + u16 async_op_id; u32 salt; - u64 seq64; u32 spi_be; - ip_dscp_t t_dscp; + u64 seq64; dpo_id_t dpo; - tunnel_encap_decap_flags_t tunnel_flags; - u16 async_op_id; + clib_pcg64i_random_t iv_prng; vnet_crypto_key_index_t cipher_key_index; vnet_crypto_key_index_t integ_key_index; union @@ -332,86 +326,35 @@ extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args); #define IPSEC_UDP_PORT_NONE ((u16) ~0) -/* - * Anti Replay definitions - */ - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_irt) \ - (u32) (PREDICT_FALSE (_irt->anti_reply_huge) ? \ - clib_bitmap_bytes (_irt->replay_window_huge) * 8 : \ - BITS (_irt->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_irt, _is_huge) \ - (u32) (_is_huge ? clib_bitmap_bytes (_irt->replay_window_huge) * 8 : \ - BITS (_irt->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_irt) \ - (u64) (PREDICT_FALSE (_irt->anti_reply_huge) ? \ - clib_bitmap_count_set_bits (_irt->replay_window_huge) : \ - count_set_bits (_irt->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_irt, _is_huge) \ - (u64) (_is_huge ? 
clib_bitmap_count_set_bits (_irt->replay_window_huge) : \ - count_set_bits (_irt->replay_window)) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_irt) \ - (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_irt) - 1) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_irt, _is_huge) \ - (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_irt, _is_huge) - 1) - -/* - * sequence number less than the lower bound are outside of the window - * From RFC4303 Appendix A: - * Bl = Tl - W + 1 - */ -#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \ - (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1) - -#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \ - (u32) (_sa->seq - \ - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1) - always_inline u64 ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_inb_rt_t *irt) { - if (!irt->anti_reply_huge) - return irt->replay_window; - u64 w; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (irt); - u32 tl_win_index = irt->seq & (window_size - 1); + u32 window_size = irt->anti_replay_window_size; + u32 tl_win_index = irt->seq64 & (window_size - 1); + uword *bmp = (uword *) irt->replay_window; if (PREDICT_TRUE (tl_win_index >= 63)) - return clib_bitmap_get_multiple (irt->replay_window_huge, - tl_win_index - 63, 64); + return uword_bitmap_get_multiple (bmp, tl_win_index - 63, 64); - w = clib_bitmap_get_multiple_no_check (irt->replay_window_huge, 0, - tl_win_index + 1) + w = uword_bitmap_get_multiple_no_check (bmp, 0, tl_win_index + 1) << (63 - tl_win_index); - w |= clib_bitmap_get_multiple_no_check (irt->replay_window_huge, - window_size - 63 + tl_win_index, - 63 - tl_win_index); + w |= uword_bitmap_get_multiple_no_check ( + bmp, window_size - 63 + tl_win_index, 63 - tl_win_index); return w; } always_inline int -ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 seq, - bool ar_huge) +ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 window_size, + u32 seq) { - u32 
window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge); - /* we assume that the packet is in the window. * if the packet falls left (sa->seq - seq >= window size), * the result is wrong */ - if (ar_huge) - return clib_bitmap_get (irt->replay_window_huge, seq & (window_size - 1)); - else - return (irt->replay_window >> (window_size + seq - irt->seq - 1)) & 1; - - return 0; + return uword_bitmap_is_bit_set ((uword *) irt->replay_window, + seq & (window_size - 1)); } /* @@ -431,13 +374,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 seq, always_inline int ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, u32 hi_seq_used, bool post_decrypt, - u32 *hi_seq_req, bool ar_huge) + u32 *hi_seq_req) { ASSERT ((post_decrypt == false) == (hi_seq_req != 0)); - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge); - u32 window_lower_bound = - IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (irt, ar_huge); + u32 window_size = irt->anti_replay_window_size; + u32 exp_lo = irt->seq64; + u32 exp_hi = irt->seq64 >> 32; + u32 window_lower_bound = exp_lo - window_size + 1; if (!irt->use_esn) { @@ -448,14 +392,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, if (!irt->use_anti_replay) return 0; - if (PREDICT_TRUE (seq > irt->seq)) + if (PREDICT_TRUE (seq > exp_lo)) return 0; /* does the packet fall out on the left of the window */ - if (irt->seq >= seq + window_size) + if (exp_lo >= seq + window_size) return 1; - return ipsec_sa_anti_replay_check (irt, seq, ar_huge); + return ipsec_sa_anti_replay_check (irt, window_size, seq); } if (!irt->use_anti_replay) @@ -473,20 +417,20 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, */ if (hi_seq_req) { - if (seq >= irt->seq) + if (seq >= exp_lo) /* The packet's sequence number is larger that the SA's. * that can't be a warp - unless we lost more than * 2^32 packets ... how could we know? 
*/ - *hi_seq_req = irt->seq_hi; + *hi_seq_req = exp_hi; else { /* The packet's SN is less than the SAs, so either the SN has * wrapped or the SN is just old. */ - if (irt->seq - seq > (1 << 30)) + if (exp_lo - seq > (1 << 30)) /* It's really really really old => it wrapped */ - *hi_seq_req = irt->seq_hi + 1; + *hi_seq_req = exp_hi + 1; else - *hi_seq_req = irt->seq_hi; + *hi_seq_req = exp_hi; } } /* @@ -496,7 +440,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, return 0; } - if (PREDICT_TRUE (window_size > 0 && irt->seq >= window_size - 1)) + if (PREDICT_TRUE (exp_lo >= window_size - 1)) { /* * the last sequence number VPP received is more than one @@ -513,7 +457,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, */ if (post_decrypt) { - if (hi_seq_used == irt->seq_hi) + if (hi_seq_used == exp_hi) /* the high sequence number used to succesfully decrypt this * packet is the same as the last-sequence number of the SA. * that means this packet did not cause a wrap. @@ -530,7 +474,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, /* pre-decrypt it might be the packet that causes a wrap, we * need to decrypt it to find out */ if (hi_seq_req) - *hi_seq_req = irt->seq_hi + 1; + *hi_seq_req = exp_hi + 1; return 0; } } @@ -541,13 +485,13 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, * end of the window. */ if (hi_seq_req) - *hi_seq_req = irt->seq_hi; - if (seq <= irt->seq) + *hi_seq_req = exp_hi; + if (seq <= exp_lo) /* * The received seq number is within bounds of the window * check if it's a duplicate */ - return ipsec_sa_anti_replay_check (irt, seq, ar_huge); + return ipsec_sa_anti_replay_check (irt, window_size, seq); else /* * The received sequence number is greater than the window @@ -572,15 +516,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, /* * the sequence number is less than the lower bound. 
*/ - if (seq <= irt->seq) + if (seq <= exp_lo) { /* * the packet is within the window upper bound. * check for duplicates. */ if (hi_seq_req) - *hi_seq_req = irt->seq_hi; - return ipsec_sa_anti_replay_check (irt, seq, ar_huge); + *hi_seq_req = exp_hi; + return ipsec_sa_anti_replay_check (irt, window_size, seq); } else { @@ -594,7 +538,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, * we've lost close to 2^32 packets. */ if (hi_seq_req) - *hi_seq_req = irt->seq_hi; + *hi_seq_req = exp_hi; return 0; } } @@ -607,8 +551,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, * received packet, the SA has moved on to a higher sequence number. */ if (hi_seq_req) - *hi_seq_req = irt->seq_hi - 1; - return ipsec_sa_anti_replay_check (irt, seq, ar_huge); + *hi_seq_req = exp_hi - 1; + return ipsec_sa_anti_replay_check (irt, window_size, seq); } } @@ -618,121 +562,97 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq, } always_inline u32 -ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc, - bool ar_huge) +ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 window_size, + u32 inc) { + uword *window = irt->replay_window; + u32 window_mask = window_size - 1; u32 n_lost = 0; u32 seen = 0; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge); if (inc < window_size) { - if (ar_huge) - { - /* the number of packets we saw in this section of the window */ - clib_bitmap_t *window = irt->replay_window_huge; - u32 window_lower_bound = (irt->seq + 1) & (window_size - 1); - u32 window_next_lower_bound = - (window_lower_bound + inc) & (window_size - 1); + /* the number of packets we saw in this section of the window */ + u32 window_lower_bound = (irt->seq64 + 1) & window_mask; + u32 window_next_lower_bound = (window_lower_bound + inc) & window_mask; - uword i_block, i_word_start, i_word_end, full_words; - uword n_blocks = window_size >> log2_uword_bits; - 
uword mask; + uword i_block, i_word_start, i_word_end, full_words; + uword n_blocks = window_size >> log2_uword_bits; + uword mask; - i_block = window_lower_bound >> log2_uword_bits; + i_block = window_lower_bound >> log2_uword_bits; - i_word_start = window_lower_bound & (uword_bits - 1); - i_word_end = window_next_lower_bound & (uword_bits - 1); + i_word_start = window_lower_bound & (uword_bits - 1); + i_word_end = window_next_lower_bound & (uword_bits - 1); - /* We stay in the same word */ - if (i_word_start + inc <= uword_bits) - { - mask = pow2_mask (inc) << i_word_start; - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; - } - else + /* We stay in the same word */ + if (i_word_start + inc <= uword_bits) + { + mask = pow2_mask (inc) << i_word_start; + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + } + else + { + full_words = + (inc + i_word_start - uword_bits - i_word_end) >> log2_uword_bits; + + /* count set bits in the first word */ + mask = (uword) ~0 << i_word_start; + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; + i_block = (i_block + 1) & (n_blocks - 1); + + /* count set bits in the next full words */ + /* even if the last word need to be fully counted, we treat it + * apart */ + while (full_words >= 8) { - full_words = (inc + i_word_start - uword_bits - i_word_end) >> - log2_uword_bits; - - /* count set bits in the first word */ - mask = (uword) ~0 << i_word_start; - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; - i_block = (i_block + 1) & (n_blocks - 1); - - /* count set bits in the next full words */ - /* even if the last word need to be fully counted, we treat it - * apart */ - while (full_words >= 8) + if (full_words >= 16) { - if (full_words >= 16) - { - /* prefect the next 8 blocks (64 bytes) */ - clib_prefetch_store ( - &window[(i_block + 8) & (n_blocks - 1)]); - } - - seen += count_set_bits (window[i_block]); - seen += - 
count_set_bits (window[(i_block + 1) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 2) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 3) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 4) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 5) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 6) & (n_blocks - 1)]); - seen += - count_set_bits (window[(i_block + 7) & (n_blocks - 1)]); - window[i_block] = 0; - window[(i_block + 1) & (n_blocks - 1)] = 0; - window[(i_block + 2) & (n_blocks - 1)] = 0; - window[(i_block + 3) & (n_blocks - 1)] = 0; - window[(i_block + 4) & (n_blocks - 1)] = 0; - window[(i_block + 5) & (n_blocks - 1)] = 0; - window[(i_block + 6) & (n_blocks - 1)] = 0; - window[(i_block + 7) & (n_blocks - 1)] = 0; - - i_block = (i_block + 8) & (n_blocks - 1); - full_words -= 8; - } - while (full_words > 0) - { - // last word is treated after the loop - seen += count_set_bits (window[i_block]); - window[i_block] = 0; - i_block = (i_block + 1) & (n_blocks - 1); - full_words--; + /* prefect the next 8 blocks (64 bytes) */ + clib_prefetch_store ( + &window[(i_block + 8) & (n_blocks - 1)]); } - /* the last word */ - mask = pow2_mask (i_word_end); - seen += count_set_bits (window[i_block] & mask); - window[i_block] &= ~mask; + seen += count_set_bits (window[i_block]); + seen += count_set_bits (window[(i_block + 1) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 2) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 3) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 4) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 5) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 6) & (n_blocks - 1)]); + seen += count_set_bits (window[(i_block + 7) & (n_blocks - 1)]); + window[i_block] = 0; + window[(i_block + 1) & (n_blocks - 1)] = 0; + window[(i_block + 2) & (n_blocks - 1)] = 0; + window[(i_block + 3) & (n_blocks - 1)] 
= 0; + window[(i_block + 4) & (n_blocks - 1)] = 0; + window[(i_block + 5) & (n_blocks - 1)] = 0; + window[(i_block + 6) & (n_blocks - 1)] = 0; + window[(i_block + 7) & (n_blocks - 1)] = 0; + + i_block = (i_block + 8) & (n_blocks - 1); + full_words -= 8; + } + while (full_words > 0) + { + // last word is treated after the loop + seen += count_set_bits (window[i_block]); + window[i_block] = 0; + i_block = (i_block + 1) & (n_blocks - 1); + full_words--; } - clib_bitmap_set_no_check (window, - (irt->seq + inc) & (window_size - 1), 1); - } - else - { - /* - * count how many holes there are in the portion - * of the window that we will right shift of the end - * as a result of this increments - */ - u64 old = irt->replay_window & pow2_mask (inc); - /* the number of packets we saw in this section of the window */ - seen = count_set_bits (old); - irt->replay_window = - ((irt->replay_window) >> inc) | (1ULL << (window_size - 1)); + /* the last word */ + mask = pow2_mask (i_word_end); + seen += count_set_bits (window[i_block] & mask); + window[i_block] &= ~mask; } + uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask, + 1); + /* * the number we missed is the size of the window section * minus the number we saw. 
@@ -741,24 +661,17 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc, } else { + u32 n_uwords = window_size / uword_bits; /* holes in the replay window are lost packets */ - n_lost = window_size - - IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (irt, ar_huge); + n_lost = window_size - uword_bitmap_count_set_bits (window, n_uwords); /* any sequence numbers that now fall outside the window * are forever lost */ n_lost += inc - window_size; - if (PREDICT_FALSE (ar_huge)) - { - clib_bitmap_zero (irt->replay_window_huge); - clib_bitmap_set_no_check (irt->replay_window_huge, - (irt->seq + inc) & (window_size - 1), 1); - } - else - { - irt->replay_window = 1ULL << (window_size - 1); - } + uword_bitmap_clear (window, n_uwords); + uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask, + 1); } return n_lost; @@ -775,65 +688,44 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 inc, */ always_inline u64 ipsec_sa_anti_replay_advance (ipsec_sa_inb_rt_t *irt, u32 thread_index, - u32 seq, u32 hi_seq, bool ar_huge) + u32 seq, u32 hi_seq) { u64 n_lost = 0; - u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (irt, ar_huge); + u32 window_size = irt->anti_replay_window_size; + u32 masked_seq = seq & (window_size - 1); + u32 exp_lo = irt->seq64; + u32 exp_hi = irt->seq64 >> 32; u32 pos; if (irt->use_esn) { - int wrap = hi_seq - irt->seq_hi; + int wrap = hi_seq - exp_hi; - if (wrap == 0 && seq > irt->seq) + if (wrap == 0 && seq > exp_lo) { - pos = seq - irt->seq; - n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge); - irt->seq = seq; + pos = seq - exp_lo; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + irt->seq64 = (u64) exp_hi << 32 | seq; } else if (wrap > 0) { - pos = seq + ~irt->seq + 1; - n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge); - irt->seq = seq; - irt->seq_hi = hi_seq; - } - else if (wrap < 0) - { - pos = ~seq + irt->seq + 1; - if (ar_huge) - 
clib_bitmap_set_no_check (irt->replay_window_huge, - seq & (window_size - 1), 1); - else - irt->replay_window |= (1ULL << (window_size - 1 - pos)); + pos = seq + ~exp_lo + 1; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + irt->seq64 = (u64) hi_seq << 32 | seq; } else - { - pos = irt->seq - seq; - if (ar_huge) - clib_bitmap_set_no_check (irt->replay_window_huge, - seq & (window_size - 1), 1); - else - irt->replay_window |= (1ULL << (window_size - 1 - pos)); - } + uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1); } else { - if (seq > irt->seq) + if (seq > exp_lo) { - pos = seq - irt->seq; - n_lost = ipsec_sa_anti_replay_window_shift (irt, pos, ar_huge); - irt->seq = seq; + pos = seq - exp_lo; + n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos); + irt->seq64 = (u64) exp_hi << 32 | seq; } else - { - pos = irt->seq - seq; - if (ar_huge) - clib_bitmap_set_no_check (irt->replay_window_huge, - seq & (window_size - 1), 1); - else - irt->replay_window |= (1ULL << (window_size - 1 - pos)); - } + uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1); } return n_lost; diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index a62f914d43a..e2f9a6883fe 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -38,12 +38,12 @@ * */ uword -unformat_vnet_uri (unformat_input_t * input, va_list * args) +unformat_vnet_uri (unformat_input_t *input, va_list *args) { session_endpoint_cfg_t *sep = va_arg (*args, session_endpoint_cfg_t *); u32 transport_proto = 0, port; - if (unformat (input, "%U://%U/%d", unformat_transport_proto, + if (unformat (input, "%U://%U:%d", unformat_transport_proto, &transport_proto, unformat_ip4_address, &sep->ip.ip4, &port)) { sep->transport_proto = transport_proto; @@ -52,6 +52,54 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args) return 1; } else if (unformat (input, "%U://%U/%d", 
unformat_transport_proto, + &transport_proto, unformat_ip4_address, &sep->ip.ip4, + &port)) + { + sep->transport_proto = transport_proto; + sep->port = clib_host_to_net_u16 (port); + sep->is_ip4 = 1; + return 1; + } + else if (unformat (input, "%U://%U", unformat_transport_proto, + &transport_proto, unformat_ip4_address, &sep->ip.ip4)) + { + sep->transport_proto = transport_proto; + if (sep->transport_proto == TRANSPORT_PROTO_HTTP) + port = 80; + else if (sep->transport_proto == TRANSPORT_PROTO_TLS) + port = 443; + else + return 0; + + sep->port = clib_host_to_net_u16 (port); + sep->is_ip4 = 1; + return 1; + } + else if (unformat (input, "%U://[%U]:%d", unformat_transport_proto, + &transport_proto, unformat_ip6_address, &sep->ip.ip6, + &port)) + { + sep->transport_proto = transport_proto; + sep->port = clib_host_to_net_u16 (port); + sep->is_ip4 = 0; + return 1; + } + else if (unformat (input, "%U://[%U]", unformat_transport_proto, + &transport_proto, unformat_ip6_address, &sep->ip.ip6)) + { + sep->transport_proto = transport_proto; + if (sep->transport_proto == TRANSPORT_PROTO_HTTP) + port = 80; + else if (sep->transport_proto == TRANSPORT_PROTO_TLS) + port = 443; + else + return 0; + + sep->port = clib_host_to_net_u16 (port); + sep->is_ip4 = 0; + return 1; + } + else if (unformat (input, "%U://%U/%d", unformat_transport_proto, &transport_proto, unformat_ip6_address, &sep->ip.ip6, &port)) { @@ -106,6 +154,45 @@ parse_uri (char *uri, session_endpoint_cfg_t *sep) return 0; } +/* Use before 'parse_uri()'. Removes target from URI and copies it to 'char + * **target'. char **target is resized automatically. 
+ */ +session_error_t +parse_target (char **uri, char **target) +{ + u8 counter = 0; + + for (u32 i = 0; i < (u32) strlen (*uri); i++) + { + if ((*uri)[i] == '/') + counter++; + + if (counter == 3) + { + /* resize and make space for NULL terminator */ + if (vec_len (*target) < strlen (*uri) - i + 2) + vec_resize (*target, strlen (*uri) - i + 2); + + strncpy (*target, *uri + i, strlen (*uri) - i); + (*uri)[i + 1] = '\0'; + break; + } + } + + if (!*target) + { + vec_resize (*target, 2); + **target = '/'; + } + + vec_terminate_c_string (*target); + + if (!*target) + return SESSION_E_INVALID; + + return 0; +} + session_error_t vnet_bind_uri (vnet_listen_args_t *a) { diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 21ed97998f2..33b61187fe3 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -281,6 +281,7 @@ typedef enum session_fd_flag_ } session_fd_flag_t; session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep); +session_error_t parse_target (char **uri, char **target); session_error_t vnet_bind_uri (vnet_listen_args_t *); session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a); session_error_t vnet_connect_uri (vnet_connect_args_t *a); diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 2a6ac283fb9..d65371e81e5 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -28,9 +28,17 @@ session_main_t session_main; +typedef enum +{ + SESSION_EVT_RPC, + SESSION_EVT_IO, + SESSION_EVT_SESSION, +} session_evt_family_t; + static inline int session_send_evt_to_thread (void *data, void *args, u32 thread_index, - session_evt_type_t evt_type) + session_evt_type_t evt_type, + session_evt_family_t family) { session_worker_t *wrk = session_main_get_worker (thread_index); session_event_t *evt; @@ -45,30 +53,33 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, svm_msg_q_unlock (mq); return -2; } - switch 
(evt_type) + switch (family) { - case SESSION_CTRL_EVT_RPC: + case SESSION_EVT_RPC: + ASSERT (evt_type == SESSION_CTRL_EVT_RPC); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->rpc_args.fp = data; evt->rpc_args.arg = args; break; - case SESSION_IO_EVT_RX: - case SESSION_IO_EVT_TX: - case SESSION_IO_EVT_TX_FLUSH: - case SESSION_IO_EVT_BUILTIN_RX: + case SESSION_EVT_IO: + ASSERT (evt_type == SESSION_IO_EVT_RX || evt_type == SESSION_IO_EVT_TX || + evt_type == SESSION_IO_EVT_TX_FLUSH || + evt_type == SESSION_IO_EVT_BUILTIN_RX); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_index = *(u32 *) data; break; - case SESSION_IO_EVT_TX_MAIN: - case SESSION_CTRL_EVT_CLOSE: - case SESSION_CTRL_EVT_RESET: + case SESSION_EVT_SESSION: + ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE || + evt_type == SESSION_CTRL_EVT_HALF_CLOSE || + evt_type == SESSION_CTRL_EVT_RESET); msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING); evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg); evt->session_handle = session_handle ((session_t *) data); break; default: + ASSERT (0); clib_warning ("evt unhandled!"); svm_msg_q_unlock (mq); return -1; @@ -88,7 +99,8 @@ int session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type) { return session_send_evt_to_thread (&f->vpp_session_index, 0, - f->master_thread_index, evt_type); + f->master_thread_index, evt_type, + SESSION_EVT_IO); } /* Deprecated, use session_program_* functions */ @@ -96,14 +108,16 @@ int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, session_evt_type_t evt_type) { - return session_send_evt_to_thread (data, 0, thread_index, evt_type); + return session_send_evt_to_thread (data, 0, thread_index, evt_type, + SESSION_EVT_IO); } int session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) { return 
session_send_evt_to_thread ((void *) &sh.session_index, 0, - (u32) sh.thread_index, evt_type); + (u32) sh.thread_index, evt_type, + SESSION_EVT_IO); } int @@ -116,9 +130,9 @@ session_program_rx_io_evt (session_handle_tu_t sh) } else { - return session_send_evt_to_thread ((void *) &sh.session_index, 0, - (u32) sh.thread_index, - SESSION_IO_EVT_BUILTIN_RX); + return session_send_evt_to_thread ( + (void *) &sh.session_index, 0, (u32) sh.thread_index, + SESSION_IO_EVT_BUILTIN_RX, SESSION_EVT_IO); } } @@ -127,25 +141,24 @@ session_program_transport_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type) { return session_send_evt_to_thread ((void *) &sh.session_index, 0, - (u32) sh.thread_index, evt_type); + (u32) sh.thread_index, evt_type, + SESSION_EVT_IO); } int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only events supported are disconnect, shutdown and reset */ - ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE || - evt_type == SESSION_CTRL_EVT_HALF_CLOSE || - evt_type == SESSION_CTRL_EVT_RESET); - return session_send_evt_to_thread (s, 0, s->thread_index, evt_type); + return session_send_evt_to_thread (s, 0, s->thread_index, evt_type, + SESSION_EVT_SESSION); } void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, void *rpc_args) { - session_send_evt_to_thread (fp, rpc_args, thread_index, - SESSION_CTRL_EVT_RPC); + session_send_evt_to_thread (fp, rpc_args, thread_index, SESSION_CTRL_EVT_RPC, + SESSION_EVT_RPC); } void @@ -480,115 +493,6 @@ session_alloc_for_half_open (transport_connection_t *tc) return s; } -/** - * Discards bytes from buffer chain - * - * It discards n_bytes_to_drop starting at first buffer after chain_b - */ -always_inline void -session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b, - vlib_buffer_t ** chain_b, - u32 n_bytes_to_drop) -{ - vlib_buffer_t *next = *chain_b; - u32 to_drop = n_bytes_to_drop; - ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); - while (to_drop && 
(next->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - next = vlib_get_buffer (vm, next->next_buffer); - if (next->current_length > to_drop) - { - vlib_buffer_advance (next, to_drop); - to_drop = 0; - } - else - { - to_drop -= next->current_length; - next->current_length = 0; - } - } - *chain_b = next; - - if (to_drop == 0) - b->total_length_not_including_first_buffer -= n_bytes_to_drop; -} - -/** - * Enqueue buffer chain tail - */ -always_inline int -session_enqueue_chain_tail (session_t * s, vlib_buffer_t * b, - u32 offset, u8 is_in_order) -{ - vlib_buffer_t *chain_b; - u32 chain_bi, len, diff; - vlib_main_t *vm = vlib_get_main (); - u8 *data; - u32 written = 0; - int rv = 0; - - if (is_in_order && offset) - { - diff = offset - b->current_length; - if (diff > b->total_length_not_including_first_buffer) - return 0; - chain_b = b; - session_enqueue_discard_chain_bytes (vm, b, &chain_b, diff); - chain_bi = vlib_get_buffer_index (vm, chain_b); - } - else - chain_bi = b->next_buffer; - - do - { - chain_b = vlib_get_buffer (vm, chain_bi); - data = vlib_buffer_get_current (chain_b); - len = chain_b->current_length; - if (!len) - continue; - if (is_in_order) - { - rv = svm_fifo_enqueue (s->rx_fifo, len, data); - if (rv == len) - { - written += rv; - } - else if (rv < len) - { - return (rv > 0) ? (written + rv) : written; - } - else if (rv > len) - { - written += rv; - - /* written more than what was left in chain */ - if (written > b->total_length_not_including_first_buffer) - return written; - - /* drop the bytes that have already been delivered */ - session_enqueue_discard_chain_bytes (vm, b, &chain_b, rv - len); - } - } - else - { - rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data); - if (rv) - { - clib_warning ("failed to enqueue multi-buffer seg"); - return -1; - } - offset += len; - } - } - while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) - ? 
chain_b->next_buffer : 0)); - - if (is_in_order) - return written; - - return 0; -} - void session_fifo_tuning (session_t * s, svm_fifo_t * f, session_ft_action_t act, u32 len) @@ -760,154 +664,6 @@ session_main_flush_enqueue_events (transport_proto_t transport_proto, wrk->session_to_enqueue[transport_proto] = handles; } -/* - * Enqueue data for delivery to app. If requested, it queues app notification - * event for later delivery. - * - * @param tc Transport connection which is to be enqueued data - * @param b Buffer to be enqueued - * @param offset Offset at which to start enqueueing if out-of-order - * @param queue_event Flag to indicate if peer is to be notified or if event - * is to be queued. The former is useful when more data is - * enqueued and only one event is to be generated. - * @param is_in_order Flag to indicate if data is in order - * @return Number of bytes enqueued or a negative value if enqueueing failed. - */ -int -session_enqueue_stream_connection (transport_connection_t * tc, - vlib_buffer_t * b, u32 offset, - u8 queue_event, u8 is_in_order) -{ - session_t *s; - int enqueued = 0, rv, in_order_off; - - s = session_get (tc->s_index, tc->thread_index); - - if (is_in_order) - { - enqueued = svm_fifo_enqueue (s->rx_fifo, - b->current_length, - vlib_buffer_get_current (b)); - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) - && enqueued >= 0)) - { - in_order_off = enqueued > b->current_length ? enqueued : 0; - rv = session_enqueue_chain_tail (s, b, in_order_off, 1); - if (rv > 0) - enqueued += rv; - } - } - else - { - rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, - b->current_length, - vlib_buffer_get_current (b)); - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv)) - session_enqueue_chain_tail (s, b, offset + b->current_length, 0); - /* if something was enqueued, report even this as success for ooo - * segment handling */ - return rv; - } - - if (queue_event) - { - /* Queue RX event on this fifo. 
Eventually these will need to be - * flushed by calling @ref session_main_flush_enqueue_events () */ - if (!(s->flags & SESSION_F_RX_EVT)) - { - session_worker_t *wrk = session_main_get_worker (s->thread_index); - ASSERT (s->thread_index == vlib_get_thread_index ()); - s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); - } - - return enqueued; -} - -always_inline int -session_enqueue_dgram_connection_inline (session_t *s, - session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event, u32 is_cl) -{ - int rv; - - ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) - >= b->current_length + sizeof (*hdr)); - - if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) - { - svm_fifo_seg_t segs[2] = { - { (u8 *) hdr, sizeof (*hdr) }, - { vlib_buffer_get_current (b), b->current_length } - }; - - rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, - 0 /* allow_partial */ ); - } - else - { - vlib_main_t *vm = vlib_get_main (); - svm_fifo_seg_t *segs = 0, *seg; - vlib_buffer_t *it = b; - u32 n_segs = 1; - - vec_add2 (segs, seg, 1); - seg->data = (u8 *) hdr; - seg->len = sizeof (*hdr); - while (it) - { - vec_add2 (segs, seg, 1); - seg->data = vlib_buffer_get_current (it); - seg->len = it->current_length; - n_segs++; - if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT)) - break; - it = vlib_get_buffer (vm, it->next_buffer); - } - rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs, - 0 /* allow partial */ ); - vec_free (segs); - } - - if (queue_event && rv > 0) - { - /* Queue RX event on this fifo. Eventually these will need to be - * flushed by calling @ref session_main_flush_enqueue_events () */ - if (!(s->flags & SESSION_F_RX_EVT)) - { - u32 thread_index = - is_cl ? 
vlib_get_thread_index () : s->thread_index; - session_worker_t *wrk = session_main_get_worker (thread_index); - ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); - s->flags |= SESSION_F_RX_EVT; - vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); - } - - session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); - } - return rv > 0 ? rv : 0; -} - -int -session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, u8 queue_event) -{ - return session_enqueue_dgram_connection_inline (s, hdr, b, proto, - queue_event, 0 /* is_cl */); -} - -int -session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, u8 queue_event) -{ - return session_enqueue_dgram_connection_inline (s, hdr, b, proto, - queue_event, 1 /* is_cl */); -} - int session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr, vlib_buffer_t *b, u8 proto, diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index daa3bf97f56..d6c5cbc6085 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -151,6 +151,8 @@ typedef struct session_worker_ /** Per-app-worker bitmap of pending notifications */ uword *app_wrks_pending_ntf; + svm_fifo_seg_t *rx_segs; + int config_index; u8 dma_enabled; session_dma_transfer *dma_trans; @@ -323,6 +325,67 @@ typedef struct _session_enable_disable_args_t #define TRANSPORT_PROTO_INVALID (session_main.last_transport_proto_type + 1) #define TRANSPORT_N_PROTOS (session_main.last_transport_proto_type + 1) +/* + * Session layer functions + */ + +always_inline session_main_t * +vnet_get_session_main () +{ + return &session_main; +} + +always_inline session_worker_t * +session_main_get_worker (u32 thread_index) +{ + return vec_elt_at_index (session_main.wrk, thread_index); +} + +static inline session_worker_t * +session_main_get_worker_if_valid (u32 thread_index) +{ + if (thread_index > vec_len (session_main.wrk)) + 
return 0; + return session_main_get_worker (thread_index); +} + +always_inline svm_msg_q_t * +session_main_get_vpp_event_queue (u32 thread_index) +{ + return session_main_get_worker (thread_index)->vpp_event_queue; +} + +always_inline u8 +session_main_is_enabled () +{ + return session_main.is_enabled == 1; +} + +always_inline void +session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) +{ + if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) + wrk->stats.errors[-error] += value; + else + SESSION_DBG ("unknown session counter"); +} + +always_inline void +session_stat_error_inc (int error, int value) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vlib_get_thread_index ()); + session_worker_stat_error_inc (wrk, error, value); +} + +#define session_cli_return_if_not_enabled() \ + do \ + { \ + if (!session_main.is_enabled) \ + return clib_error_return (0, "session layer is not enabled"); \ + } \ + while (0) + static inline void session_evt_add_old (session_worker_t * wrk, session_evt_elt_t * elt) { @@ -517,20 +580,6 @@ uword unformat_transport_connection (unformat_input_t * input, * Interface to transport protos */ -int session_enqueue_stream_connection (transport_connection_t * tc, - vlib_buffer_t * b, u32 offset, - u8 queue_event, u8 is_in_order); -int session_enqueue_dgram_connection (session_t * s, - session_dgram_hdr_t * hdr, - vlib_buffer_t * b, u8 proto, - u8 queue_event); -int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event); -int session_enqueue_dgram_connection_cl (session_t *s, - session_dgram_hdr_t *hdr, - vlib_buffer_t *b, u8 proto, - u8 queue_event); int session_stream_connect_notify (transport_connection_t * tc, session_error_t err); int session_dgram_connect_notify (transport_connection_t * tc, @@ -566,9 +615,279 @@ void session_register_transport (transport_proto_t transport_proto, u32 output_node); transport_proto_t session_add_transport_proto 
(void); void session_register_update_time_fn (session_update_time_fn fn, u8 is_add); +void session_main_flush_enqueue_events (transport_proto_t transport_proto, + u32 thread_index); +void session_queue_run_on_main_thread (vlib_main_t *vm); int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer, u32 offset, u32 max_bytes); u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes); +int session_enqueue_dgram_connection_cl (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event); +void session_fifo_tuning (session_t *s, svm_fifo_t *f, session_ft_action_t act, + u32 len); + +/** + * Discards bytes from buffer chain + * + * It discards n_bytes_to_drop starting at first buffer after chain_b + */ +always_inline void +session_enqueue_discard_chain_bytes (vlib_main_t *vm, vlib_buffer_t *b, + vlib_buffer_t **chain_b, + u32 n_bytes_to_drop) +{ + vlib_buffer_t *next = *chain_b; + u32 to_drop = n_bytes_to_drop; + ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT); + while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + next = vlib_get_buffer (vm, next->next_buffer); + if (next->current_length > to_drop) + { + vlib_buffer_advance (next, to_drop); + to_drop = 0; + } + else + { + to_drop -= next->current_length; + next->current_length = 0; + } + } + *chain_b = next; + + if (to_drop == 0) + b->total_length_not_including_first_buffer -= n_bytes_to_drop; +} + +/** + * Enqueue buffer chain tail + */ +always_inline int +session_enqueue_chain_tail (session_t *s, vlib_buffer_t *b, u32 offset, + u8 is_in_order) +{ + vlib_buffer_t *chain_b; + u32 chain_bi; + + if (is_in_order) + { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + u32 diff, written = 0; + + if (offset) + { + diff = offset - b->current_length; + if (diff > b->total_length_not_including_first_buffer) + return 0; + chain_b = b; + session_enqueue_discard_chain_bytes (wrk->vm, b, &chain_b, diff); + chain_bi = vlib_get_buffer_index 
(wrk->vm, chain_b); + } + else + { + chain_bi = b->next_buffer; + } + + chain_b = vlib_get_buffer (wrk->vm, chain_bi); + svm_fifo_seg_t *seg; + + while (chain_b) + { + vec_add2 (wrk->rx_segs, seg, 1); + seg->data = vlib_buffer_get_current (chain_b); + seg->len = chain_b->current_length; + chain_b = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ? + vlib_get_buffer (wrk->vm, chain_b->next_buffer) : + 0; + } + + written = svm_fifo_enqueue_segments (s->rx_fifo, wrk->rx_segs, + vec_len (wrk->rx_segs), + 1 /* allow partial*/); + + vec_reset_length (wrk->rx_segs); + + return written; + } + else + { + vlib_main_t *vm = vlib_get_main (); + int rv = 0; + u8 *data; + u32 len; + + /* TODO svm_fifo_enqueue_segments with offset */ + chain_bi = b->next_buffer; + do + { + chain_b = vlib_get_buffer (vm, chain_bi); + data = vlib_buffer_get_current (chain_b); + len = chain_b->current_length; + if (!len) + continue; + + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data); + if (rv) + { + clib_warning ("failed to enqueue multi-buffer seg"); + return -1; + } + offset += len; + } + while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ? + chain_b->next_buffer : + 0)); + + return 0; + } +} + +/* + * Enqueue data for delivery to app. If requested, it queues app notification + * event for later delivery. + * + * @param tc Transport connection which is to be enqueued data + * @param b Buffer to be enqueued + * @param offset Offset at which to start enqueueing if out-of-order + * @param queue_event Flag to indicate if peer is to be notified or if event + * is to be queued. The former is useful when more data is + * enqueued and only one event is to be generated. + * @param is_in_order Flag to indicate if data is in order + * @return Number of bytes enqueued or a negative value if enqueueing failed. 
+ */ +always_inline int +session_enqueue_stream_connection (transport_connection_t *tc, + vlib_buffer_t *b, u32 offset, + u8 queue_event, u8 is_in_order) +{ + session_t *s; + int enqueued = 0, rv, in_order_off; + + s = session_get (tc->s_index, tc->thread_index); + + if (is_in_order) + { + enqueued = svm_fifo_enqueue (s->rx_fifo, b->current_length, + vlib_buffer_get_current (b)); + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && + enqueued >= 0)) + { + in_order_off = enqueued > b->current_length ? enqueued : 0; + rv = session_enqueue_chain_tail (s, b, in_order_off, 1); + if (rv > 0) + enqueued += rv; + } + } + else + { + rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, b->current_length, + vlib_buffer_get_current (b)); + if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv)) + session_enqueue_chain_tail (s, b, offset + b->current_length, 0); + /* if something was enqueued, report even this as success for ooo + * segment handling */ + return rv; + } + + if (queue_event) + { + /* Queue RX event on this fifo. 
Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ + if (!(s->flags & SESSION_F_RX_EVT)) + { + session_worker_t *wrk = session_main_get_worker (s->thread_index); + ASSERT (s->thread_index == vlib_get_thread_index ()); + s->flags |= SESSION_F_RX_EVT; + vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s)); + } + + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); + } + + return enqueued; +} + +always_inline int +session_enqueue_dgram_connection_inline (session_t *s, + session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, + u8 queue_event, u32 is_cl) +{ + int rv; + + ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) >= + b->current_length + sizeof (*hdr)); + + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) + { + svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) }, + { vlib_buffer_get_current (b), + b->current_length } }; + + rv = + svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */); + } + else + { + vlib_main_t *vm = vlib_get_main (); + svm_fifo_seg_t *segs = 0, *seg; + vlib_buffer_t *it = b; + u32 n_segs = 1; + + vec_add2 (segs, seg, 1); + seg->data = (u8 *) hdr; + seg->len = sizeof (*hdr); + while (it) + { + vec_add2 (segs, seg, 1); + seg->data = vlib_buffer_get_current (it); + seg->len = it->current_length; + n_segs++; + if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + it = vlib_get_buffer (vm, it->next_buffer); + } + rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs, + 0 /* allow partial */); + vec_free (segs); + } + + if (queue_event && rv > 0) + { + /* Queue RX event on this fifo. Eventually these will need to be + * flushed by calling @ref session_main_flush_enqueue_events () */ + if (!(s->flags & SESSION_F_RX_EVT)) + { + u32 thread_index = + is_cl ? 
vlib_get_thread_index () : s->thread_index; + session_worker_t *wrk = session_main_get_worker (thread_index); + ASSERT (s->thread_index == vlib_get_thread_index () || is_cl); + s->flags |= SESSION_F_RX_EVT; + vec_add1 (wrk->session_to_enqueue[proto], session_handle (s)); + } + + session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); + } + return rv > 0 ? rv : 0; +} + +always_inline int +session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 0 /* is_cl */); +} + +always_inline int +session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr, + vlib_buffer_t *b, u8 proto, u8 queue_event) +{ + return session_enqueue_dgram_connection_inline (s, hdr, b, proto, + queue_event, 1 /* is_cl */); +} always_inline void session_set_state (session_t *s, session_state_t session_state) @@ -753,69 +1072,6 @@ ho_session_free (session_t *s) transport_connection_t *listen_session_get_transport (session_t * s); -/* - * Session layer functions - */ - -always_inline session_main_t * -vnet_get_session_main () -{ - return &session_main; -} - -always_inline session_worker_t * -session_main_get_worker (u32 thread_index) -{ - return vec_elt_at_index (session_main.wrk, thread_index); -} - -static inline session_worker_t * -session_main_get_worker_if_valid (u32 thread_index) -{ - if (thread_index > vec_len (session_main.wrk)) - return 0; - return session_main_get_worker (thread_index); -} - -always_inline svm_msg_q_t * -session_main_get_vpp_event_queue (u32 thread_index) -{ - return session_main_get_worker (thread_index)->vpp_event_queue; -} - -always_inline u8 -session_main_is_enabled () -{ - return session_main.is_enabled == 1; -} - -always_inline void -session_worker_stat_error_inc (session_worker_t *wrk, int error, int value) -{ - if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS)) - wrk->stats.errors[-error] += 
value; - else - SESSION_DBG ("unknown session counter"); -} - -always_inline void -session_stat_error_inc (int error, int value) -{ - session_worker_t *wrk; - wrk = session_main_get_worker (vlib_get_thread_index ()); - session_worker_stat_error_inc (wrk, error, value); -} - -#define session_cli_return_if_not_enabled() \ -do { \ - if (!session_main.is_enabled) \ - return clib_error_return (0, "session layer is not enabled"); \ -} while (0) - -void session_main_flush_enqueue_events (transport_proto_t transport_proto, - u32 thread_index); -void session_queue_run_on_main_thread (vlib_main_t * vm); - /** * Add session node pending buffer with custom node * diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 28a1feb1ed8..7678b0e0761 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1380,6 +1380,71 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl, lcl_port, rmt_port, proto); } +/** + * Lookup exact match 6-tuple amongst established and half-open sessions + * + * Does not look into session rules table and does not try to find a listener. 
+ */ +transport_connection_t * +session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt, + u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4) +{ + session_table_t *st; + session_t *s; + int rv; + + if (is_ip4) + { + session_kv4_t kv4; + + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index); + if (PREDICT_FALSE (!st)) + return 0; + + /* + * Lookup session amongst established ones + */ + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4); + if (rv == 0) + { + s = session_get_from_handle (kv4.value); + return transport_get_connection (proto, s->connection_index, + s->thread_index); + } + + /* + * Try half-open connections + */ + rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4); + if (rv == 0) + return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF); + } + else + { + session_kv6_t kv6; + + st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index); + if (PREDICT_FALSE (!st)) + return 0; + + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6); + if (rv == 0) + { + s = session_get_from_handle (kv6.value); + return transport_get_connection (proto, s->connection_index, + s->thread_index); + } + + /* Try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6); + if (rv == 0) + return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF); + } + return 0; +} + session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args) { diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h index 9f56af20a87..8f9ff7ee9bc 100644 --- a/src/vnet/session/session_lookup.h +++ b/src/vnet/session/session_lookup.h @@ -72,6 +72,9 @@ transport_connection_t *session_lookup_connection (u32 fib_index, ip46_address_t * rmt, u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4); 
+transport_connection_t * +session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt, + u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4); session_t *session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl, u16 lcl_port, u8 proto, u8 use_wildcard); session_t *session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl, diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index c0ff1de39bc..655f7ada09e 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -836,8 +836,7 @@ vlib_node_registration_t session_queue_node; typedef struct { - u32 session_index; - u32 server_thread_index; + u32 thread_index; } session_queue_trace_t; /* packet trace format function */ @@ -848,8 +847,7 @@ format_session_queue_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); - s = format (s, "session index %d thread index %d", - t->session_index, t->server_thread_index); + s = format (s, "thread index %d", t->thread_index); return s; } @@ -880,25 +878,25 @@ enum }; static void -session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, - u32 next_index, vlib_buffer_t **bufs, u16 n_segs, - session_t *s, u32 n_trace) +session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *bis, + u16 *nexts, u16 n_bufs) { - vlib_buffer_t **b = bufs; + u32 n_trace = vlib_get_trace_count (vm, node), *bi = bis; + u16 *next = nexts; + vlib_buffer_t *b; - while (n_trace && n_segs) + while (n_trace && n_bufs) { - if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0], - 1 /* follow_chain */))) + b = vlib_get_buffer (vm, bi[0]); + if (PREDICT_TRUE ( + vlib_trace_buffer (vm, node, next[0], b, 1 /* follow_chain */))) { - session_queue_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->session_index = s->session_index; - t->server_thread_index = s->thread_index; + 
session_queue_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->thread_index = vm->thread_index; n_trace--; } - b++; - n_segs--; + bi++; + n_bufs--; } vlib_set_trace_count (vm, node, n_trace); } @@ -1194,7 +1192,7 @@ session_tx_not_ready (session_t * s, u8 peek_data) } else { - if (s->session_state == SESSION_STATE_TRANSPORT_DELETED) + if (s->session_state == SESSION_STATE_TRANSPORT_DELETED || !s->tx_fifo) return 2; } return 0; @@ -1402,7 +1400,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, session_evt_elt_t * elt, int *n_tx_packets, u8 peek_data) { - u32 n_trace, n_left, pbi, next_index, max_burst; + u32 n_left, pbi, next_index, max_burst; session_tx_context_t *ctx = &wrk->ctx; session_main_t *smm = &session_main; session_event_t *e = &elt->evt; @@ -1576,10 +1574,6 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs, ctx->n_segs_per_evt); - if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0)) - session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs, - ctx->n_segs_per_evt, ctx->s, n_trace); - if (PREDICT_FALSE (n_bufs)) vlib_buffer_free (vm, ctx->tx_buffers, n_bufs); @@ -2072,7 +2066,13 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk); if (vec_len (wrk->pending_tx_buffers)) - session_flush_pending_tx_buffers (wrk, node); + { + if (PREDICT_FALSE (vlib_get_trace_count (vm, node) > 0)) + session_tx_trace_frame (vm, node, wrk->pending_tx_buffers, + wrk->pending_tx_nexts, + vec_len (wrk->pending_tx_nexts)); + session_flush_pending_tx_buffers (wrk, node); + } vlib_node_increment_counter (vm, session_queue_node.index, SESSION_QUEUE_ERROR_TX, n_tx_packets); diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index ac9b54f333a..3bd12d82fd8 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -592,6 +592,7 @@ 
transport_endpoint_mark_used (u8 proto, u32 fib_index, ip46_address_t *ip, /* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); + lep->ep.fib_index = fib_index; lep->ep.port = port; lep->proto = proto; lep->refcnt = 1; @@ -661,8 +662,8 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr, break; /* IP:port pair already in use, check if 6-tuple available */ - if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port, - rmt->port, proto, rmt->is_ip4)) + if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, port, + rmt->port, proto, rmt->is_ip4)) continue; /* 6-tuple is available so increment lcl endpoint refcount */ @@ -683,6 +684,13 @@ transport_port_alloc_max_tries () return tm->port_alloc_max_tries; } +u32 +transport_port_local_in_use () +{ + transport_main_t *tm = &tp_main; + return pool_elts (tm->local_endpoints) - vec_len (tm->lcl_endpts_freelist); +} + void transport_clear_stats () { @@ -792,9 +800,9 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, return 0; /* IP:port pair already in use, check if 6-tuple available */ - if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, - rmt_cfg->peer.port, rmt->port, proto, - rmt->is_ip4)) + if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, + rmt_cfg->peer.port, rmt->port, proto, + rmt->is_ip4)) return SESSION_E_PORTINUSE; /* 6-tuple is available so increment lcl endpoint refcount */ diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index c864be139f9..de2a6becaae 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -253,6 +253,7 @@ void transport_share_local_endpoint (u8 proto, u32 fib_index, int transport_release_local_endpoint (u8 proto, u32 fib_index, ip46_address_t *lcl_ip, u16 port); u16 transport_port_alloc_max_tries (); +u32 transport_port_local_in_use (); void transport_clear_stats (); void 
transport_enable_disable (vlib_main_t * vm, u8 is_en); void transport_init (void); diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index a9114628f95..92586669378 100644 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -503,8 +503,9 @@ update_lb (ip6_sr_policy_t * sr_policy) }; /* Add FIB entry for BSID */ - fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, - FIB_PROTOCOL_IP6); + fhc = fib_table_get_flow_hash_config ( + fib_table_find (FIB_PROTOCOL_IP6, sr_policy->fib_table), + FIB_PROTOCOL_IP6); dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, load_balance_create (0, DPO_PROTO_IP6, fhc)); diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 8851fb9c77e..02239d991bd 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1467,7 +1467,7 @@ tcp_stats_collector_fn (vlib_stats_collector_data_t *d) tcp_wrk_stats_t acc = {}; tcp_worker_ctx_t *wrk; - vec_foreach (wrk, tm->wrk_ctx) + vec_foreach (wrk, tm->wrk) { #define _(name, type, str) acc.name += wrk->stats.name; foreach_tcp_wrk_stat @@ -1515,7 +1515,7 @@ tcp_main_enable (vlib_main_t * vm) int thread; /* Already initialized */ - if (tm->wrk_ctx) + if (tm->wrk) return 0; if ((error = vlib_call_init_function (vm, ip_main_init))) @@ -1537,11 +1537,11 @@ tcp_main_enable (vlib_main_t * vm) */ num_threads = 1 /* main thread */ + vtm->n_threads; - vec_validate (tm->wrk_ctx, num_threads - 1); + vec_validate (tm->wrk, num_threads - 1); n_workers = num_threads == 1 ? 
1 : vtm->n_threads; prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers; - wrk = &tm->wrk_ctx[0]; + wrk = &tm->wrk[0]; wrk->tco_next_node[0] = vlib_node_get_next (vm, session_queue_node.index, tcp4_output_node.index); wrk->tco_next_node[1] = vlib_node_get_next (vm, session_queue_node.index, @@ -1549,7 +1549,7 @@ tcp_main_enable (vlib_main_t * vm) for (thread = 0; thread < num_threads; thread++) { - wrk = &tm->wrk_ctx[thread]; + wrk = &tm->wrk[thread]; vec_validate (wrk->pending_deq_acked, 255); vec_validate (wrk->pending_disconnects, 255); @@ -1562,8 +1562,8 @@ tcp_main_enable (vlib_main_t * vm) if (thread > 0) { - wrk->tco_next_node[0] = tm->wrk_ctx[0].tco_next_node[0]; - wrk->tco_next_node[1] = tm->wrk_ctx[0].tco_next_node[1]; + wrk->tco_next_node[0] = tm->wrk[0].tco_next_node[0]; + wrk->tco_next_node[1] = tm->wrk[0].tco_next_node[1]; } /* diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 8feac807d59..830b81df9ee 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -220,7 +220,7 @@ typedef struct tcp_configuration_ typedef struct _tcp_main { /** per-worker context */ - tcp_worker_ctx_t *wrk_ctx; + tcp_worker_ctx_t *wrk; /* Pool of listeners. 
*/ tcp_connection_t *listener_pool; @@ -301,8 +301,8 @@ vnet_get_tcp_main () always_inline tcp_worker_ctx_t * tcp_get_worker (u32 thread_index) { - ASSERT (thread_index < vec_len (tcp_main.wrk_ctx)); - return &tcp_main.wrk_ctx[thread_index]; + ASSERT (thread_index < vec_len (tcp_main.wrk)); + return &tcp_main.wrk[thread_index]; } tcp_connection_t *tcp_connection_alloc (u8 thread_index); diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c index 55bc5764df2..c14994aa440 100644 --- a/src/vnet/tcp/tcp_cli.c +++ b/src/vnet/tcp/tcp_cli.c @@ -919,7 +919,7 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + for (thread = 0; thread < vec_len (tm->wrk); thread++) { wrk = tcp_get_worker (thread); vlib_cli_output (vm, "Thread %u:\n", thread); @@ -957,7 +957,7 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input, return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); - for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++) + for (thread = 0; thread < vec_len (tm->wrk); thread++) { wrk = tcp_get_worker (thread); clib_memset (&wrk->stats, 0, sizeof (wrk->stats)); diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h index ccd0e3fe3ee..4c48f9ecfc5 100644 --- a/src/vnet/tcp/tcp_inlines.h +++ b/src/vnet/tcp/tcp_inlines.h @@ -68,7 +68,7 @@ always_inline tcp_connection_t * tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) { tcp_worker_ctx_t *wrk; - if (thread_index >= vec_len (tcp_main.wrk_ctx)) + if (thread_index >= vec_len (tcp_main.wrk)) return 0; wrk = tcp_get_worker (thread_index); if (pool_is_free_index (wrk->connections, conn_index)) @@ -217,7 +217,7 @@ tcp_is_lost_fin (tcp_connection_t * tc) always_inline u32 tcp_time_tstamp (u32 thread_index) { - return 
tcp_main.wrk_ctx[thread_index].time_tstamp; + return tcp_main.wrk[thread_index].time_tstamp; } /** @@ -226,14 +226,13 @@ tcp_time_tstamp (u32 thread_index) always_inline u32 tcp_tstamp (tcp_connection_t * tc) { - return (tcp_main.wrk_ctx[tc->c_thread_index].time_tstamp - - tc->timestamp_delta); + return (tcp_main.wrk[tc->c_thread_index].time_tstamp - tc->timestamp_delta); } always_inline f64 tcp_time_now_us (u32 thread_index) { - return tcp_main.wrk_ctx[thread_index].time_us; + return tcp_main.wrk[thread_index].time_us; } always_inline void diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index cd3e4b7700c..15b2c92dcf1 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -217,20 +217,6 @@ static int tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0, vlib_buffer_t * b0, tcp_header_t * th0, u32 * error0) { - /* We could get a burst of RSTs interleaved with acks */ - if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED)) - { - tcp_send_reset (tc0); - *error0 = TCP_ERROR_CONNECTION_CLOSED; - goto error; - } - - if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0))) - { - *error0 = TCP_ERROR_SEGMENT_INVALID; - goto error; - } - if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts, 0))) { *error0 = TCP_ERROR_OPTIONS; @@ -1372,6 +1358,42 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } } +always_inline int +tcp_segment_is_exception (tcp_connection_t *tc, tcp_header_t *th) +{ + /* TODO(fcoras): tcp-input should not allow segments without one of ack, rst, + * syn flags, so we shouldn't be checking for their presence. 
Leave the check + * in for now, remove in due time */ + ASSERT (th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN)); + return !tc || tc->state == TCP_STATE_CLOSED || + !(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN)); +} + +always_inline void +tcp_segment_handle_exception (tcp_connection_t *tc, tcp_header_t *th, + u32 *error) +{ + if (!tc) + { + *error = TCP_ERROR_INVALID_CONNECTION; + return; + } + + /* We could get a burst of RSTs interleaved with acks */ + if (tc->state == TCP_STATE_CLOSED) + { + tcp_send_reset (tc); + *error = TCP_ERROR_CONNECTION_CLOSED; + return; + } + + if (!(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN))) + { + *error = TCP_ERROR_SEGMENT_INVALID; + return; + } +} + always_inline uword tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4) @@ -1404,15 +1426,14 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, thread_index); + th = tcp_buffer_hdr (b[0]); - if (PREDICT_FALSE (tc == 0)) + if (PREDICT_FALSE (tcp_segment_is_exception (tc, th))) { - error = TCP_ERROR_INVALID_CONNECTION; + tcp_segment_handle_exception (tc, th, &error); goto done; } - th = tcp_buffer_hdr (b[0]); - /* TODO header prediction fast path */ /* 1-4: check SEQ, RST, SYN */ @@ -2819,8 +2840,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); } - next[0] = next[1] = TCP_INPUT_NEXT_DROP; - tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4, is_nolookup); tc1 = tcp_input_lookup_buffer (b[1], thread_index, &error1, is_ip4, @@ -2881,7 +2900,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); } - next[0] = TCP_INPUT_NEXT_DROP; tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4, is_nolookup); if (PREDICT_TRUE (tc0 != 0)) 
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 2fd20acf241..2e8a10896eb 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -299,7 +299,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, void tcp_update_burst_snd_vars (tcp_connection_t * tc) { - tcp_main_t *tm = &tcp_main; + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); /* Compute options to be used for connection. These may be reused when * sending data or to compute the effective mss (snd_mss) */ @@ -310,8 +310,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc) tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len; ASSERT (tc->snd_mss > 0); - tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts, - &tc->snd_opts); + tcp_options_write (wrk->cached_opts, &tc->snd_opts); tcp_update_rcv_wnd (tc); @@ -875,7 +874,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, { u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK; u32 advertise_wnd, data_len; - tcp_main_t *tm = &tcp_main; tcp_header_t *th; data_len = b->current_length; @@ -907,9 +905,8 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, if (maybe_burst) { - clib_memcpy_fast ((u8 *) (th + 1), - tm->wrk_ctx[tc->c_thread_index].cached_opts, - tc->snd_opts_len); + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); + clib_memcpy_fast ((u8 *) (th + 1), wrk->cached_opts, tc->snd_opts_len); } else { diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c index 6c8992cd0de..dd1da0a01d6 100644 --- a/src/vnet/udp/udp_cli.c +++ b/src/vnet/udp/udp_cli.c @@ -97,7 +97,8 @@ format_udp_vars (u8 * s, va_list * args) s = format (s, " index %u%U flags: %U\n", uc->c_c_index, format_udp_cfg_flags, uc, format_udp_connection_flags, uc); - s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index); + s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index, + uc->next_node_index, 
uc->next_node_opaque); if (!(uc->flags & UDP_CONN_F_LISTEN)) s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss); else diff --git a/src/vpp-api/python/vpp_papi/vpp_papi_async.py b/src/vpp-api/python/vpp_papi/vpp_papi_async.py index d9a4fabb69e..44e2a78eeea 100644 --- a/src/vpp-api/python/vpp_papi/vpp_papi_async.py +++ b/src/vpp-api/python/vpp_papi/vpp_papi_async.py @@ -451,7 +451,8 @@ class VPPApiClient: for m in r.message_table: n = m.name self.message_table[n] = m.index - self.vpp_dictionary_maxid = len(self.message_table) + # Find the maximum index of the message table + self.vpp_dictionary_maxid = max(self.message_table.values() or [0]) # self.worker_task = asyncio.create_task(self.message_handler(event_queue)) requests = {} diff --git a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py index 174ab74d0b8..1ba365ad6e1 100644 --- a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py +++ b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py @@ -177,7 +177,8 @@ class VppTransport: return 0 def msg_table_max_index(self): - return len(self.message_table) + """Return the maximum index of the message table.""" + return max(self.message_table.values() or [0]) def write(self, buf): """Send a binary-packed message to VPP.""" diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index a30a15ab2b1..8e7aebd8271 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -231,6 +231,18 @@ cpu { # update-interval <f64-seconds>, sets the segment scrape / update interval # } +## L3 FIB +# l3fib { + ## load balance pool size preallocation (expected number of objects) + # load-balance-pool-size 1M + + ## fib entry pool size preallocation (expected number of objects) + # fib-entry-pool-size 1M + + ## ip4 mtrie pool size preallocation (expected number of mtries) + # ip4-mtrie-pool-size 1K +# } + ## L2 FIB # l2fib { ## l2fib hash table size. 
diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h index 822f1bcc51f..1532103e9c1 100644 --- a/src/vppinfra/bihash_vec8_8.h +++ b/src/vppinfra/bihash_vec8_8.h @@ -46,6 +46,7 @@ static inline void clib_bihash_mark_free_vec8_8 (clib_bihash_kv_vec8_8_t *v) { v->value = 0xFEEDFACE8BADF00DULL; + v->key = ~0ULL; } /** Decide if a clib_bihash_kv_vec8_8_t instance is free diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h index c1122f59ff6..bf73bd95a84 100644 --- a/src/vppinfra/bitops.h +++ b/src/vppinfra/bitops.h @@ -195,6 +195,13 @@ next_with_same_number_of_set_bits (uword x) return ripple | ones; } +static_always_inline void +uword_bitmap_clear (uword *bmp, uword n_uwords) +{ + while (n_uwords--) + bmp++[0] = 0; +} + #define foreach_set_bit_index(i, v) \ for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v)); \ _tmp; \ @@ -273,6 +280,34 @@ uword_bitmap_find_first_set (uword *bmp) return (b - bmp) * uword_bits + get_lowest_set_bit_index (b[0]); } +always_inline uword +uword_bitmap_get_multiple (uword *bmp, uword i, uword n_bits) +{ + uword rv; + + bmp += i / uword_bits; + i %= uword_bits; + + rv = (bmp[0] >> i); + rv &= pow2_mask (n_bits); + + if (i + n_bits <= uword_bits) + return rv; + + n_bits -= uword_bits - i; + rv |= (bmp[1] & pow2_mask (n_bits)) << (uword_bits - i); + + return rv; +} + +always_inline uword +uword_bitmap_get_multiple_no_check (uword *bmp, uword i, uword n_bits) +{ + bmp += i / uword_bits; + i %= uword_bits; + return ((bmp[0] >> i) & pow2_mask (n_bits)); +} + static_always_inline u32 bit_extract_u32 (u32 v, u32 mask) { diff --git a/src/vppinfra/clib_error.h b/src/vppinfra/clib_error.h index 45f18eb1fe4..5db1a5e3440 100644 --- a/src/vppinfra/clib_error.h +++ b/src/vppinfra/clib_error.h @@ -23,7 +23,7 @@ typedef struct /* Error message. */ u8 *what; - /* Where error occurred (e.g. __FUNCTION__ __LINE__) */ + /* Where error occurred (e.g. 
__func__ __LINE__) */ const u8 *where; uword flags; diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h index d0825bdd5b2..6a66319148d 100644 --- a/src/vppinfra/elog.h +++ b/src/vppinfra/elog.h @@ -444,21 +444,21 @@ elog_data_inline (elog_main_t * em, elog_event_type_t * type, #define ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,func) \ { .format = fmt, .function = func, } -#define ELOG_TYPE_INIT(fmt) \ - ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,(char *) __FUNCTION__) +#define ELOG_TYPE_INIT(fmt) \ + ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, (char *) __func__) #define ELOG_TYPE_DECLARE_HELPER(f,fmt,func) \ static elog_event_type_t __ELOG_TYPE_VAR(f) = \ ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, func) -#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) \ - ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __FUNCTION__) +#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f, fmt) \ + ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __func__) #define ELOG_TYPE_DECLARE_FORMAT(f,fmt) \ ELOG_TYPE_DECLARE_HELPER (f, fmt, 0) -/* Shorthands with and without __FUNCTION__. - D for decimal; X for hex. F for __FUNCTION__. */ +/* Shorthands with and without __func__. + D for decimal; X for hex. F for __func__. */ #define ELOG_TYPE(f,fmt) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) #define ELOG_TYPE_D(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " %d") #define ELOG_TYPE_X(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " 0x%x") diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h index ae23d1bcca8..d3eed1b83ae 100644 --- a/src/vppinfra/error_bootstrap.h +++ b/src/vppinfra/error_bootstrap.h @@ -53,7 +53,7 @@ enum }; /* Current function name. Need (char *) cast to silence gcc4 pointer signedness warning. 
*/ -#define clib_error_function ((char *) __FUNCTION__) +#define clib_error_function ((char *) __func__) #ifndef CLIB_ASSERT_ENABLE #define CLIB_ASSERT_ENABLE (CLIB_DEBUG > 0) diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 6211bb51f0a..893978081d0 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -259,7 +259,7 @@ uword clib_mem_size (void *p); void clib_mem_free_s (void *p); /* Memory allocator which panics when it fails. - Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */ + Use macro so that clib_panic macro can expand __func__ and __LINE__. */ #define clib_mem_alloc_aligned_no_fail(size,align) \ ({ \ uword _clib_mem_alloc_size = (size); \ diff --git a/src/vppinfra/string.c b/src/vppinfra/string.c index ea9480875a5..aedaf428a31 100644 --- a/src/vppinfra/string.c +++ b/src/vppinfra/string.c @@ -94,7 +94,7 @@ clib_memswap (void *_a, void *_b, uword bytes) __clib_export void clib_c11_violation (const char *s) { - _clib_error (CLIB_ERROR_WARNING, (char *) __FUNCTION__, 0, (char *) s); + _clib_error (CLIB_ERROR_WARNING, (char *) __func__, 0, (char *) s); } /** diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c index f1736499a0a..7c0ea44b481 100644 --- a/src/vppinfra/time.c +++ b/src/vppinfra/time.c @@ -332,6 +332,7 @@ format_clib_time (u8 * s, va_list * args) clib_time_t *c = va_arg (*args, clib_time_t *); int verbose = va_arg (*args, int); f64 now, reftime, delta_reftime_in_seconds, error; + u32 indent = format_get_indent (s); /* Compute vpp elapsed time from the CPU clock */ reftime = unix_time_now (); @@ -346,8 +347,14 @@ format_clib_time (u8 * s, va_list * args) error = now - delta_reftime_in_seconds; - s = format (s, ", reftime %.6f, error %.6f, clocks/sec %.6f", - delta_reftime_in_seconds, error, c->clocks_per_second); + s = format (s, "\n%Ucpu time %.6f now %lu last %lu since start %lu \n", + format_white_space, indent, now, clib_cpu_time_now (), + c->last_cpu_time, c->total_cpu_time); + s = format (s, 
"%Ureftime %.6f now %.6f last %.6f init %.6f\n", + format_white_space, indent, delta_reftime_in_seconds, reftime, + c->last_verify_reference_time, c->init_reference_time); + s = format (s, "%Uerror %.6f, clocks/sec %.6f", format_white_space, indent, + error, c->clocks_per_second); return (s); } diff --git a/test/Makefile b/test/Makefile index 79feba9165e..17bf33d0884 100644 --- a/test/Makefile +++ b/test/Makefile @@ -388,24 +388,32 @@ COV_REM_TODO_NO_TEST="*/vpp-api/client/*" "*/plugins/prom/*" \ "*/vnet/ipsec/esp_format.c" "*/vnet/ethernet/sfp.c" \ "*/vnet/ethernet/ethernet_format_fns.h" \ "*/plugins/ikev2/ikev2_format.c" "*/vnet/bier/bier_types.c" - -COV_REM_ALT_TEST="*/plugins/hs_apps/*" "*/plugins/http/*.h" +ifeq ($(HS-TEST),1) +COV_REM_HST_UNUSED_FEAT= "*/plugins/ping/*" "*/plugins/unittest/mpcap_node.c" "*/vnet/bfd/*" \ + "*/vnet/bier/*" "*/vnet/bonding/*" "*/vnet/classify/*" \ + "*/vnet/gso/*" "*/vnet/ipfix-export/*" "*/vnet/ipip/*" \ + "*/vnet/ipsec/*" "*/vnet/l2/*" "*/vnet/mpls/*" \ + "*/vnet/pg/*" "*/vnet/policer/*" "*/vnet/snap/*" \ + "*/vnet/span/*" "*/vnet/srv6/*" "*/vnet/teib/*" \ + "*/vnet/tunnel/*" "*/vpp-api/vapi/*" "*/vpp/app/vpe_cli.c" \ + "*/vppinfra/pcap.c" "*/vppinfra/pcap_funcs.h" +endif .PHONY: cov-post cov-post: wipe-cov $(BUILD_COV_DIR) - @lcov --ignore-errors --capture \ + @lcov --ignore-errors unused,empty,mismatch,gcov --capture \ --directory $(VPP_BUILD_DIR) \ --output-file $(BUILD_COV_DIR)/coverage$(HS_TEST).info @test -z "$(EXTERN_COV_DIR)" || \ - lcov --ignore-errors --capture \ + lcov --ignore-errors unused,empty,mismatch,gcov --capture \ --directory $(EXTERN_COV_DIR) \ --output-file $(BUILD_COV_DIR)/extern-coverage$(HS_TEST).info - @lcov --ignore-errors --remove $(BUILD_COV_DIR)/coverage$(HS_TEST).info \ + @lcov --ignore-errors unused,empty,mismatch,gcov --remove $(BUILD_COV_DIR)/coverage$(HS_TEST).info \ $(COV_REM_NOT_CODE) \ $(COV_REM_DRIVERS) \ $(COV_REM_TODO_NO_TEST) \ $(COV_REM_UNUSED_FEAT) \ - $(COV_REM_ALT_TEST) \ + 
$(COV_REM_HST_UNUSED_FEAT) \ -o $(BUILD_COV_DIR)/coverage-filtered$(HS_TEST).info @genhtml $(BUILD_COV_DIR)/coverage-filtered$(HS_TEST).info \ --output-directory $(BUILD_COV_DIR)/html diff --git a/test/template_ipsec.py b/test/template_ipsec.py index 4e68d44013f..ab5aa9390da 100644 --- a/test/template_ipsec.py +++ b/test/template_ipsec.py @@ -16,6 +16,8 @@ from scapy.layers.inet6 import ( IPv6ExtHdrDestOpt, ) +from scapy.layers.isakmp import ISAKMP + from framework import VppTestCase from asfframework import VppTestRunner @@ -3246,11 +3248,22 @@ class IPSecIPv6Fwd(VppTestCase): payload = self.info_to_payload(info) # create the packet itself p = ( - Ether(dst=src_if.local_mac, src=src_if.remote_mac) - / IPv6(src=src_if.remote_ip6, dst=dst_if.remote_ip6) - / UDP(sport=src_prt, dport=dst_prt) - / Raw(payload) + ( + Ether(dst=src_if.local_mac, src=src_if.remote_mac) + / IPv6(src=src_if.remote_ip6, dst=dst_if.remote_ip6) + / UDP(sport=src_prt, dport=dst_prt) + / ISAKMP() + / Raw(payload) + ) + if (src_prt == 500 or src_prt == 4500) + else ( + Ether(dst=src_if.local_mac, src=src_if.remote_mac) + / IPv6(src=src_if.remote_ip6, dst=dst_if.remote_ip6) + / UDP(sport=src_prt, dport=dst_prt) + / Raw(payload) + ) ) + # store a copy of the packet in the packet info info.data = p.copy() # append the packet to the list diff --git a/test/test_ipsec_spd_fp_input.py b/test/test_ipsec_spd_fp_input.py index 1953bbe5eaf..ed38a51abdb 100644 --- a/test/test_ipsec_spd_fp_input.py +++ b/test/test_ipsec_spd_fp_input.py @@ -835,9 +835,6 @@ class IPSec4SpdTestCaseMultiple(SpdFastPathInbound): self.verify_policy_match(0, policy_22) -@unittest.skipIf( - "ping" in config.excluded_plugins, "Exclude tests requiring Ping plugin" -) class IPSec6SpdTestCaseProtect(SpdFastPathIPv6InboundProtect): """ IPSec/IPv6 inbound: Policy mode test case with fast path \ (add protect)""" @@ -889,6 +886,155 @@ class IPSec6SpdTestCaseProtect(SpdFastPathIPv6InboundProtect): self.assertEqual(p.tra_sa_in.get_err("lost"), 
0) +class IPSec6SpdTestCaseBypass(SpdFastPathIPv6Inbound): + """ IPSec/IPv6 inbound: Policy mode test case with fast path \ + (add bypass)""" + + def test_ipsec_spd_inbound_bypass(self): + # In this test case, packets in IPv6 FWD path are configured + # to go through IPSec inbound SPD policy lookup. + # + # 2 inbound SPD rules (1 HIGH and 1 LOW) are added. + # - High priority rule action is set to DISCARD. + # - Low priority rule action is set to BYPASS. + # + # Since BYPASS rules take precedence over DISCARD + # (the order being PROTECT, BYPASS, DISCARD) we expect the + # BYPASS rule to match and traffic to be correctly forwarded. + self.create_interfaces(2) + pkt_count = 5 + + self.spd_create_and_intf_add(1, [self.pg1, self.pg0]) + + # create input rules + # bypass rule should take precedence over discard rule, + # even though it's lower priority, because for input policies + # matching PROTECT policies precedes matching BYPASS policies + # which preceeds matching for DISCARD policies. + # Any hit stops the process. 
+ policy_0 = self.spd_add_rem_policy( # inbound, priority 10 + 1, + self.pg1, + self.pg0, + socket.IPPROTO_UDP, + is_out=0, + priority=10, + policy_type="bypass", + ip_range=True, + local_ip_start=self.pg1.remote_ip6, + local_ip_stop=self.pg1.remote_ip6, + remote_ip_start=self.pg0.remote_ip6, + remote_ip_stop=self.pg0.remote_ip6, + ) + policy_1 = self.spd_add_rem_policy( # inbound, priority 15 + 1, + self.pg1, + self.pg0, + socket.IPPROTO_UDP, + is_out=0, + priority=15, + policy_type="discard", + ip_range=True, + local_ip_start=self.pg1.remote_ip6, + local_ip_stop=self.pg1.remote_ip6, + remote_ip_start=self.pg0.remote_ip6, + remote_ip_stop=self.pg0.remote_ip6, + ) + + # create output rule so we can capture forwarded packets + policy_2 = self.spd_add_rem_policy( # outbound, priority 10 + 1, + self.pg0, + self.pg1, + socket.IPPROTO_UDP, + is_out=1, + priority=10, + policy_type="bypass", + ) + + # create the packet stream + packets = self.create_stream( + self.pg0, self.pg1, pkt_count, src_prt=500, dst_prt=500 + ) + # add the stream to the source interface + self.pg0.add_stream(packets) + self.pg1.enable_capture() + self.pg_start() + + # check capture on pg1 + capture = self.pg1.get_capture() + for packet in capture: + try: + self.logger.debug(ppp("SPD Add - Got packet:", packet)) + except Exception: + self.logger.error(ppp("Unexpected or invalid packet:", packet)) + raise + self.logger.debug("SPD: Num packets: %s", len(capture.res)) + + # verify captured packets + self.verify_capture(self.pg0, self.pg1, capture) + # verify all policies matched the expected number of times + self.verify_policy_match(pkt_count, policy_0) + self.verify_policy_match(0, policy_1) + self.verify_policy_match(pkt_count, policy_2) + + +class IPSec6SpdTestCaseDiscard(SpdFastPathIPv6Inbound): + """ IPSec/IPv6 inbound: Policy mode test case with fast path \ + (add discard)""" + + def test_ipsec_spd_inbound_discard(self): + # In this test case, packets in IPv6 FWD path are configured + # to go 
through IPSec inbound SPD policy lookup. + # + # Rule action is set to DISCARD. + + self.create_interfaces(2) + pkt_count = 5 + + self.spd_create_and_intf_add(1, [self.pg1, self.pg0]) + + # create input rules + # bypass rule should take precedence over discard rule, + # even though it's lower priority + policy_0 = self.spd_add_rem_policy( # inbound, priority 10 + 1, + self.pg1, + self.pg0, + socket.IPPROTO_UDP, + is_out=0, + priority=10, + policy_type="discard", + ) + + # create output rule so we can capture forwarded packets + policy_1 = self.spd_add_rem_policy( # outbound, priority 10 + 1, + self.pg1, + self.pg0, + socket.IPPROTO_UDP, + is_out=1, + priority=10, + policy_type="bypass", + ) + + # create the packet stream + packets = self.create_stream( + self.pg0, self.pg1, pkt_count, src_prt=500, dst_prt=500 + ) + # add the stream to the source interface + self.pg0.add_stream(packets) + self.pg1.enable_capture() + self.pg_start() + + # check capture on pg1 + capture = self.pg1.assert_nothing_captured() + + # verify all policies matched the expected number of times + self.verify_policy_match(pkt_count, policy_0) + self.verify_policy_match(0, policy_1) + + class IPSec6SpdTestCaseTunProtect(SpdFastPathIPv6InboundProtect): """IPSec/IPv6 inbound: Policy mode test case with fast path""" diff --git a/test/test_linux_cp.py b/test/test_linux_cp.py index ff6023cea26..d7116233236 100644 --- a/test/test_linux_cp.py +++ b/test/test_linux_cp.py @@ -6,6 +6,14 @@ import socket from scapy.layers.inet import IP, UDP from scapy.layers.inet6 import IPv6, Raw from scapy.layers.l2 import Ether, ARP +from scapy.contrib.lacp import LACP +from scapy.contrib.lldp import ( + LLDPDUChassisID, + LLDPDUPortID, + LLDPDUTimeToLive, + LLDPDUEndOfLLDPDU, + LLDPDU, +) from util import reassemble4 from vpp_object import VppObject @@ -427,5 +435,131 @@ class TestLinuxCPIpsec(TemplateIpsec, TemplateIpsecItf4, IpsecTun4): self.unconfig_network(p) +@unittest.skipIf("linux-cp" in config.excluded_plugins, 
"Exclude linux-cp plugin tests") +class TestLinuxCPEthertype(VppTestCase): + """Linux CP Ethertype""" + + extra_vpp_plugin_config = [ + "plugin", + "linux_cp_plugin.so", + "{", + "enable", + "}", + "plugin", + "linux_cp_unittest_plugin.so", + "{", + "enable", + "}", + "plugin", + "lldp_plugin.so", + "{", + "disable", + "}", + ] + + LACP_ETHERTYPE = 0x8809 + LLDP_ETHERTYPE = 0x88CC + + @classmethod + def setUpClass(cls): + super(TestLinuxCPEthertype, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestLinuxCPEthertype, cls).tearDownClass() + + def setUp(self): + super(TestLinuxCPEthertype, self).setUp() + self.create_pg_interfaces(range(2)) + for i in self.pg_interfaces: + i.admin_up() + + self.host = self.pg0 + self.phy = self.pg1 + + self.pair = VppLcpPair(self, self.phy, self.host).add_vpp_config() + self.logger.info(self.vapi.cli("sh lcp")) + + def tearDown(self): + self.pair.remove_vpp_config() + + for i in self.pg_interfaces: + i.admin_down() + super(TestLinuxCPEthertype, self).tearDown() + + def send_packet(self, sender, receiver, ethertype, dst, data, expect_copy=True): + packet = Ether(src=sender.remote_mac, dst=dst, type=ethertype) / data + if expect_copy: + rxs = self.send_and_expect(sender, [packet], receiver) + for rx in rxs: + self.assertEqual(packet.show2(True), rx.show2(True)) + else: + self.send_and_assert_no_replies(sender, [packet]) + + def send_lacp_packet(self, sender, receiver, expect_copy=True): + data = LACP( + actor_system="00:00:00:00:00:01", partner_system="00:00:00:00:00:02" + ) + self.send_packet( + sender, + receiver, + self.LACP_ETHERTYPE, + "01:80:c2:00:00:02", + data, + expect_copy, + ) + + def send_lldp_packet(self, sender, receiver, expect_copy=True): + data = ( + LLDPDUChassisID(subtype=4, id="01:02:03:04:05:06") + / LLDPDUPortID(subtype=3, id="07:08:09:0a:0b:0c") + / LLDPDUTimeToLive(ttl=120) + / LLDPDUEndOfLLDPDU() + ) + self.send_packet( + sender, + receiver, + self.LLDP_ETHERTYPE, + "01:80:c2:00:00:0e", + 
data, + expect_copy, + ) + + def check_ethertype_enabled(self, ethertype, enabled=True): + reply = self.vapi.lcp_ethertype_get() + output = self.vapi.cli("show lcp ethertype") + + if enabled: + self.assertIn(ethertype, reply.ethertypes) + self.assertIn(hex(ethertype), output) + else: + self.assertNotIn(ethertype, reply.ethertypes) + self.assertNotIn(hex(ethertype), output) + + def test_linux_cp_lacp(self): + """Linux CP LACP Test""" + self.check_ethertype_enabled(self.LACP_ETHERTYPE, enabled=False) + self.send_lacp_packet(self.phy, self.host, expect_copy=False) + self.send_lacp_packet(self.host, self.phy, expect_copy=False) + + self.vapi.cli("lcp ethertype enable " + str(self.LACP_ETHERTYPE)) + + self.check_ethertype_enabled(self.LACP_ETHERTYPE, enabled=True) + self.send_lacp_packet(self.phy, self.host, expect_copy=True) + self.send_lacp_packet(self.host, self.phy, expect_copy=True) + + def test_linux_cp_lldp(self): + """Linux CP LLDP Test""" + self.check_ethertype_enabled(self.LLDP_ETHERTYPE, enabled=False) + self.send_lldp_packet(self.phy, self.host, expect_copy=False) + self.send_lldp_packet(self.host, self.phy, expect_copy=False) + + self.vapi.cli("lcp ethertype enable " + str(self.LLDP_ETHERTYPE)) + + self.check_ethertype_enabled(self.LLDP_ETHERTYPE, enabled=True) + self.send_lldp_packet(self.phy, self.host, expect_copy=True) + self.send_lldp_packet(self.host, self.phy, expect_copy=True) + + if __name__ == "__main__": unittest.main(testRunner=VppTestRunner) |