diff options
148 files changed, 9167 insertions, 3458 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index b1065cdbb71..4ad31098826 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -903,3 +903,9 @@ Netmap I: netmap M: Tom Jones <thj@freebsd.org> F: src/plugins/netmap/ + +sFlow +I: sflow +M: Pim van Pelt <pim@ipng.nl> +M: Neil McKee <neil.mckee@inmon.com> +F: src/plugins/sflow/ @@ -57,16 +57,29 @@ GDB_ARGS= -ex "handle SIGUSR1 noprint nostop" ifneq ($(shell uname),Darwin) OS_ID = $(shell grep '^ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') OS_VERSION_ID= $(shell grep '^VERSION_ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') +OS_CODENAME = $(shell grep '^VERSION_CODENAME=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') +endif + +# Fill in OS_VERSION_ID based on codename if its absent +ifeq ($(OS_VERSION_ID),) +# Debian testing doesn't define version_id and therefore need to be referenced by name +ifeq ($(OS_CODENAME),trixie) +OS_VERSION_ID = 13 +endif endif ifeq ($(filter ubuntu debian linuxmint,$(OS_ID)),$(OS_ID)) PKG=deb -else ifeq ($(filter rhel centos fedora opensuse-leap rocky almalinux,$(OS_ID)),$(OS_ID)) +else ifeq ($(filter rhel centos fedora opensuse-leap rocky almalinux anolis,$(OS_ID)),$(OS_ID)) PKG=rpm else ifeq ($(filter freebsd,$(OS_ID)),$(OS_ID)) PKG=pkg endif +ifeq ($(filter anolis,$(OS_ID)),$(OS_ID)) +OS_VERSION_ID= $(shell grep '^VERSION_ID=' /etc/os-release | cut -f2 -d= | sed -e 's/\"//g' | cut -d. 
-f1) +endif + # +libganglia1-dev if building the gmond plugin DEB_DEPENDS = curl build-essential autoconf automake ccache @@ -127,6 +140,13 @@ else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-12) # TODO: remove once ubuntu 20.04 is deprecated and extras/scripts/checkstyle.sh is upgraded to -15 export CLANG_FORMAT_VER=15 LIBFFI=libffi8 +else ifeq ($(OS_ID)-$(OS_VERSION_ID),debian-13) + DEB_DEPENDS += virtualenv + DEB_DEPENDS += clang-19 clang-format-19 + # for extras/scripts/checkstyle.sh + # TODO: remove once ubuntu 20.04 is deprecated and extras/scripts/checkstyle.sh is upgraded to -15 + export CLANG_FORMAT_VER=15 + LIBFFI=libffi8 else DEB_DEPENDS += clang-11 clang-format-11 LIBFFI=libffi7 @@ -183,6 +203,15 @@ else ifeq ($(OS_ID)-$(OS_VERSION_ID),centos-8) RPM_DEPENDS += infiniband-diags libibumad RPM_DEPENDS += libpcap-devel llvm-toolset RPM_DEPENDS_GROUPS = 'Development Tools' +else ifeq ($(OS_ID)-$(OS_VERSION_ID),anolis-8) + RPM_DEPENDS += yum-utils + RPM_DEPENDS += compat-openssl10 openssl-devel + RPM_DEPENDS += python2-devel python36-devel python3-ply + RPM_DEPENDS += python3-virtualenv python3-jsonschema + RPM_DEPENDS += libarchive cmake + RPM_DEPENDS += libpcap-devel llvm-toolset git-clang-format python3-pyyaml + RPM_DEPENDS_GROUPS = 'Development Tools' + export CLANG_FORMAT_VER=15 else RPM_DEPENDS += yum-utils RPM_DEPENDS += openssl-devel @@ -402,6 +431,12 @@ else ifeq ($(OS_ID),fedora) @sudo -E dnf install $(CONFIRM) $(RPM_DEPENDS) @sudo -E debuginfo-install $(CONFIRM) glibc openssl-libs zlib endif +else ifeq ($(OS_ID)-$(OS_VERSION_ID),anolis-8) + @sudo -E dnf install $(CONFIRM) dnf-plugins-core epel-release + @sudo -E dnf config-manager --set-enabled \ + $(shell dnf repolist all 2>/dev/null|grep -i powertools|cut -d' ' -f1|grep -v source) + @sudo -E dnf groupinstall $(CONFIRM) $(RPM_DEPENDS_GROUPS) + @sudo -E dnf install --skip-broken $(CONFIRM) $(RPM_DEPENDS) else ifeq ($(filter opensuse-leap-15.3 opensuse-leap-15.4 
,$(OS_ID)-$(OS_VERSION_ID)),$(OS_ID)-$(OS_VERSION_ID)) @sudo -E zypper refresh @sudo -E zypper install -y $(RPM_SUSE_DEPENDS) @@ -631,10 +666,13 @@ test-wipe-all: @$(MAKE) -C test wipe-all # Note: All python venv consolidated in test/Makefile, test/requirements*.txt +# Also, this target is used by ci-management/jjb/scripts/vpp/checkstyle-test.sh, +# thus inclusion of checkstyle-go here to include checkstyle for hs-test +# in the vpp-checkstyle-verify-*-*-* jobs .PHONY: test-checkstyle test-checkstyle: - $(warning test-checkstyle is deprecated. Running checkstyle-python.") @$(MAKE) -C test checkstyle-python-all + @$(MAKE) -C extras/hs-test checkstyle-go # Note: All python venv consolidated in test/Makefile, test/requirements*.txt .PHONY: test-checkstyle-diff diff --git a/build/external/mlx_rdma_dpdk_matrix.txt b/build/external/mlx_rdma_dpdk_matrix.txt index 17cf1aaa5e5..184a2bede1e 100644 --- a/build/external/mlx_rdma_dpdk_matrix.txt +++ b/build/external/mlx_rdma_dpdk_matrix.txt @@ -1,3 +1,4 @@ rdma=49.0 dpdk=23.11 rdma=51.0 dpdk=24.03 -rdma=52.0 dpdk=24.07 +rdma=55.0 dpdk=24.07 +rdma=55.0 dpdk=24.11.1 diff --git a/build/external/packages/dpdk.mk b/build/external/packages/dpdk.mk index 88051da8851..da6802e480b 100644 --- a/build/external/packages/dpdk.mk +++ b/build/external/packages/dpdk.mk @@ -21,10 +21,11 @@ DPDK_MLX_IBV_LINK ?= static # On most of the systems, default value for max lcores is 128 DPDK_MAX_LCORES ?= -dpdk_version ?= 24.07 +dpdk_version ?= 24.11.1 dpdk_base_url ?= http://fast.dpdk.org/rel dpdk_tarball := dpdk-$(dpdk_version).tar.xz -dpdk_tarball_sha256sum_24.07 := 9944f7e5f268e7ac9b4193e2cd54ef6d98f6e1d7dddc967c77ae4f6616d6fbbd + +dpdk_tarball_sha256sum_24.11.1 := bcae7d42c449fc456dfb279feabcbe0599a29bebb2fe2905761e187339d96b8e dpdk_tarball_sha256sum := $(dpdk_tarball_sha256sum_$(dpdk_version)) dpdk_url := $(dpdk_base_url)/$(dpdk_tarball) @@ -198,8 +199,8 @@ define dpdk_config_cmds mkdir -p ../dpdk-meson-venv && \ python3 -m venv ../dpdk-meson-venv 
&& \ source ../dpdk-meson-venv/bin/activate && \ - (if ! ls $(PIP_DOWNLOAD_DIR)meson* ; then pip3 download -d $(PIP_DOWNLOAD_DIR) -f $(DL_CACHE_DIR) meson==0.55.3 setuptools wheel pyelftools; fi) && \ - pip3 install --no-index --find-links=$(PIP_DOWNLOAD_DIR) meson==0.55.3 pyelftools && \ + (if ! ls $(PIP_DOWNLOAD_DIR)meson* ; then pip3 download -d $(PIP_DOWNLOAD_DIR) -f $(DL_CACHE_DIR) meson==0.57.2 setuptools wheel pyelftools; fi) && \ + pip3 install --no-index --find-links=$(PIP_DOWNLOAD_DIR) meson==0.57.2 pyelftools && \ PKG_CONFIG_PATH=$(dpdk_install_dir)/lib/pkgconfig meson setup $(dpdk_src_dir) \ $(dpdk_build_dir) \ $(DPDK_MESON_ARGS) \ diff --git a/build/external/packages/octeon-roc.mk b/build/external/packages/octeon-roc.mk index 3d902171652..0dade11ccc8 100644 --- a/build/external/packages/octeon-roc.mk +++ b/build/external/packages/octeon-roc.mk @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # https://spdx.org/licenses/Apache-2.0.html -octeon-roc_version := 0.5 +octeon-roc_version := 0.6 octeon-roc_tarball := v$(octeon-roc_version).tar.gz -octeon-roc_tarball_sha256sum := 030fc0f58d761525bf8814ed9d95f5ce999541b19bd75eb123dee90c9e2c52a0 +octeon-roc_tarball_sha256sum := 5018e6da80c80898444b648482b4240cbf62591c64eb463b3f681cf68c07239c octeon-roc_tarball_strip_dirs := 1 octeon-roc_url := https://github.com/MarvellEmbeddedProcessors/marvell-octeon-roc/archive/refs/tags/$(octeon-roc_tarball) diff --git a/build/external/packages/rdma-core.mk b/build/external/packages/rdma-core.mk index fabacbddb8e..6107ec815bd 100644 --- a/build/external/packages/rdma-core.mk +++ b/build/external/packages/rdma-core.mk @@ -23,9 +23,9 @@ RDMA_CORE_DEBUG?=n # 2. 
Verify that the file build/external/dpdk_mlx_default.sh was generated # and contains 'DPDK_MLX_DEFAULT=y' # -rdma-core_version := 52.0 +rdma-core_version := 55.0 rdma-core_tarball := rdma-core-$(rdma-core_version).tar.gz -rdma-core_tarball_sha256sum_52.0 := 1f0ce5f2462c982b20d21156707076278807a7adf4d10e9142f3be4bec1b2b83 +rdma-core_tarball_sha256sum_55.0 := 6f8b97267807cdae54845f542ee3d75de80fdc24fe2632f5db1573ecef132d0f rdma-core_tarball_sha256sum := $(rdma-core_tarball_sha256sum_$(rdma-core_version)) rdma-core_tarball_strip_dirs := 1 rdma-core_url := http://github.com/linux-rdma/rdma-core/releases/download/v$(rdma-core_version)/$(rdma-core_tarball) diff --git a/docs/_scripts/siphon/generate.py b/docs/_scripts/siphon/generate.py index 1244c4658e4..1bd3e03fdcb 100644 --- a/docs/_scripts/siphon/generate.py +++ b/docs/_scripts/siphon/generate.py @@ -28,10 +28,10 @@ siphon_patterns = [] class Generate(object): """Matches a siphon comment block start""" - siphon_block_start = re.compile("^\s*/\*\?\s*(.*)$") + siphon_block_start = re.compile(r"^\s*/\*\?\s*(.*)$") """Matches a siphon comment block stop""" - siphon_block_stop = re.compile("^(.*)\s*\?\*/\s*$") + siphon_block_stop = re.compile(r"^(.*)\s*\?\*/\s*$") """Siphon block directive delimiter""" siphon_block_delimiter = "%%" @@ -39,12 +39,12 @@ class Generate(object): """Matches a siphon block directive such as '%clicmd:group_label Debug CLI%'""" siphon_block_directive = re.compile( - "(%s)\s*([a-zA-Z0-9_:]+)\s+(.*)\s*(%s)" + r"(%s)\s*([a-zA-Z0-9_:]+)\s+(.*)\s*(%s)" % (siphon_block_delimiter, siphon_block_delimiter) ) """Matches the start of an initializer block""" - siphon_initializer = re.compile("\s*=") + siphon_initializer = re.compile(r"\s*=") """Collated output for each siphon""" output = None diff --git a/docs/_scripts/siphon/generate_clicmd.py b/docs/_scripts/siphon/generate_clicmd.py index 2e2f6281a39..8db9d0c0056 100644 --- a/docs/_scripts/siphon/generate_clicmd.py +++ 
b/docs/_scripts/siphon/generate_clicmd.py @@ -20,7 +20,7 @@ from . import generate generate.siphon_patterns.append( ( re.compile( - "(?P<m>VLIB_CLI_COMMAND)\s*" "[(](?P<name>[a-zA-Z0-9_]+)(,[^)]*)?[)]" + r"(?P<m>VLIB_CLI_COMMAND)\s*" "[(](?P<name>[a-zA-Z0-9_]+)(,[^)]*)?[)]" ), "clicmd", ) diff --git a/docs/_scripts/siphon/generate_syscfg.py b/docs/_scripts/siphon/generate_syscfg.py index 105a59c8262..a403d311539 100644 --- a/docs/_scripts/siphon/generate_syscfg.py +++ b/docs/_scripts/siphon/generate_syscfg.py @@ -20,8 +20,8 @@ from . import generate generate.siphon_patterns.append( ( re.compile( - "(?P<m>VLIB_CONFIG_FUNCTION)\s*" - '[(](?P<fn>[a-zA-Z0-9_]+)\s*,\s*"(?P<name>[^"]*)"[)]' + r"(?P<m>VLIB_CONFIG_FUNCTION)\s*" + r'[(](?P<fn>[a-zA-Z0-9_]+)\s*,\s*"(?P<name>[^"]*)"[)]' ), "syscfg", ) diff --git a/docs/aboutvpp/releasenotes/index.rst b/docs/aboutvpp/releasenotes/index.rst index a587fb692be..7e4f7b93f31 100644 --- a/docs/aboutvpp/releasenotes/index.rst +++ b/docs/aboutvpp/releasenotes/index.rst @@ -9,5 +9,4 @@ Release notes v24.10 v24.06 v24.02 - v23.10 past diff --git a/docs/aboutvpp/releasenotes/past.rst b/docs/aboutvpp/releasenotes/past.rst index 9e1a6e31cf1..d4437a470dd 100644 --- a/docs/aboutvpp/releasenotes/past.rst +++ b/docs/aboutvpp/releasenotes/past.rst @@ -6,6 +6,7 @@ Past releases .. 
toctree:: :maxdepth: 1 + v23.10 v23.06 v23.02 v22.10.1 diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index c69869665bb..b1962cc460c 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1381,3 +1381,5 @@ zoomout zx µs oflags +sflow +sFlow diff --git a/extras/hs-test/Makefile b/extras/hs-test/Makefile index ccccf4fb895..9fbb21c9ccb 100644 --- a/extras/hs-test/Makefile +++ b/extras/hs-test/Makefile @@ -200,14 +200,14 @@ checkstyle-go: .goimports.ok if [ $$status -ne 0 ]; then \ exit $$status; \ elif [ -z "$$output" ]; then \ - echo "*******************************************************************"; \ - echo "Checkstyle OK."; \ - echo "*******************************************************************"; \ + echo "******************************************************************************"; \ + echo "* HST Golang Checkstyle OK."; \ + echo "******************************************************************************"; \ else \ echo "$$output"; \ - echo "*******************************************************************"; \ - echo "Checkstyle failed. Use 'make fixstyle-go' or fix errors manually."; \ - echo "*******************************************************************"; \ + echo "******************************************************************************"; \ + echo "* HST Golang Checkstyle FAILED. 
Use 'make fixstyle-go' or fix errors manually."; \ + echo "******************************************************************************"; \ exit 1; \ fi diff --git a/extras/hs-test/docker/Dockerfile.nginx b/extras/hs-test/docker/Dockerfile.nginx index c2a3e98df28..fc85f00aaae 100644 --- a/extras/hs-test/docker/Dockerfile.nginx +++ b/extras/hs-test/docker/Dockerfile.nginx @@ -6,7 +6,6 @@ RUN apt-get update \ && apt-get install -y nginx gdb less libunwind-dev \ && rm -rf /var/lib/apt/lists/* -COPY vpp-data/lib/* /usr/lib/ COPY resources/nginx/nginx.conf /nginx.conf COPY script/nginx_ldp.sh /usr/bin/nginx_ldp.sh @@ -16,4 +15,7 @@ ENV LDP_DEBUG=0 ENV VCL_DEBUG=0 ENV LDP_SID_BIT=8 +# copy vpp-data last to take advantage of caching (do not change) +COPY vpp-data/lib/* /usr/lib/ + ENTRYPOINT ["nginx_ldp.sh", "nginx", "-c", "/nginx.conf"] diff --git a/extras/hs-test/docker/Dockerfile.nginx-http3 b/extras/hs-test/docker/Dockerfile.nginx-http3 index 1070be9446a..bde73e32da1 100644 --- a/extras/hs-test/docker/Dockerfile.nginx-http3 +++ b/extras/hs-test/docker/Dockerfile.nginx-http3 @@ -14,11 +14,9 @@ RUN bash -c 'echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nP RUN apt update && apt install -y nginx=1.26.2* -COPY vpp-data/lib/* /usr/lib/ COPY resources/nginx/vcl.conf /vcl.conf COPY resources/nginx/nginx_http3.conf /nginx.conf COPY script/nginx_ldp.sh /usr/bin/nginx_ldp.sh - COPY resources/nginx/html/index.html /usr/share/nginx/index.html ENV VCL_CONFIG=/vcl.conf @@ -27,4 +25,7 @@ ENV LDP_DEBUG=0 ENV VCL_DEBUG=0 ENV LDP_SID_BIT=8 +# copy vpp-data last to take advantage of caching (do not change) +COPY vpp-data/lib/* /usr/lib/ + ENTRYPOINT ["nginx_ldp.sh", "nginx", "-c", "/nginx.conf"] diff --git a/extras/hs-test/go.mod b/extras/hs-test/go.mod index 01cb9000bdc..4cc24d510cc 100644 --- a/extras/hs-test/go.mod +++ b/extras/hs-test/go.mod @@ -9,6 +9,7 @@ require ( github.com/edwarnicke/exechelper v1.0.3 github.com/onsi/ginkgo/v2 v2.17.2 github.com/onsi/gomega 
v1.33.1 + github.com/quic-go/quic-go v0.48.2 github.com/sirupsen/logrus v1.9.3 go.fd.io/govpp v0.10.0 gopkg.in/yaml.v3 v3.0.1 @@ -39,6 +40,7 @@ require ( github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/quic-go/qpack v0.5.1 // indirect github.com/sasha-s/go-deadlock v0.3.1 // indirect github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace // indirect @@ -50,11 +52,14 @@ require ( go.opentelemetry.io/otel/metric v1.28.0 // indirect go.opentelemetry.io/otel/sdk v1.28.0 // indirect go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.uber.org/mock v0.4.0 // indirect + golang.org/x/crypto v0.26.0 // indirect golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect - golang.org/x/net v0.26.0 // indirect - golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.21.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/mod v0.18.0 // indirect + golang.org/x/net v0.28.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.22.0 // indirect google.golang.org/protobuf v1.34.2 // indirect diff --git a/extras/hs-test/go.sum b/extras/hs-test/go.sum index fb555ad7abf..19a120b3007 100644 --- a/extras/hs-test/go.sum +++ b/extras/hs-test/go.sum @@ -127,6 +127,10 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= 
+github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= +github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= +github.com/quic-go/quic-go v0.48.2 h1:wsKXZPeGWpMpCGSWqOcqpW2wZYic/8T3aqiOID0/KWE= +github.com/quic-go/quic-go v0.48.2/go.mod h1:yBgs3rWBOADpga7F+jJsb6Ybg1LSYiQvwWlLX+/6HMs= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -193,6 +197,8 @@ go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+ go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= +go.uber.org/mock v0.4.0 h1:VcM4ZOtdbR4f6VXfiOpwpVJDL6lCReaZ6mw31wqh7KU= +go.uber.org/mock v0.4.0/go.mod h1:a6FSlNadKUHUa9IP5Vyt1zh4fC7uAwxMutEAscFbkZc= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go4.org/netipx v0.0.0-20231129151722-fdeea329fbba h1:0b9z3AuHCjxk0x/opv64kcgZLBseWJUpBw5I82+2U4M= @@ -200,32 +206,36 @@ go4.org/netipx v0.0.0-20231129151722-fdeea329fbba/go.mod h1:PLyyIXexvUFg3Owu6p/W golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= +golang.org/x/crypto v0.26.0/go.mod 
h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY= golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= +golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/extras/hs-test/http_test.go b/extras/hs-test/http_test.go index 99e812ded14..68934550b69 100644 --- a/extras/hs-test/http_test.go +++ b/extras/hs-test/http_test.go @@ -26,7 +26,7 @@ func init() { RegisterVethTests(HttpCliTest, HttpCliConnectErrorTest) RegisterSoloVethTests(HttpClientGetMemLeakTest) RegisterNoTopoTests(HeaderServerTest, HttpPersistentConnectionTest, HttpPipeliningTest, - HttpStaticMovedTest, HttpStaticNotFoundTest, HttpCliMethodNotAllowedTest, 
+ HttpStaticMovedTest, HttpStaticNotFoundTest, HttpCliMethodNotAllowedTest, HttpAbsoluteFormUriTest, HttpCliBadRequestTest, HttpStaticBuildInUrlGetIfStatsTest, HttpStaticBuildInUrlPostIfStatsTest, HttpInvalidRequestLineTest, HttpMethodNotImplementedTest, HttpInvalidHeadersTest, HttpContentLengthTest, HttpStaticBuildInUrlGetIfListTest, HttpStaticBuildInUrlGetVersionTest, @@ -36,10 +36,10 @@ func init() { HttpClientErrRespTest, HttpClientPostFormTest, HttpClientGet128kbResponseTest, HttpClientGetResponseBodyTest, HttpClientGetNoResponseBodyTest, HttpClientPostFileTest, HttpClientPostFilePtrTest, HttpUnitTest, HttpRequestLineTest, HttpClientGetTimeout, HttpStaticFileHandlerWrkTest, HttpStaticUrlHandlerWrkTest, HttpConnTimeoutTest, - HttpClientGetRepeat, HttpClientPostRepeat, HttpIgnoreH2UpgradeTest) + HttpClientGetRepeat, HttpClientPostRepeat, HttpIgnoreH2UpgradeTest, HttpInvalidAuthorityFormUriTest, HttpHeaderErrorConnectionDropTest) RegisterNoTopoSoloTests(HttpStaticPromTest, HttpGetTpsTest, HttpGetTpsInterruptModeTest, PromConcurrentConnectionsTest, PromMemLeakTest, HttpClientPostMemLeakTest, HttpInvalidClientRequestMemLeakTest, HttpPostTpsTest, HttpPostTpsInterruptModeTest, - PromConsecutiveConnectionsTest) + PromConsecutiveConnectionsTest, HttpGetTpsTlsTest, HttpPostTpsTlsTest) } const wwwRootPath = "/tmp/www_root" @@ -76,6 +76,16 @@ func HttpGetTpsTest(s *NoTopoSuite) { s.RunBenchmark("HTTP tps download 10M", 10, 0, httpDownloadBenchmark, url) } +func HttpGetTpsTlsTest(s *NoTopoSuite) { + vpp := s.Containers.Vpp.VppInstance + serverAddress := s.VppAddr() + url := "https://" + serverAddress + ":8080/test_file_10M" + + vpp.Vppctl("http tps uri tls://0.0.0.0/8080") + + s.RunBenchmark("HTTP tps download 10M", 10, 0, httpDownloadBenchmark, url) +} + func httpUploadBenchmark(s *HstSuite, experiment *gmeasure.Experiment, data interface{}) { url, isValid := data.(string) s.AssertEqual(true, isValid) @@ -109,6 +119,16 @@ func HttpPostTpsTest(s *NoTopoSuite) { 
s.RunBenchmark("HTTP tps upload 10M", 10, 0, httpUploadBenchmark, url) } +func HttpPostTpsTlsTest(s *NoTopoSuite) { + vpp := s.Containers.Vpp.VppInstance + serverAddress := s.VppAddr() + url := "https://" + serverAddress + ":8080/test_file_10M" + + vpp.Vppctl("http tps uri tls://0.0.0.0/8080") + + s.RunBenchmark("HTTP tps upload 10M", 10, 0, httpUploadBenchmark, url) +} + func HttpPersistentConnectionTest(s *NoTopoSuite) { // testing url handler app do not support multi-thread s.SkipIfMultiWorker() @@ -1184,6 +1204,18 @@ func HttpInvalidTargetSyntaxTest(s *NoTopoSuite) { s.AssertNil(err, fmt.Sprint(err)) s.AssertContains(resp, "HTTP/1.1 400 Bad Request", "after '%' there must be two hex-digit characters in target query") + + resp, err = TcpSendReceive(serverAddress+":80", "GET * HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request", "asterisk-form is only used for a server-wide OPTIONS request") + + resp, err = TcpSendReceive(serverAddress+":80", "GET www.example.com:80 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request", "authority-form is only used for CONNECT requests") + + resp, err = TcpSendReceive(serverAddress+":80", "CONNECT https://www.example.com/tunnel HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request", "CONNECT requests must use authority-form only") } func HttpInvalidContentLengthTest(s *NoTopoSuite) { @@ -1208,7 +1240,7 @@ func HttpInvalidContentLengthTest(s *NoTopoSuite) { func HttpContentLengthTest(s *NoTopoSuite) { vpp := s.Containers.Vpp.VppInstance serverAddress := s.VppAddr() - s.Log(vpp.Vppctl("http static server uri tcp://" + serverAddress + "/80 url-handlers debug")) + s.Log(vpp.Vppctl("http static server uri tcp://" + serverAddress + "/80 url-handlers debug max-body-size 12")) ifName := s.VppIfName() resp, err := TcpSendReceive(serverAddress+":80", @@ -1227,6 +1259,25 @@ func 
HttpContentLengthTest(s *NoTopoSuite) { validatePostInterfaceStats(s, resp) } +func HttpHeaderErrorConnectionDropTest(s *NoTopoSuite) { + vpp := s.Containers.Vpp.VppInstance + serverAddress := s.VppAddr() + s.Log(vpp.Vppctl("http static server uri tcp://" + serverAddress + "/80 url-handlers debug max-body-size 12")) + request := "POST /interface_stats.json HTTP/1.1\r\nContent-Length: 18234234\r\n\r\n" + s.VppIfName() + conn, err := net.DialTimeout("tcp", serverAddress+":80", time.Second*30) + s.AssertNil(err, fmt.Sprint(err)) + err = conn.SetDeadline(time.Now().Add(time.Second * 10)) + s.AssertNil(err, fmt.Sprint(err)) + _, err = conn.Write([]byte(request)) + s.AssertNil(err, fmt.Sprint(err)) + reply := make([]byte, 1024) + _, err = conn.Read(reply) + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(string(reply), "HTTP/1.1 413 Content Too Large") + check := make([]byte, 1) + _, err = conn.Read(check) + s.AssertEqual(err, io.EOF) +} func HttpMethodNotImplementedTest(s *NoTopoSuite) { vpp := s.Containers.Vpp.VppInstance serverAddress := s.VppAddr() @@ -1274,6 +1325,58 @@ func HttpUriDecodeTest(s *NoTopoSuite) { s.AssertHttpHeaderWithValue(resp, "Content-Type", "text/html") } +func HttpAbsoluteFormUriTest(s *NoTopoSuite) { + vpp := s.Containers.Vpp.VppInstance + serverAddress := s.VppAddr() + vpp.Vppctl("http cli server") + + resp, err := TcpSendReceive(serverAddress+":80", "GET http://"+serverAddress+"/show/version HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 200 OK") + + resp, err = TcpSendReceive(serverAddress+":80", "GET http://"+serverAddress+":80/show/version HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 200 OK") +} + +func HttpInvalidAuthorityFormUriTest(s *NoTopoSuite) { + vpp := s.Containers.Vpp.VppInstance + serverAddress := s.VppAddr() + vpp.Vppctl("test proxy server fifo-size 512k server-uri http://%s/8080", serverAddress) + + resp, err := 
TcpSendReceive(serverAddress+":8080", "CONNECT 1.2.3.4:80a HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT 1.2.3.4:80000000 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT 1.2a3.4:80 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT 1.2.4:80 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT [dead:beef::1234:443 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT [zyx:beef::1234]:443 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT dead:beef::1234:443 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request") + + resp, err = TcpSendReceive(serverAddress+":8080", "CONNECT example.org:443 HTTP/1.1\r\n\r\n") + s.AssertNil(err, fmt.Sprint(err)) + s.AssertContains(resp, "HTTP/1.1 400 Bad Request", "name resolution not supported") +} + func HttpHeadersTest(s *NoTopoSuite) { vpp := s.Containers.Vpp.VppInstance serverAddress := s.VppAddr() diff --git a/extras/hs-test/infra/connect_udp_client.go b/extras/hs-test/infra/connect_udp_client.go new file mode 100644 index 00000000000..a30ad3a769a --- /dev/null +++ b/extras/hs-test/infra/connect_udp_client.go @@ -0,0 +1,148 @@ +package hst + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/url" + "time" + + 
"github.com/quic-go/quic-go/http3" + "github.com/quic-go/quic-go/quicvarint" +) + +type CapsuleParseError struct { + Err error +} + +func (e *CapsuleParseError) Error() string { + return e.Err.Error() +} + +type ConnectUdpClient struct { + log bool + suite *HstSuite + timeout time.Duration + Conn net.Conn +} + +func (s *HstSuite) NewConnectUdpClient(timeout time.Duration, log bool) *ConnectUdpClient { + client := &ConnectUdpClient{log: log, suite: s, timeout: timeout} + return client +} + +func writeConnectUdpReq(target string) []byte { + var b bytes.Buffer + + fmt.Fprintf(&b, "GET %s HTTP/1.1\r\n", target) + u, _ := url.Parse(target) + fmt.Fprintf(&b, "Host: %s\r\n", u.Host) + fmt.Fprintf(&b, "User-Agent: hs-test\r\n") + fmt.Fprintf(&b, "Connection: Upgrade\r\n") + fmt.Fprintf(&b, "Upgrade: connect-udp\r\n") + fmt.Fprintf(&b, "Capsule-Protocol: ?1\r\n") + io.WriteString(&b, "\r\n") + + return b.Bytes() +} + +func (c *ConnectUdpClient) Dial(proxyAddress, targetUri string) error { + req := writeConnectUdpReq(targetUri) + conn, err := net.DialTimeout("tcp", proxyAddress, c.timeout) + if err != nil { + return err + } + + if c.log { + c.suite.Log("* Connected to proxy") + } + + conn.SetDeadline(time.Now().Add(time.Second * c.timeout)) + _, err = conn.Write(req) + if err != nil { + return err + } + + r := bufio.NewReader(conn) + resp, err := http.ReadResponse(r, nil) + if err != nil { + return err + } + + if c.log { + c.suite.Log(DumpHttpResp(resp, true)) + } + + if resp.StatusCode != http.StatusSwitchingProtocols { + return errors.New("request failed: " + resp.Status) + } + if resp.Header.Get("Connection") != "upgrade" || resp.Header.Get("Upgrade") != "connect-udp" || resp.Header.Get("Capsule-Protocol") != "?1" { + conn.Close() + return errors.New("invalid response") + } + + if c.log { + c.suite.Log("* CONNECT-UDP tunnel established") + } + c.Conn = conn + return nil +} + +func (c *ConnectUdpClient) Close() error { + return c.Conn.Close() +} + +func (c 
*ConnectUdpClient) WriteCapsule(capsuleType http3.CapsuleType, payload []byte) error { + err := c.Conn.SetWriteDeadline(time.Now().Add(c.timeout)) + if err != nil { + return err + } + var buf bytes.Buffer + err = http3.WriteCapsule(&buf, capsuleType, payload) + if err != nil { + return err + } + _, err = c.Conn.Write(buf.Bytes()) + if err != nil { + return err + } + return nil +} + +func (c *ConnectUdpClient) WriteDgramCapsule(payload []byte) error { + b := make([]byte, 0) + b = quicvarint.Append(b, 0) + b = append(b, payload...) + return c.WriteCapsule(0, b) +} + +func (c *ConnectUdpClient) ReadDgramCapsule() ([]byte, error) { + err := c.Conn.SetReadDeadline(time.Now().Add(c.timeout)) + if err != nil { + return nil, err + } + r := bufio.NewReader(c.Conn) + capsuleType, payloadReader, err := http3.ParseCapsule(r) + if err != nil { + return nil, err + } + if capsuleType != 0 { + return nil, &CapsuleParseError{errors.New("capsule type should be 0")} + } + b := make([]byte, 1024) + n, err := payloadReader.Read(b) + if err != nil { + return nil, err + } + if n < 3 { + return nil, &CapsuleParseError{errors.New("response payload too short")} + } + if b[0] != 0 { + return nil, &CapsuleParseError{errors.New("context id should be 0")} + } + return b[1:n], nil +} diff --git a/extras/hs-test/infra/suite_vpp_udp_proxy.go b/extras/hs-test/infra/suite_vpp_udp_proxy.go index 2290aeec6a2..62bf3ddd466 100644 --- a/extras/hs-test/infra/suite_vpp_udp_proxy.go +++ b/extras/hs-test/infra/suite_vpp_udp_proxy.go @@ -5,6 +5,7 @@ import ( "net" "reflect" "runtime" + "strconv" "strings" "time" @@ -15,6 +16,7 @@ type VppUdpProxySuite struct { HstSuite proxyPort int serverPort int + MaxTimeout time.Duration Interfaces struct { Client *NetInterface Server *NetInterface @@ -42,13 +44,21 @@ func (s *VppUdpProxySuite) SetupSuite() { s.Interfaces.Client = s.GetInterfaceByName("hstcln") s.Interfaces.Server = s.GetInterfaceByName("hstsrv") s.Containers.VppProxy = s.GetContainerByName("vpp") + + if 
*IsVppDebug { + s.MaxTimeout = time.Second * 600 + } else { + s.MaxTimeout = time.Second * 2 + } } func (s *VppUdpProxySuite) SetupTest() { s.HstSuite.SetupTest() // VPP proxy - vpp, err := s.Containers.VppProxy.newVppInstance(s.Containers.VppProxy.AllocatedCpus) + var memoryConfig Stanza + memoryConfig.NewStanza("memory").Append("main-heap-size 2G") + vpp, err := s.Containers.VppProxy.newVppInstance(s.Containers.VppProxy.AllocatedCpus, memoryConfig) s.AssertNotNil(vpp, fmt.Sprint(err)) s.AssertNil(vpp.Start()) @@ -119,7 +129,7 @@ func (s *VppUdpProxySuite) StartEchoServer() *net.UDPConn { } } }() - s.Log("started") + s.Log("* started udp echo server " + s.ServerAddr() + ":" + strconv.Itoa(s.ServerPort())) return conn } diff --git a/extras/hs-test/infra/utils.go b/extras/hs-test/infra/utils.go index b690efc32ca..bd603f863fc 100644 --- a/extras/hs-test/infra/utils.go +++ b/extras/hs-test/infra/utils.go @@ -17,6 +17,7 @@ import ( const networkTopologyDir string = "topo-network/" const containerTopologyDir string = "topo-containers/" +const HttpCapsuleTypeDatagram = uint64(0) type Stanza struct { content string diff --git a/extras/hs-test/proxy_test.go b/extras/hs-test/proxy_test.go index d371de46cbb..192451fe8f4 100644 --- a/extras/hs-test/proxy_test.go +++ b/extras/hs-test/proxy_test.go @@ -11,6 +11,7 @@ import ( "net/http" "os" "strconv" + "strings" "sync" "sync/atomic" "time" @@ -23,9 +24,10 @@ func init() { RegisterVppProxyTests(VppProxyHttpGetTcpTest, VppProxyHttpGetTlsTest, VppProxyHttpPutTcpTest, VppProxyHttpPutTlsTest, VppConnectProxyGetTest, VppConnectProxyPutTest) RegisterVppProxySoloTests(VppProxyHttpGetTcpMTTest, VppProxyHttpPutTcpMTTest, VppProxyTcpIperfMTTest, - VppProxyUdpIperfMTTest, VppConnectProxyStressTest, VppConnectProxyStressMTTest) - RegisterVppUdpProxyTests(VppProxyUdpTest) - RegisterVppUdpProxySoloTests(VppProxyUdpMigrationMTTest) + VppProxyUdpIperfMTTest, VppConnectProxyStressTest, VppConnectProxyStressMTTest, 
VppConnectProxyConnectionFailedMTTest) + RegisterVppUdpProxyTests(VppProxyUdpTest, VppConnectUdpProxyTest, VppConnectUdpInvalidCapsuleTest, + VppConnectUdpUnknownCapsuleTest, VppConnectUdpClientCloseTest, VppConnectUdpInvalidTargetTest) + RegisterVppUdpProxySoloTests(VppProxyUdpMigrationMTTest, VppConnectUdpStressMTTest, VppConnectUdpStressTest) RegisterEnvoyProxyTests(EnvoyProxyHttpGetTcpTest, EnvoyProxyHttpPutTcpTest) RegisterNginxProxyTests(NginxMirroringTest) RegisterNginxProxySoloTests(MirrorMultiThreadTest) @@ -185,6 +187,17 @@ func VppConnectProxyGetTest(s *VppProxySuite) { s.CurlDownloadResourceViaTunnel(targetUri, proxyUri) } +func VppConnectProxyConnectionFailedMTTest(s *VppProxySuite) { + var proxyPort uint16 = 8080 + s.SetupNginxServer() + configureVppProxy(s, "http", proxyPort) + + targetUri := fmt.Sprintf("http://%s:%d/httpTestFile", s.ServerAddr(), s.ServerPort()+1) + proxyUri := fmt.Sprintf("http://%s:%d", s.VppProxyAddr(), proxyPort) + _, log := s.CurlRequestViaTunnel(targetUri, proxyUri) + s.AssertContains(log, "HTTP/1.1 502 Bad Gateway") +} + func VppConnectProxyPutTest(s *VppProxySuite) { var proxyPort uint16 = 8080 s.SetupNginxServer() @@ -373,3 +386,269 @@ func VppProxyUdpMigrationMTTest(s *VppUdpProxySuite) { s.Log(s.Containers.VppProxy.VppInstance.Vppctl("show session verbose 2")) } + +func VppConnectUdpProxyTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + cmd := fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + targetUri := fmt.Sprintf("http://%s:%d/.well-known/masque/udp/%s/%d/", s.VppProxyAddr(), s.ProxyPort(), s.ServerAddr(), s.ServerPort()) + c := s.NewConnectUdpClient(s.MaxTimeout, true) + err := c.Dial(proxyAddress, targetUri) + s.AssertNil(err, fmt.Sprint(err)) + defer 
c.Close() + + data := []byte("hello") + + err = c.WriteDgramCapsule(data) + s.AssertNil(err, fmt.Sprint(err)) + payload, err := c.ReadDgramCapsule() + s.AssertNil(err, fmt.Sprint(err)) + s.AssertEqual(data, payload) +} + +func VppConnectUdpInvalidTargetTest(s *VppUdpProxySuite) { + vppProxy := s.Containers.VppProxy.VppInstance + cmd := fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + + targetUri := fmt.Sprintf("http://%s:%d/.well-known/masque/udp/example.com/80/", s.VppProxyAddr(), s.ProxyPort()) + c := s.NewConnectUdpClient(s.MaxTimeout, true) + err := c.Dial(proxyAddress, targetUri) + s.AssertNotNil(err, "name resolution not supported") + + targetUri = fmt.Sprintf("http://%s:%d/.well-known/masque/udp/1.2.3.4/800000000/", s.VppProxyAddr(), s.ProxyPort()) + c = s.NewConnectUdpClient(s.MaxTimeout, true) + err = c.Dial(proxyAddress, targetUri) + s.AssertNotNil(err, "invalid port number") + + targetUri = fmt.Sprintf("http://%s:%d/masque/udp/1.2.3.4/80/", s.VppProxyAddr(), s.ProxyPort()) + c = s.NewConnectUdpClient(s.MaxTimeout, true) + err = c.Dial(proxyAddress, targetUri) + s.AssertNotNil(err, "invalid prefix") +} + +func VppConnectUdpInvalidCapsuleTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + cmd := fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + targetUri := fmt.Sprintf("http://%s:%d/.well-known/masque/udp/%s/%d/", s.VppProxyAddr(), s.ProxyPort(), s.ServerAddr(), s.ServerPort()) + c := s.NewConnectUdpClient(s.MaxTimeout, true) + err := c.Dial(proxyAddress, targetUri) + s.AssertNil(err, fmt.Sprint(err)) + defer c.Close() + + // Capsule 
length is set to 494878333 which exceed maximum allowed UDP payload length 65527 and connection must be aborted + capsule := []byte{ + 0x00, // type + 0x9D, 0x7F, 0x3E, 0x7D, // length + 0x00, // context ID + 0x4B, 0x6E, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x6F, 0x66, 0x20, 0x4E, 0x69, // some extra junk + } + n, err := c.Conn.Write(capsule) + s.AssertNil(err, fmt.Sprint(err)) + s.AssertEqual(n, len(capsule)) + b := make([]byte, 1) + _, err = c.Conn.Read(b) + s.AssertMatchError(err, io.EOF, "connection not closed by proxy") +} + +func VppConnectUdpUnknownCapsuleTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + cmd := fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + targetUri := fmt.Sprintf("http://%s:%d/.well-known/masque/udp/%s/%d/", s.VppProxyAddr(), s.ProxyPort(), s.ServerAddr(), s.ServerPort()) + c := s.NewConnectUdpClient(s.MaxTimeout, true) + err := c.Dial(proxyAddress, targetUri) + s.AssertNil(err, fmt.Sprint(err)) + defer c.Close() + + // Send capsule with unknown type 0x40 which is outside range for standards (0x00 - 0x3f) + // Endpoint that receives capsule with unknown type must silently drop that capsule and skip over to parse the next capsule + err = c.WriteCapsule(0x4040, []byte("None shall pass")) + s.AssertNil(err, fmt.Sprint(err)) + + // Send valid capsule to verify that previous was dropped + data := []byte("hello") + err = c.WriteDgramCapsule(data) + s.AssertNil(err, fmt.Sprint(err)) + payload, err := c.ReadDgramCapsule() + s.AssertNil(err, fmt.Sprint(err)) + s.AssertEqual(data, payload) +} + +func VppConnectUdpClientCloseTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + cmd := 
fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + targetUri := fmt.Sprintf("http://%s:%d/.well-known/masque/udp/%s/%d/", s.VppProxyAddr(), s.ProxyPort(), s.ServerAddr(), s.ServerPort()) + c := s.NewConnectUdpClient(s.MaxTimeout, true) + err := c.Dial(proxyAddress, targetUri) + s.AssertNil(err, fmt.Sprint(err)) + + err = c.Close() + s.AssertNil(err, fmt.Sprint(err)) + proxyClientConn := fmt.Sprintf("[T] %s:%d->%s", s.VppProxyAddr(), s.ProxyPort(), s.ClientAddr()) + proxyTargetConn := fmt.Sprintf("[U] %s:", s.Interfaces.Server.Peer.Ip4AddressString()) + for nTries := 0; nTries < 10; nTries++ { + o := vppProxy.Vppctl("show session verbose 2") + if !strings.Contains(o, proxyClientConn) { + break + } + time.Sleep(1 * time.Second) + } + sessions := vppProxy.Vppctl("show session verbose 2") + s.Log(sessions) + s.AssertNotContains(sessions, proxyClientConn, "client-proxy session not closed") + s.AssertNotContains(sessions, proxyTargetConn, "proxy-server session not closed") +} + +func vppConnectUdpStressLoad(s *VppUdpProxySuite) { + var ( + connectError, timeout, readError, writeError, invalidData, total atomic.Uint32 + wg sync.WaitGroup + ) + + proxyAddress := fmt.Sprintf("%s:%d", s.VppProxyAddr(), s.ProxyPort()) + targetUri := fmt.Sprintf("http://%s/.well-known/masque/udp/%s/%d/", proxyAddress, s.ServerAddr(), s.ServerPort()) + + // warm-up + warmUp := s.NewConnectUdpClient(s.MaxTimeout, false) + err := warmUp.Dial(proxyAddress, targetUri) + s.AssertNil(err, fmt.Sprint(err)) + defer warmUp.Close() + data := []byte("hello") + err = warmUp.WriteDgramCapsule(data) + s.AssertNil(err, fmt.Sprint(err)) + payload, err := warmUp.ReadDgramCapsule() + s.AssertNil(err, fmt.Sprint(err)) + s.AssertEqual(data, payload) + warmUp.Close() + + stop := make(chan struct{}) + + s.Log("Running 30s test @ " + targetUri) + for i := 0; i 
< 1000; i++ { + wg.Add(1) + go func() { + var tot, timed, re, we uint32 + defer wg.Done() + defer func() { + total.Add(tot) + timeout.Add(timed) + readError.Add(re) + writeError.Add(we) + }() + restart: + c := s.NewConnectUdpClient(s.MaxTimeout, false) + e := c.Dial(proxyAddress, targetUri) + if e != nil { + connectError.Add(1) + return + } + defer c.Close() + + req := make([]byte, 64) + rand.Read(req) + + for { + select { + default: + tot += 1 + e = c.WriteDgramCapsule(req) + if e != nil { + if errors.Is(e, os.ErrDeadlineExceeded) { + timed += 1 + } else { + we += 1 + } + continue + } + resp, e := c.ReadDgramCapsule() + if e != nil { + if errors.Is(e, os.ErrDeadlineExceeded) { + timed += 1 + } else if errors.Is(e, err.(*CapsuleParseError)) { + invalidData.Add(1) + } else { + re += 1 + } + c.Close() + goto restart + } + if bytes.Compare(req, resp) != 0 { + invalidData.Add(1) + c.Close() + goto restart + } + case <-stop: + return + } + } + }() + } + for i := 0; i < 30; i++ { + GinkgoWriter.Print(".") + time.Sleep(time.Second) + } + GinkgoWriter.Print("\n") + close(stop) // tell clients to stop + wg.Wait() // wait until clients finish + successRatio := (float64(total.Load()-(timeout.Load()+readError.Load()+writeError.Load()+invalidData.Load())) / float64(total.Load())) * 100.0 + summary := fmt.Sprintf("1000 connections %d requests in 30s", total.Load()) + report := fmt.Sprintf("Requests/sec: %d\n", total.Load()/30) + report += fmt.Sprintf("Errors: timeout %d, read %d, write %d, invalid data received %d, connection %d\n", timeout.Load(), readError.Load(), writeError.Load(), invalidData.Load(), connectError.Load()) + report += fmt.Sprintf("Successes ratio: %.2f%%\n", successRatio) + AddReportEntry(summary, report) + s.AssertGreaterThan(successRatio, 90.0) +} + +func VppConnectUdpStressTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + cmd := fmt.Sprintf("test proxy 
server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + // no goVPP less noise + vppProxy.Disconnect() + + vppConnectUdpStressLoad(s) +} + +func VppConnectUdpStressMTTest(s *VppUdpProxySuite) { + remoteServerConn := s.StartEchoServer() + defer remoteServerConn.Close() + + vppProxy := s.Containers.VppProxy.VppInstance + vppProxy.Disconnect() + cmd := fmt.Sprintf("test proxy server fifo-size 512k server-uri http://%s/%d", s.VppProxyAddr(), s.ProxyPort()) + s.Log(vppProxy.Vppctl(cmd)) + + // no goVPP less noise + vppProxy.Disconnect() + + vppConnectUdpStressLoad(s) +} diff --git a/extras/hs-test/raw_session_test.go b/extras/hs-test/raw_session_test.go index cbf85a54530..c104031f78f 100644 --- a/extras/hs-test/raw_session_test.go +++ b/extras/hs-test/raw_session_test.go @@ -7,6 +7,7 @@ func init() { } func VppEchoQuicTest(s *VethsSuite) { + s.Skip("temp skip (broken?)") testVppEcho(s, "quic") } diff --git a/extras/hs-test/script/build_hst.sh b/extras/hs-test/script/build_hst.sh index 04983b08da7..4e03453fbe9 100755 --- a/extras/hs-test/script/build_hst.sh +++ b/extras/hs-test/script/build_hst.sh @@ -5,13 +5,22 @@ if [ "$(lsb_release -is)" != Ubuntu ]; then exit 1 fi +export VPP_WS=../.. 
export UBUNTU_VERSION=${UBUNTU_VERSION:-"$(lsb_release -rs)"} echo "Ubuntu version is set to ${UBUNTU_VERSION}" +if [ "$1" == "debug" ]; then + VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp_debug-native/vpp +elif [ "$1" == "gcov" ]; then + VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp_gcov-native/vpp +else + VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp-native/vpp +fi + LAST_STATE_FILE=".last_state_hash" # get current state hash and ubuntu version -current_state_hash=$(git status --porcelain | grep -vE '(/\.|/10|\.go$|\.sum$|\.mod$|\.txt$|\.test$)' | sha1sum | awk '{print $1}') +current_state_hash=$(ls -l "$VPP_BUILD_ROOT"/.mu_build_install_timestamp; ls -l docker | sha1sum | awk '{print $1}') current_state_hash=$current_state_hash$UBUNTU_VERSION$1 if [ -f "$LAST_STATE_FILE" ]; then @@ -22,12 +31,10 @@ fi # compare current state with last state and check FORCE_BUILD if [ "$current_state_hash" = "$last_state_hash" ] && [ "$2" = "false" ]; then - echo "*** Skipping docker build - no new changes \ -(excluding .go, .txt, .sum, .mod, dotfiles, IP address files) ***" + echo "*** Skipping docker build - no new changes ***" exit 0 fi -export VPP_WS=../.. 
OS_ARCH="$(uname -m)" DOCKER_BUILD_DIR="/scratch/docker-build" DOCKER_CACHE_DIR="${DOCKER_BUILD_DIR}/docker_cache" @@ -43,13 +50,6 @@ if [ -d "${DOCKER_BUILD_DIR}" ] ; then DOCKER_CACHE_ARGS="--builder=${DOCKER_HST_BUILDER} --load --cache-to type=local,dest=${DOCKER_CACHE_DIR},mode=max --cache-from type=local,src=${DOCKER_CACHE_DIR}" fi -if [ "$1" == "debug" ]; then - VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp_debug-native/vpp -elif [ "$1" == "gcov" ]; then - VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp_gcov-native/vpp -else - VPP_BUILD_ROOT=${VPP_WS}/build-root/build-vpp-native/vpp -fi echo "Taking build objects from ${VPP_BUILD_ROOT}" export HST_LDPRELOAD=${VPP_BUILD_ROOT}/lib/${OS_ARCH}-linux-gnu/libvcl_ldpreload.so diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6d07be0b79..de1ee713d5c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,21 +13,6 @@ cmake_minimum_required(VERSION 3.13) -if(DEFINED VPP_PLATFORM AND VPP_PLATFORM STREQUAL "default") - unset(VPP_PLATFORM) - unset(VPP_PLATFORM CACHE) - set(VPP_PLATFORM_NAME "default") -elseif(DEFINED VPP_PLATFORM) - set(platform_file ${CMAKE_SOURCE_DIR}/cmake/platform/${VPP_PLATFORM}.cmake) - if(NOT EXISTS ${platform_file}) - message(FATAL_ERROR "unknown platform ${VPP_PLATFORM}") - endif() - include(${platform_file}) - set(VPP_PLATFORM_NAME ${VPP_PLATFORM}) -else() - set(VPP_PLATFORM_NAME "default") -endif() - if (DEFINED VPP_PLATFORM_C_COMPILER_NAMES) set(CMAKE_C_COMPILER_NAMES ${VPP_PLATFORM_C_COMPILER_NAME}) else() diff --git a/src/cmake/CMakeLists.txt b/src/cmake/CMakeLists.txt index 74cd385a1ce..8f7ad0e8513 100644 --- a/src/cmake/CMakeLists.txt +++ b/src/cmake/CMakeLists.txt @@ -25,3 +25,13 @@ install( COMPONENT vpp-dev ) + +install( + DIRECTORY + platform/ + + DESTINATION + ${VPP_LIBRARY_DIR}/cmake/vpp/platform + + COMPONENT vpp-dev +) diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake index 25e966dcf7a..b1b802a5506 100644 --- a/src/cmake/cpu.cmake +++ b/src/cmake/cpu.cmake @@ 
-22,6 +22,31 @@ macro(set_log2_cacheline_size var n) endmacro() ############################################################################## +# Platform selection +############################################################################## + +if(DEFINED VPP_PLATFORM AND VPP_PLATFORM STREQUAL "default") + unset(VPP_PLATFORM) + unset(VPP_PLATFORM CACHE) + set(VPP_PLATFORM_NAME "default") +elseif(DEFINED VPP_PLATFORM) + set(platform_file ${CMAKE_CURRENT_LIST_DIR}/platform/${VPP_PLATFORM}.cmake) + if(NOT EXISTS ${platform_file}) + message(FATAL_ERROR "unknown platform ${VPP_PLATFORM}") + endif() + include(${platform_file}) + set(VPP_PLATFORM_NAME ${VPP_PLATFORM}) +else() + set(VPP_PLATFORM_NAME "default") +endif() + +if (DEFINED VPP_PLATFORM_C_COMPILER_NAMES) + set(CMAKE_C_COMPILER_NAMES ${VPP_PLATFORM_C_COMPILER_NAME}) +else() + set(CMAKE_C_COMPILER_NAMES clang gcc cc) +endif() + +############################################################################## # Cache line size ############################################################################## diff --git a/src/crypto_engines/ipsecmb/ipsecmb.c b/src/crypto_engines/ipsecmb/ipsecmb.c index 256856bed8c..9981d738401 100644 --- a/src/crypto_engines/ipsecmb/ipsecmb.c +++ b/src/crypto_engines/ipsecmb/ipsecmb.c @@ -50,7 +50,6 @@ typedef struct static ipsecmb_main_t ipsecmb_main = { }; -/* clang-format off */ /* * (Alg, JOB_HASH_ALG, fn, block-size-bytes, hash-size-bytes, digest-size-bytes) */ @@ -73,13 +72,21 @@ static ipsecmb_main_t ipsecmb_main = { }; _ (AES_256_CTR, 256, CNTR) /* - * (Alg, key-len-bytes, iv-len-bytes) + * (Alg, key-len-bytes, iv-len-bytes, fixed, aad-len) */ -#define foreach_ipsecmb_gcm_cipher_op \ - _(AES_128_GCM, 128) \ - _(AES_192_GCM, 192) \ - _(AES_256_GCM, 256) -/* clang-format on */ +#define foreach_ipsecmb_gcm_cipher_op \ + _ (AES_128_GCM, 128, 0, 0) \ + _ (AES_128_GCM_TAG16_AAD8, 128, 1, 8) \ + _ (AES_128_GCM_TAG16_AAD12, 128, 1, 12) \ + _ (AES_192_GCM, 192, 0, 0) \ + _ 
(AES_192_GCM_TAG16_AAD8, 192, 1, 8) \ + _ (AES_192_GCM_TAG16_AAD12, 192, 1, 12) \ + _ (AES_256_GCM, 256, 0, 0) \ + _ (AES_256_GCM_TAG16_AAD8, 256, 1, 8) \ + _ (AES_256_GCM_TAG16_AAD12, 256, 1, 12) + +#define foreach_chacha_poly_fixed_aad_lengths _ (0) _ (8) _ (12) + static_always_inline vnet_crypto_op_status_t ipsecmb_status_job (IMB_STATUS status) { @@ -234,7 +241,6 @@ ipsecmb_ops_hmac_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, } #endif -/* clang-format off */ #define _(a, b, c, d, e, f) \ static_always_inline u32 \ ipsecmb_ops_hmac_##a (vlib_main_t * vm, \ @@ -245,7 +251,6 @@ ipsecmb_ops_hmac_##a (vlib_main_t * vm, \ foreach_ipsecmb_hmac_op; #undef _ -/* clang-format on */ always_inline void ipsecmb_retire_cipher_job (IMB_JOB *job, u32 *n_fail) @@ -368,7 +373,6 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], } #endif -/* clang-format off */ #define _(a, b, c) \ static_always_inline u32 ipsecmb_ops_cipher_enc_##a ( \ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ @@ -387,143 +391,131 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], foreach_ipsecmb_cipher_op; #undef _ -#define _(a, b) \ -static_always_inline u32 \ -ipsecmb_ops_gcm_cipher_enc_##a##_chained (vlib_main_t * vm, \ - vnet_crypto_op_t * ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops) \ -{ \ - ipsecmb_main_t *imbm = &ipsecmb_main; \ - ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + \ - vm->thread_index; \ - IMB_MGR *m = ptd->mgr; \ - vnet_crypto_op_chunk_t *chp; \ - u32 i, j; \ - \ - for (i = 0; i < n_ops; i++) \ - { \ - struct gcm_key_data *kd; \ - struct gcm_context_data ctx; \ - vnet_crypto_op_t *op = ops[i]; \ - \ - kd = (struct gcm_key_data *) imbm->key_data[op->key_index]; \ - ASSERT (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS); \ - IMB_AES##b##_GCM_INIT(m, kd, &ctx, op->iv, op->aad, op->aad_len); \ - chp = chunks + op->chunk_index; \ - for (j = 0; j < op->n_chunks; j++) \ - { \ - 
IMB_AES##b##_GCM_ENC_UPDATE (m, kd, &ctx, chp->dst, chp->src, \ - chp->len); \ - chp += 1; \ - } \ - IMB_AES##b##_GCM_ENC_FINALIZE(m, kd, &ctx, op->tag, op->tag_len); \ - \ - op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; \ - } \ - \ - return n_ops; \ -} \ - \ -static_always_inline u32 \ -ipsecmb_ops_gcm_cipher_enc_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \ - u32 n_ops) \ -{ \ - ipsecmb_main_t *imbm = &ipsecmb_main; \ - ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + \ - vm->thread_index; \ - IMB_MGR *m = ptd->mgr; \ - u32 i; \ - \ - for (i = 0; i < n_ops; i++) \ - { \ - struct gcm_key_data *kd; \ - struct gcm_context_data ctx; \ - vnet_crypto_op_t *op = ops[i]; \ - \ - kd = (struct gcm_key_data *) imbm->key_data[op->key_index]; \ - IMB_AES##b##_GCM_ENC (m, kd, &ctx, op->dst, op->src, op->len, op->iv, \ - op->aad, op->aad_len, op->tag, op->tag_len); \ - \ - op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; \ - } \ - \ - return n_ops; \ -} \ - \ -static_always_inline u32 \ -ipsecmb_ops_gcm_cipher_dec_##a##_chained (vlib_main_t * vm, \ - vnet_crypto_op_t * ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops) \ -{ \ - ipsecmb_main_t *imbm = &ipsecmb_main; \ - ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + \ - vm->thread_index; \ - IMB_MGR *m = ptd->mgr; \ - vnet_crypto_op_chunk_t *chp; \ - u32 i, j, n_failed = 0; \ - \ - for (i = 0; i < n_ops; i++) \ - { \ - struct gcm_key_data *kd; \ - struct gcm_context_data ctx; \ - vnet_crypto_op_t *op = ops[i]; \ - u8 scratch[64]; \ - \ - kd = (struct gcm_key_data *) imbm->key_data[op->key_index]; \ - ASSERT (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS); \ - IMB_AES##b##_GCM_INIT(m, kd, &ctx, op->iv, op->aad, op->aad_len); \ - chp = chunks + op->chunk_index; \ - for (j = 0; j < op->n_chunks; j++) \ - { \ - IMB_AES##b##_GCM_DEC_UPDATE (m, kd, &ctx, chp->dst, chp->src, \ - chp->len); \ - chp += 1; \ - } \ - IMB_AES##b##_GCM_DEC_FINALIZE(m, kd, &ctx, scratch, op->tag_len); \ - \ - if ((memcmp (op->tag, 
scratch, op->tag_len))) \ - { \ - op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; \ - n_failed++; \ - } \ - else \ - op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; \ - } \ - \ - return n_ops - n_failed; \ -} \ - \ -static_always_inline u32 \ -ipsecmb_ops_gcm_cipher_dec_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \ - u32 n_ops) \ -{ \ - ipsecmb_main_t *imbm = &ipsecmb_main; \ - ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + \ - vm->thread_index; \ - IMB_MGR *m = ptd->mgr; \ - u32 i, n_failed = 0; \ - \ - for (i = 0; i < n_ops; i++) \ - { \ - struct gcm_key_data *kd; \ - struct gcm_context_data ctx; \ - vnet_crypto_op_t *op = ops[i]; \ - u8 scratch[64]; \ - \ - kd = (struct gcm_key_data *) imbm->key_data[op->key_index]; \ - IMB_AES##b##_GCM_DEC (m, kd, &ctx, op->dst, op->src, op->len, op->iv, \ - op->aad, op->aad_len, scratch, op->tag_len); \ - \ - if ((memcmp (op->tag, scratch, op->tag_len))) \ - { \ - op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; \ - n_failed++; \ - } \ - else \ - op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; \ - } \ - \ - return n_ops - n_failed; \ +typedef struct +{ + aes_gcm_enc_dec_t enc_dec_fn; + aes_gcm_init_t init_fn; + aes_gcm_enc_dec_update_t upd_fn; + aes_gcm_enc_dec_finalize_t finalize_fn; + u32 is_dec; + u32 chained; + u32 fixed; + u32 aadlen; +} ipsecmb_ops_gcm_args_t; + +static_always_inline u32 +ipsecmb_ops_gcm (vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, + u32 n_ops, ipsecmb_ops_gcm_args_t a) +{ + ipsecmb_main_t *imbm = &ipsecmb_main; + vnet_crypto_op_chunk_t *chp; + u32 i, j, n_failed = 0; + + for (i = 0; i < n_ops; i++) + { + struct gcm_key_data *kd; + struct gcm_context_data ctx; + vnet_crypto_op_t *op = ops[i]; + u8 scratch[64], *tag = a.is_dec ? 
scratch : op->tag; + u32 taglen = 16, aadlen = a.aadlen; + + if (!a.fixed) + { + aadlen = op->aad_len; + taglen = op->tag_len; + } + + kd = (struct gcm_key_data *) imbm->key_data[op->key_index]; + if (a.chained) + { + ASSERT (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS); + a.init_fn (kd, &ctx, op->iv, op->aad, aadlen); + chp = chunks + op->chunk_index; + for (j = 0; j < op->n_chunks; j++) + { + a.upd_fn (kd, &ctx, chp->dst, chp->src, chp->len); + chp += 1; + } + a.finalize_fn (kd, &ctx, tag, taglen); + } + else + { + a.enc_dec_fn (kd, &ctx, op->dst, op->src, op->len, op->iv, op->aad, + aadlen, tag, taglen); + } + + if (a.is_dec && (memcmp (op->tag, tag, taglen))) + { + op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + n_failed++; + } + else + op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; + } + + return n_ops - n_failed; +} + +static_always_inline IMB_MGR * +get_mgr (vlib_main_t *vm) +{ + ipsecmb_main_t *imbm = &ipsecmb_main; + ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + vm->thread_index; + return ptd->mgr; } -/* clang-format on */ + +#define _(a, b, f, l) \ + static_always_inline u32 ipsecmb_ops_gcm_cipher_enc_##a ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return ipsecmb_ops_gcm ( \ + ops, 0, n_ops, \ + (ipsecmb_ops_gcm_args_t){ .enc_dec_fn = get_mgr (vm)->gcm##b##_enc, \ + .fixed = (f), \ + .aadlen = (l) }); \ + } \ + \ + static_always_inline u32 ipsecmb_ops_gcm_cipher_enc_##a##_chained ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + IMB_MGR *m = get_mgr (vm); \ + return ipsecmb_ops_gcm ( \ + ops, chunks, n_ops, \ + (ipsecmb_ops_gcm_args_t){ .init_fn = m->gcm##b##_init, \ + .upd_fn = m->gcm##b##_enc_update, \ + .finalize_fn = m->gcm##b##_enc_finalize, \ + .chained = 1, \ + .fixed = (f), \ + .aadlen = (l) }); \ + } \ + \ + static_always_inline u32 ipsecmb_ops_gcm_cipher_dec_##a ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return 
ipsecmb_ops_gcm ( \ + ops, 0, n_ops, \ + (ipsecmb_ops_gcm_args_t){ .enc_dec_fn = get_mgr (vm)->gcm##b##_dec, \ + .fixed = (f), \ + .aadlen = (l), \ + .is_dec = 1 }); \ + } \ + \ + static_always_inline u32 ipsecmb_ops_gcm_cipher_dec_##a##_chained ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + IMB_MGR *m = get_mgr (vm); \ + return ipsecmb_ops_gcm ( \ + ops, chunks, n_ops, \ + (ipsecmb_ops_gcm_args_t){ .init_fn = m->gcm##b##_init, \ + .upd_fn = m->gcm##b##_dec_update, \ + .finalize_fn = m->gcm##b##_dec_finalize, \ + .chained = 1, \ + .fixed = (f), \ + .aadlen = (l), \ + .is_dec = 1 }); \ + } foreach_ipsecmb_gcm_cipher_op; #undef _ @@ -558,7 +550,7 @@ ipsecmb_retire_aead_job (IMB_JOB *job, u32 *n_fail) static_always_inline u32 ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, - IMB_CIPHER_DIRECTION dir) + IMB_CIPHER_DIRECTION dir, u32 fixed, u32 aad_len) { ipsecmb_main_t *imbm = &ipsecmb_main; ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + vm->thread_index; @@ -589,7 +581,8 @@ ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, job->key_len_in_bytes = 32; job->u.CHACHA20_POLY1305.aad = op->aad; - job->u.CHACHA20_POLY1305.aad_len_in_bytes = op->aad_len; + job->u.CHACHA20_POLY1305.aad_len_in_bytes = + fixed ? 
aad_len : op->aad_len; job->src = op->src; job->dst = op->dst; @@ -622,20 +615,36 @@ static_always_inline u32 ipsecmb_ops_chacha_poly_enc (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) { - return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_ENCRYPT); + return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_ENCRYPT, 0, 0); } static_always_inline u32 ipsecmb_ops_chacha_poly_dec (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) { - return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_DECRYPT); + return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_DECRYPT, 0, 0); } -static_always_inline u32 -ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], - vnet_crypto_op_chunk_t *chunks, u32 n_ops, - IMB_CIPHER_DIRECTION dir) +#define _(a) \ + static_always_inline u32 ipsecmb_ops_chacha_poly_tag16_aad##a##_enc ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_ENCRYPT, 1, a); \ + } \ + \ + static_always_inline u32 ipsecmb_ops_chacha_poly_tag16_aad##a##_dec ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return ipsecmb_ops_chacha_poly (vm, ops, n_ops, IMB_DIR_DECRYPT, 1, a); \ + } +foreach_chacha_poly_fixed_aad_lengths +#undef _ + + static_always_inline u32 + ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], + vnet_crypto_op_chunk_t *chunks, u32 n_ops, + IMB_CIPHER_DIRECTION dir, u32 fixed, + u32 aad_len) { ipsecmb_main_t *imbm = &ipsecmb_main; ipsecmb_per_thread_data_t *ptd = imbm->per_thread_data + vm->thread_index; @@ -663,7 +672,7 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], } IMB_CHACHA20_POLY1305_INIT (m, key, &ctx, op->iv, op->aad, - op->aad_len); + fixed ? 
aad_len : op->aad_len); chp = chunks + op->chunk_index; for (j = 0; j < op->n_chunks; j++) @@ -673,7 +682,8 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], chp += 1; } - IMB_CHACHA20_POLY1305_ENC_FINALIZE (m, &ctx, op->tag, op->tag_len); + IMB_CHACHA20_POLY1305_ENC_FINALIZE (m, &ctx, op->tag, + fixed ? 16 : op->tag_len); op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; } @@ -699,7 +709,7 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], } IMB_CHACHA20_POLY1305_INIT (m, key, &ctx, op->iv, op->aad, - op->aad_len); + fixed ? aad_len : op->aad_len); chp = chunks + op->chunk_index; for (j = 0; j < op->n_chunks; j++) @@ -709,9 +719,10 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], chp += 1; } - IMB_CHACHA20_POLY1305_DEC_FINALIZE (m, &ctx, scratch, op->tag_len); + IMB_CHACHA20_POLY1305_DEC_FINALIZE (m, &ctx, scratch, + fixed ? 16 : op->tag_len); - if (memcmp (op->tag, scratch, op->tag_len)) + if (memcmp (op->tag, scratch, fixed ? 
16 : op->tag_len)) { n_fail = n_fail + 1; op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; @@ -730,7 +741,7 @@ ipsec_mb_ops_chacha_poly_enc_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) { return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops, - IMB_DIR_ENCRYPT); + IMB_DIR_ENCRYPT, 0, 0); } static_always_inline u32 @@ -739,13 +750,34 @@ ipsec_mb_ops_chacha_poly_dec_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) { return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops, - IMB_DIR_DECRYPT); + IMB_DIR_DECRYPT, 0, 0); } + +#define _(a) \ + static_always_inline u32 \ + ipsec_mb_ops_chacha_poly_tag16_aad##a##_enc_chained ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], \ + vnet_crypto_op_chunk_t *chunks, u32 n_ops) \ + { \ + return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops, \ + IMB_DIR_ENCRYPT, 1, a); \ + } \ + \ + static_always_inline u32 \ + ipsec_mb_ops_chacha_poly_tag16_aad##a##_dec_chained ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], \ + vnet_crypto_op_chunk_t *chunks, u32 n_ops) \ + { \ + return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops, \ + IMB_DIR_DECRYPT, 1, a); \ + } +foreach_chacha_poly_fixed_aad_lengths +#undef _ #endif -static void -crypto_ipsecmb_key_handler (vnet_crypto_key_op_t kop, - vnet_crypto_key_index_t idx) + static void + crypto_ipsecmb_key_handler (vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx) { ipsecmb_main_t *imbm = &ipsecmb_main; vnet_crypto_key_t *key = vnet_crypto_get_key (idx); @@ -754,7 +786,7 @@ crypto_ipsecmb_key_handler (vnet_crypto_key_op_t kop, void *kd; /** TODO: add linked alg support **/ - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) return; if (kop == VNET_CRYPTO_KEY_OP_DEL) @@ -805,10 +837,10 @@ crypto_ipsecmb_key_handler (vnet_crypto_key_op_t kop, u64 pad[block_qw], key_hash[block_qw]; clib_memset_u8 (key_hash, 0, HMAC_MAX_BLOCK_SIZE); - if (vec_len (key->data) <= ad->block_size) - clib_memcpy_fast (key_hash, key->data, 
vec_len (key->data)); + if (key->length <= ad->block_size) + clib_memcpy_fast (key_hash, key->data, key->length); else - ad->hash_fn (key->data, vec_len (key->data), key_hash); + ad->hash_fn (key->data, key->length, key_hash); for (i = 0; i < block_qw; i++) pad[i] = key_hash[i] ^ 0x3636363636363636; @@ -870,7 +902,7 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r) foreach_ipsecmb_cipher_op; #undef _ -#define _(a, b) \ +#define _(a, b, f, l) \ ad = imbm->alg_data + VNET_CRYPTO_ALG_##a; \ ad->data_size = sizeof (struct gcm_key_data); \ ad->aes_gcm_pre = m->gcm##b##_pre; @@ -887,7 +919,7 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r) } vnet_crypto_engine_op_handlers_t op_handlers[] = { -#define _(a, b) \ +#define _(a, b, f, l) \ { \ .opt = VNET_CRYPTO_OP_##a##_ENC, \ .fn = ipsecmb_ops_gcm_cipher_enc_##a, \ @@ -918,6 +950,19 @@ vnet_crypto_engine_op_handlers_t op_handlers[] = { { .opt = VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, .fn = ipsecmb_ops_chacha_poly_dec, .cfn = ipsec_mb_ops_chacha_poly_dec_chained }, +#define _(a) \ + { \ + .opt = VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD##a##_ENC, \ + .fn = ipsecmb_ops_chacha_poly_tag16_aad##a##_enc, \ + .cfn = ipsec_mb_ops_chacha_poly_tag16_aad##a##_enc_chained, \ + }, \ + { \ + .opt = VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD##a##_DEC, \ + .fn = ipsecmb_ops_chacha_poly_tag16_aad##a##_dec, \ + .cfn = ipsec_mb_ops_chacha_poly_tag16_aad##a##_dec_chained, \ + }, + foreach_chacha_poly_fixed_aad_lengths +#undef _ #endif {} }; diff --git a/src/crypto_engines/native/aes_gcm.c b/src/crypto_engines/native/aes_gcm.c index 57eee17f3d0..efabf341982 100644 --- a/src/crypto_engines/native/aes_gcm.c +++ b/src/crypto_engines/native/aes_gcm.c @@ -26,8 +26,8 @@ #endif static_always_inline u32 -aes_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, - aes_key_size_t ks) +aes_ops_enc_aes_gcm (vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks, + u32 fixed, u32 aad_len) { crypto_native_main_t *cm = 
&crypto_native_main; vnet_crypto_op_t *op = ops[0]; @@ -37,8 +37,8 @@ aes_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, next: kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index]; aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len, - op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks), - AES_GCM_OP_ENCRYPT); + fixed ? aad_len : op->aad_len, fixed ? 16 : op->tag_len, kd, + AES_KEY_ROUNDS (ks), AES_GCM_OP_ENCRYPT); op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; if (--n_left) @@ -51,8 +51,8 @@ next: } static_always_inline u32 -aes_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, - aes_key_size_t ks) +aes_ops_dec_aes_gcm (vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks, + u32 fixed, u32 aad_len) { crypto_native_main_t *cm = &crypto_native_main; vnet_crypto_op_t *op = ops[0]; @@ -63,8 +63,8 @@ aes_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, next: kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index]; rv = aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len, - op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks), - AES_GCM_OP_DECRYPT); + fixed ? aad_len : op->aad_len, fixed ? 
16 : op->tag_len, kd, + AES_KEY_ROUNDS (ks), AES_GCM_OP_DECRYPT); if (rv) { @@ -103,12 +103,32 @@ aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks) static u32 aes_ops_dec_aes_gcm_##x (vlib_main_t *vm, \ vnet_crypto_op_t *ops[], u32 n_ops) \ { \ - return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \ + return aes_ops_dec_aes_gcm (ops, n_ops, AES_KEY_##x, 0, 0); \ } \ static u32 aes_ops_enc_aes_gcm_##x (vlib_main_t *vm, \ vnet_crypto_op_t *ops[], u32 n_ops) \ { \ - return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \ + return aes_ops_enc_aes_gcm (ops, n_ops, AES_KEY_##x, 0, 0); \ + } \ + static u32 aes_ops_dec_aes_gcm_##x##_tag16_aad8 ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_dec_aes_gcm (ops, n_ops, AES_KEY_##x, 1, 8); \ + } \ + static u32 aes_ops_enc_aes_gcm_##x##_tag16_aad8 ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_enc_aes_gcm (ops, n_ops, AES_KEY_##x, 1, 8); \ + } \ + static u32 aes_ops_dec_aes_gcm_##x##_tag16_aad12 ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_dec_aes_gcm (ops, n_ops, AES_KEY_##x, 1, 12); \ + } \ + static u32 aes_ops_enc_aes_gcm_##x##_tag16_aad12 ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_enc_aes_gcm (ops, n_ops, AES_KEY_##x, 1, 12); \ } \ static void *aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \ { \ @@ -156,6 +176,30 @@ probe () .fn = aes_ops_dec_aes_gcm_##b, \ .probe = probe, \ }; \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc_tag16_aad8) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_TAG16_AAD8_ENC, \ + .fn = aes_ops_enc_aes_gcm_##b##_tag16_aad8, \ + .probe = probe, \ + }; \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec_tag16_aad8) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_TAG16_AAD8_DEC, \ + .fn = aes_ops_dec_aes_gcm_##b##_tag16_aad8, \ + .probe = probe, \ + }; \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc_tag16_aad12) = { \ + .op_id = 
VNET_CRYPTO_OP_AES_##b##_GCM_TAG16_AAD12_ENC, \ + .fn = aes_ops_enc_aes_gcm_##b##_tag16_aad12, \ + .probe = probe, \ + }; \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec_tag16_aad12) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_TAG16_AAD12_DEC, \ + .fn = aes_ops_dec_aes_gcm_##b##_tag16_aad12, \ + .probe = probe, \ + }; \ + \ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_gcm) = { \ .alg_id = VNET_CRYPTO_ALG_AES_##b##_GCM, \ .key_fn = aes_gcm_key_exp_##b, \ diff --git a/src/crypto_engines/native/crypto_native.h b/src/crypto_engines/native/crypto_native.h index 0fcb6a99524..9e2a6b42e52 100644 --- a/src/crypto_engines/native/crypto_native.h +++ b/src/crypto_engines/native/crypto_native.h @@ -25,8 +25,8 @@ typedef struct crypto_native_op_handler { struct crypto_native_op_handler *next; vnet_crypto_op_id_t op_id; - vnet_crypto_ops_handler_t *fn; - vnet_crypto_chained_ops_handler_t *cfn; + vnet_crypto_simple_op_fn_t *fn; + vnet_crypto_chained_op_fn_t *cfn; crypto_native_variant_probe_t *probe; int priority; } crypto_native_op_handler_t; diff --git a/src/crypto_engines/native/main.c b/src/crypto_engines/native/main.c index e9e71b6fb6d..97dbe6cf67e 100644 --- a/src/crypto_engines/native/main.c +++ b/src/crypto_engines/native/main.c @@ -9,7 +9,7 @@ #include <native/crypto_native.h> crypto_native_main_t crypto_native_main; -vnet_crypto_engine_op_handlers_t op_handlers[24], *ophp = op_handlers; +vnet_crypto_engine_op_handlers_t op_handlers[64], *ophp = op_handlers; static void crypto_native_key_handler (vnet_crypto_key_op_t kop, @@ -19,7 +19,7 @@ crypto_native_key_handler (vnet_crypto_key_op_t kop, crypto_native_main_t *cm = &crypto_native_main; /** TODO: add linked alg support **/ - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) return; if (cm->key_fn[key->alg] == 0) diff --git a/src/crypto_engines/native/sha2.c b/src/crypto_engines/native/sha2.c index b61a5f08060..46a71b5b327 100644 --- a/src/crypto_engines/native/sha2.c +++ b/src/crypto_engines/native/sha2.c @@ 
-110,7 +110,7 @@ sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type) clib_sha2_hmac_key_data_t *kd; kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); - clib_sha2_hmac_key_data (type, key->data, vec_len (key->data), kd); + clib_sha2_hmac_key_data (type, key->data, key->length, kd); return kd; } diff --git a/src/crypto_engines/openssl/main.c b/src/crypto_engines/openssl/main.c index c5636add266..f6c2229d3cd 100644 --- a/src/crypto_engines/openssl/main.c +++ b/src/crypto_engines/openssl/main.c @@ -28,23 +28,35 @@ static openssl_per_thread_data_t *per_thread_data; static u32 num_threads; #define foreach_openssl_aes_evp_op \ - _ (cbc, DES_CBC, EVP_des_cbc) \ - _ (cbc, 3DES_CBC, EVP_des_ede3_cbc) \ - _ (cbc, AES_128_CBC, EVP_aes_128_cbc) \ - _ (cbc, AES_192_CBC, EVP_aes_192_cbc) \ - _ (cbc, AES_256_CBC, EVP_aes_256_cbc) \ - _ (gcm, AES_128_GCM, EVP_aes_128_gcm) \ - _ (gcm, AES_192_GCM, EVP_aes_192_gcm) \ - _ (gcm, AES_256_GCM, EVP_aes_256_gcm) \ - _ (cbc, AES_128_CTR, EVP_aes_128_ctr) \ - _ (cbc, AES_192_CTR, EVP_aes_192_ctr) \ - _ (cbc, AES_256_CTR, EVP_aes_256_ctr) \ - _ (null_gmac, AES_128_NULL_GMAC, EVP_aes_128_gcm) \ - _ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm) \ - _ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm) + _ (cbc, DES_CBC, EVP_des_cbc, 0, 0) \ + _ (cbc, 3DES_CBC, EVP_des_ede3_cbc, 0, 0) \ + _ (cbc, AES_128_CBC, EVP_aes_128_cbc, 0, 0) \ + _ (cbc, AES_192_CBC, EVP_aes_192_cbc, 0, 0) \ + _ (cbc, AES_256_CBC, EVP_aes_256_cbc, 0, 0) \ + _ (gcm, AES_128_GCM, EVP_aes_128_gcm, 0, 0) \ + _ (gcm, AES_128_GCM_TAG16_AAD8, EVP_aes_128_gcm, 1, 8) \ + _ (gcm, AES_128_GCM_TAG16_AAD12, EVP_aes_128_gcm, 1, 12) \ + _ (gcm, AES_192_GCM, EVP_aes_192_gcm, 0, 0) \ + _ (gcm, AES_192_GCM_TAG16_AAD8, EVP_aes_192_gcm, 1, 8) \ + _ (gcm, AES_192_GCM_TAG16_AAD12, EVP_aes_192_gcm, 1, 12) \ + _ (gcm, AES_256_GCM, EVP_aes_256_gcm, 0, 0) \ + _ (gcm, AES_256_GCM_TAG16_AAD8, EVP_aes_256_gcm, 1, 8) \ + _ (gcm, AES_256_GCM_TAG16_AAD12, EVP_aes_256_gcm, 1, 12) \ 
+ _ (cbc, AES_128_CTR, EVP_aes_128_ctr, 0, 0) \ + _ (cbc, AES_192_CTR, EVP_aes_192_ctr, 0, 0) \ + _ (cbc, AES_256_CTR, EVP_aes_256_ctr, 0, 0) \ + _ (null_gmac, AES_128_NULL_GMAC, EVP_aes_128_gcm, 0, 0) \ + _ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm, 0, 0) \ + _ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm, 0, 0) #define foreach_openssl_chacha20_evp_op \ - _ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305) + _ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305, 0, 0) \ + _ (chacha20_poly1305, CHACHA20_POLY1305_TAG16_AAD0, EVP_chacha20_poly1305, \ + 1, 0) \ + _ (chacha20_poly1305, CHACHA20_POLY1305_TAG16_AAD8, EVP_chacha20_poly1305, \ + 1, 8) \ + _ (chacha20_poly1305, CHACHA20_POLY1305_TAG16_AAD12, EVP_chacha20_poly1305, \ + 1, 12) #if OPENSSL_VERSION_NUMBER >= 0x10100000L #define foreach_openssl_evp_op foreach_openssl_aes_evp_op \ @@ -81,7 +93,7 @@ crypto_openssl_main_t crypto_openssl_main; static_always_inline u32 openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aad_len) { openssl_per_thread_data_t *ptd = per_thread_data + vm->thread_index; EVP_CIPHER_CTX *ctx; @@ -135,7 +147,7 @@ openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[], static_always_inline u32 openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aad_len) { openssl_per_thread_data_t *ptd = per_thread_data + vm->thread_index; EVP_CIPHER_CTX *ctx; @@ -189,7 +201,8 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[], static_always_inline u32 openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher, int is_gcm, int is_gmac) + const EVP_CIPHER *cipher, int is_gcm, int is_gmac, + u32 fixed, u32 aadlen) { openssl_per_thread_data_t 
*ptd = per_thread_data + vm->thread_index; EVP_CIPHER_CTX *ctx; @@ -199,6 +212,7 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], { vnet_crypto_op_t *op = ops[i]; int len = 0; + u32 taglen = 16; if (i + 2 < n_ops) { @@ -213,8 +227,14 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], ctx = ptd->evp_cipher_enc_ctx[op->key_index]; EVP_EncryptInit_ex (ctx, 0, 0, NULL, op->iv); - if (op->aad_len) - EVP_EncryptUpdate (ctx, NULL, &len, op->aad, op->aad_len); + if (!fixed) + { + taglen = op->tag_len; + aadlen = op->aad_len; + } + + if (aadlen) + EVP_EncryptUpdate (ctx, NULL, &len, op->aad, aadlen); if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS) { chp = chunks + op->chunk_index; @@ -228,7 +248,7 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], else EVP_EncryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src, op->len); EVP_EncryptFinal_ex (ctx, is_gmac ? 0 : op->dst + len, &len); - EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_GET_TAG, op->tag_len, op->tag); + EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_GET_TAG, taglen, op->tag); op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; } return n_ops; @@ -237,34 +257,36 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], static_always_inline u32 openssl_ops_enc_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aadlen) { return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 1, /* is_gmac */ 1); + /* is_gcm */ 1, /* is_gmac */ 1, fixed, aadlen); } static_always_inline u32 openssl_ops_enc_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aadlen) { return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 1, /* is_gmac */ 0); + /* is_gcm */ 1, /* is_gmac */ 0, fixed, aadlen); } static_always_inline __clib_unused 
u32 openssl_ops_enc_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, + u32 aadlen) { return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 0, /* is_gmac */ 0); + /* is_gcm */ 0, /* is_gmac */ 0, fixed, aadlen); } static_always_inline u32 openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher, int is_gcm, int is_gmac) + const EVP_CIPHER *cipher, int is_gcm, int is_gmac, + u32 fixed, u32 aadlen) { openssl_per_thread_data_t *ptd = per_thread_data + vm->thread_index; EVP_CIPHER_CTX *ctx; @@ -274,11 +296,17 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], { vnet_crypto_op_t *op = ops[i]; int len = 0; + u32 taglen = 16; + if (!fixed) + { + taglen = op->tag_len; + aadlen = op->aad_len; + } ctx = ptd->evp_cipher_dec_ctx[op->key_index]; EVP_DecryptInit_ex (ctx, 0, 0, NULL, op->iv); if (op->aad_len) - EVP_DecryptUpdate (ctx, 0, &len, op->aad, op->aad_len); + EVP_DecryptUpdate (ctx, 0, &len, op->aad, aadlen); if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS) { chp = chunks + op->chunk_index; @@ -294,7 +322,7 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], EVP_DecryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src, op->len); } - EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_SET_TAG, op->tag_len, op->tag); + EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_SET_TAG, taglen, op->tag); if (EVP_DecryptFinal_ex (ctx, is_gmac ? 
0 : op->dst + len, &len) > 0) op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; @@ -310,28 +338,32 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[], static_always_inline u32 openssl_ops_dec_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aad_len) { return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 1, /* is_gmac */ 1); + /* is_gcm */ 1, /* is_gmac */ 1, fixed, + aad_len); } static_always_inline u32 openssl_ops_dec_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, u32 aad_len) { return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 1, /* is_gmac */ 0); + /* is_gcm */ 1, /* is_gmac */ 0, fixed, + aad_len); } static_always_inline __clib_unused u32 openssl_ops_dec_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, u32 n_ops, - const EVP_CIPHER *cipher) + const EVP_CIPHER *cipher, u32 fixed, + u32 aad_len) { return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher, - /* is_gcm */ 0, /* is_gmac */ 0); + /* is_gcm */ 0, /* is_gmac */ 0, fixed, + aad_len); } static_always_inline u32 @@ -494,7 +526,7 @@ openssl_ctx_hmac (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop, vec_validate_aligned (ptd->hmac_ctx, idx, CLIB_CACHE_LINE_BYTES); #if OPENSSL_VERSION_NUMBER >= 0x10100000L ctx = HMAC_CTX_new (); - HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL); + HMAC_Init_ex (ctx, key->data, key->length, md, NULL); ptd->hmac_ctx[idx] = ctx; #else HMAC_CTX_init (&(ptd->_hmac_ctx)); @@ -507,7 +539,7 @@ openssl_ctx_hmac (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop, for (ptd = per_thread_data; ptd - per_thread_data < num_threads; ptd++) { ctx = ptd->hmac_ctx[idx]; - HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL); + HMAC_Init_ex (ctx, 
key->data, key->length, md, NULL); } } else if (VNET_CRYPTO_KEY_OP_DEL == kop) @@ -530,7 +562,7 @@ crypto_openssl_key_handler (vnet_crypto_key_op_t kop, crypto_openssl_main_t *cm = &crypto_openssl_main; /** TODO: add linked alg support **/ - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) return; if (cm->ctx_fn[key->alg] == 0) @@ -539,31 +571,31 @@ crypto_openssl_key_handler (vnet_crypto_key_op_t kop, cm->ctx_fn[key->alg](key, kop, idx); } -#define _(m, a, b) \ +#define _(m, a, b, f, l) \ static u32 openssl_ops_enc_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \ u32 n_ops) \ { \ - return openssl_ops_enc_##m (vm, ops, 0, n_ops, b ()); \ + return openssl_ops_enc_##m (vm, ops, 0, n_ops, b (), f, l); \ } \ \ u32 openssl_ops_dec_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \ u32 n_ops) \ { \ - return openssl_ops_dec_##m (vm, ops, 0, n_ops, b ()); \ + return openssl_ops_dec_##m (vm, ops, 0, n_ops, b (), f, l); \ } \ \ static u32 openssl_ops_enc_chained_##a ( \ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ u32 n_ops) \ { \ - return openssl_ops_enc_##m (vm, ops, chunks, n_ops, b ()); \ + return openssl_ops_enc_##m (vm, ops, chunks, n_ops, b (), f, l); \ } \ \ static u32 openssl_ops_dec_chained_##a ( \ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ u32 n_ops) \ { \ - return openssl_ops_dec_##m (vm, ops, chunks, n_ops, b ()); \ + return openssl_ops_dec_##m (vm, ops, chunks, n_ops, b (), f, l); \ } \ static void *openssl_ctx_##a (vnet_crypto_key_t *key, \ vnet_crypto_key_op_t kop, \ @@ -630,7 +662,7 @@ crypto_openssl_init (vnet_crypto_engine_registration_t *r) RAND_seed (seed, sizeof (seed)); -#define _(m, a, b) cm->ctx_fn[VNET_CRYPTO_ALG_##a] = openssl_ctx_##a; +#define _(m, a, b, f, l) cm->ctx_fn[VNET_CRYPTO_ALG_##a] = openssl_ctx_##a; foreach_openssl_evp_op; #undef _ @@ -648,7 +680,7 @@ crypto_openssl_init (vnet_crypto_engine_registration_t *r) } vnet_crypto_engine_op_handlers_t op_handlers[] = { 
-#define _(m, a, b) \ +#define _(m, a, b, f, l) \ { \ .opt = VNET_CRYPTO_OP_##a##_ENC, \ .fn = openssl_ops_enc_##a, \ diff --git a/src/plugins/af_packet/af_packet.c b/src/plugins/af_packet/af_packet.c index 69245429918..8cb2af27d7f 100644 --- a/src/plugins/af_packet/af_packet.c +++ b/src/plugins/af_packet/af_packet.c @@ -189,7 +189,6 @@ af_packet_fd_read_ready (clib_file_t * uf) static clib_error_t * af_packet_fd_error (clib_file_t *uf) { - af_packet_main_t *apm = &af_packet_main; clib_error_t *err = 0; u64 u64; @@ -198,8 +197,20 @@ af_packet_fd_error (clib_file_t *uf) if (ret < 0) { err = clib_error_return_unix (0, ""); - vlib_log_notice (apm->log_class, "fd %u %U", uf->file_descriptor, - format_clib_error, err); + ELOG_TYPE_DECLARE (e) = { + .format = "af-packet-msg: fd %u reason %s", + .format_args = "i4T4", + }; + struct + { + u32 fd; + u32 reason; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->fd = uf->file_descriptor; + ed->reason = + elog_string (vlib_get_elog_main (), "%U", format_clib_error, err); + clib_error_free (err); } diff --git a/src/plugins/crypto_sw_scheduler/main.c b/src/plugins/crypto_sw_scheduler/main.c index a594f30f823..dc97ce937d9 100644 --- a/src/plugins/crypto_sw_scheduler/main.c +++ b/src/plugins/crypto_sw_scheduler/main.c @@ -59,7 +59,7 @@ crypto_sw_scheduler_key_handler (vnet_crypto_key_op_t kop, vec_validate (cm->keys, idx); - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) { if (kop == VNET_CRYPTO_KEY_OP_DEL) { @@ -401,7 +401,7 @@ crypto_sw_scheduler_process_link (vlib_main_t *vm, } static_always_inline int -convert_async_crypto_id (vnet_crypto_async_op_id_t async_op_id, u32 *crypto_op, +convert_async_crypto_id (vnet_crypto_op_id_t async_op_id, u32 *crypto_op, u32 *auth_op_or_aad_len, u16 *digest_len, u8 *is_enc) { switch (async_op_id) diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c index 9c710aed7fd..800f24a008a 100644 --- a/src/plugins/dev_octeon/crypto.c +++ 
b/src/plugins/dev_octeon/crypto.c @@ -63,10 +63,13 @@ oct_crypto_session_create (vlib_main_t *vm, vnet_crypto_key_index_t key_index, oct_crypto_sess_t *session; vnet_crypto_key_t *key; oct_crypto_key_t *ckey; + oct_crypto_dev_t *ocd; + + ocd = ocm->crypto_dev[op_type]; key = vnet_crypto_get_key (key_index); - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) { /* * Read crypto or integ key session. And map link key index to same. @@ -89,6 +92,7 @@ oct_crypto_session_create (vlib_main_t *vm, vnet_crypto_key_index_t key_index, session = oct_crypto_session_alloc (vm, op_type); if (session == NULL) return -1; + session->crypto_dev = ocd; } oct_map_keyindex_to_session (session, key_index, op_type); @@ -117,6 +121,12 @@ oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], ckey->sess->key_index); ckey_linked->sess = NULL; } + + /* Trigger CTX flush + invalidate to remove from CTX_CACHE */ + if (oct_hw_ctx_cache_enable ()) + roc_cpt_lf_ctx_flush (&ckey->sess->crypto_dev->lf, + &ckey->sess->cpt_ctx.se_ctx, true); + oct_plt_init_param.oct_plt_free (ckey->sess); ckey->sess = NULL; } @@ -134,6 +144,11 @@ oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index) ckey_linked->sess = NULL; } + /* Trigger CTX flush + invalidate to remove from CTX_CACHE */ + if (oct_hw_ctx_cache_enable ()) + roc_cpt_lf_ctx_flush (&ckey->sess->crypto_dev->lf, + &ckey->sess->cpt_ctx.se_ctx, true); + oct_plt_init_param.oct_plt_free (ckey->sess); ckey->sess = NULL; } @@ -1060,12 +1075,11 @@ oct_crypto_cpt_hmac_prep (u32 flags, u64 d_offs, u64 d_lens, } static_always_inline int -oct_crypto_fill_fc_params (oct_crypto_sess_t *sess, struct cpt_inst_s *inst, - const bool is_aead, u8 aad_length, u8 *payload, - vnet_crypto_async_frame_elt_t *elts, void *mdata, - u32 cipher_data_length, u32 cipher_data_offset, - u32 auth_data_length, u32 auth_data_offset, - vlib_buffer_t *b, u16 adj_len) 
+oct_crypto_scatter_gather_mode ( + oct_crypto_sess_t *sess, struct cpt_inst_s *inst, const bool is_aead, + u8 aad_length, u8 *payload, vnet_crypto_async_frame_elt_t *elts, void *mdata, + u32 cipher_data_length, u32 cipher_data_offset, u32 auth_data_length, + u32 auth_data_offset, vlib_buffer_t *b, u16 adj_len) { struct roc_se_fc_params fc_params = { 0 }; struct roc_se_ctx *ctx = &sess->cpt_ctx; @@ -1162,6 +1176,10 @@ oct_cpt_inst_w7_get (oct_crypto_sess_t *sess, struct roc_cpt *roc_cpt) inst_w7.u64 = 0; inst_w7.s.cptr = (u64) &sess->cpt_ctx.se_ctx.fctx; + + if (oct_hw_ctx_cache_enable ()) + inst_w7.s.ctx_val = 1; + /* Set the engine group */ inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE]; @@ -1181,7 +1199,7 @@ oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, key = vnet_crypto_get_key (key_index); - switch (key->async_alg) + switch (key->alg) { case VNET_CRYPTO_ALG_AES_128_CBC_SHA1_TAG12: case VNET_CRYPTO_ALG_AES_192_CBC_SHA1_TAG12: @@ -1232,6 +1250,27 @@ oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, auth_type = ROC_SE_SHA1_TYPE; digest_len = 12; break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA256_TAG16: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA256_TAG16: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA256; + digest_len = 16; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA384_TAG24: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA384_TAG24: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA384; + digest_len = 24; + break; + case VNET_CRYPTO_ALG_AES_128_CTR_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_192_CTR_SHA512_TAG32: + case VNET_CRYPTO_ALG_AES_256_CTR_SHA512_TAG32: + enc_type = ROC_SE_AES_CTR; + auth_type = ROC_SE_SHA2_SHA512; + digest_len = 32; + break; case VNET_CRYPTO_ALG_3DES_CBC_MD5_TAG12: enc_type = ROC_SE_DES3_CBC; auth_type = ROC_SE_MD5_TYPE; @@ -1264,8 +1303,8 @@ oct_crypto_link_session_update 
(vlib_main_t *vm, oct_crypto_sess_t *sess, break; default: clib_warning ( - "Cryptodev: Undefined link algo %u specified. Key index %u", - key->async_alg, key_index); + "Cryptodev: Undefined link algo %u specified. Key index %u", key->alg, + key_index); return -1; } @@ -1279,7 +1318,7 @@ oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, crypto_key = vnet_crypto_get_key (key->index_crypto); rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, crypto_key->data, - vec_len (crypto_key->data)); + crypto_key->length); if (rv) { clib_warning ("Cryptodev: Error in setting cipher key for enc type %u", @@ -1290,7 +1329,7 @@ oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, auth_key = vnet_crypto_get_key (key->index_integ); rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, auth_key->data, - vec_len (auth_key->data), digest_len); + auth_key->length, digest_len); if (rv) { clib_warning ("Cryptodev: Error in setting auth key for auth type %u", @@ -1298,6 +1337,13 @@ oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, return -1; } + sess->cpt_ctx.template_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT; + else + sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT; + return 0; } @@ -1311,7 +1357,7 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, u32 digest_len = ~0; i32 rv = 0; - switch (key->async_alg) + switch (key->alg) { case VNET_CRYPTO_ALG_AES_128_GCM: case VNET_CRYPTO_ALG_AES_192_GCM: @@ -1331,12 +1377,11 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, default: clib_warning ( "Cryptodev: Undefined cipher algo %u specified. 
Key index %u", - key->async_alg, key_index); + key->alg, key_index); return -1; } - rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data, - vec_len (key->data)); + rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data, key->length); if (rv) { clib_warning ("Cryptodev: Error in setting cipher key for enc type %u", @@ -1352,6 +1397,13 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess, return -1; } + sess->cpt_ctx.template_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT; + else + sess->cpt_ctx.template_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT; + if (enc_type == ROC_SE_CHACHA20) sess->cpt_ctx.template_w4.s.opcode_minor |= BIT (5); @@ -1371,7 +1423,7 @@ oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session, key = vnet_crypto_get_key (key_index); - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) rv = oct_crypto_link_session_update (vm, session, key_index, op_type); else rv = oct_crypto_aead_session_update (vm, session, key_index, op_type); @@ -1387,6 +1439,9 @@ oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session, session->cpt_inst_w7 = oct_cpt_inst_w7_get (session, session->crypto_dev->roc_cpt); + if (oct_hw_ctx_cache_enable ()) + roc_se_ctx_init (&session->cpt_ctx); + session->initialised = 1; return 0; @@ -1405,6 +1460,138 @@ oct_crypto_update_frame_error_status (vnet_crypto_async_frame_t *f, u32 index, f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED; } +static_always_inline void +oct_crypto_direct_mode_linked (vlib_buffer_t *buffer, struct cpt_inst_s *inst, + oct_crypto_sess_t *sess, + oct_crypto_inflight_req_t *infl_req, u8 aad_len) +{ + u32 encr_offset, auth_offset, iv_offset; + vnet_crypto_async_frame_elt_t *elts; + union cpt_inst_w4 cpt_inst_w4; + u64 *offset_control_word; + u32 crypto_total_length; + u32 auth_dlen, enc_dlen; + u32 enc_auth_len; + + 
elts = infl_req->fe; + enc_auth_len = elts->crypto_total_length + elts->integ_length_adj; + crypto_total_length = elts->crypto_total_length; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + { + /* + * Position the offset control word so that it does not + * overlap with the IV. + */ + offset_control_word = (void *) (buffer->data) - ROC_SE_OFF_CTRL_LEN - 4; + + iv_offset = + (void *) elts->iv - (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + } + else + { + offset_control_word = (void *) (elts->iv) - ROC_SE_OFF_CTRL_LEN; + iv_offset = 0; + } + + encr_offset = (void *) (buffer->data + elts->crypto_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + auth_offset = (void *) (buffer->data + elts->integ_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + *offset_control_word = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + cpt_inst_w4.u64 = sess->cpt_ctx.template_w4.u64; + + cpt_inst_w4.s.param1 = crypto_total_length; + cpt_inst_w4.s.param2 = enc_auth_len; + + auth_dlen = auth_offset + enc_auth_len + ROC_SE_OFF_CTRL_LEN; + enc_dlen = encr_offset + crypto_total_length + ROC_SE_OFF_CTRL_LEN; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + cpt_inst_w4.s.dlen = auth_dlen + sess->cpt_ctx.mac_len; + else + { + /* + * In the case of ESN, 4 bytes of the seqhi will be stored at the end of + * the cipher. This data must be overwritten by the digest data during + * the dequeue process. 
+ */ + if (auth_dlen > enc_dlen) + infl_req->esn_enabled = true; + + cpt_inst_w4.s.dlen = auth_dlen; + } + + infl_req->mac_len = sess->cpt_ctx.mac_len; + + inst->dptr = (uint64_t) offset_control_word; + inst->rptr = (uint64_t) ((void *) offset_control_word + ROC_SE_OFF_CTRL_LEN); + inst->w4.u64 = cpt_inst_w4.u64; +} + +static_always_inline void +oct_crypto_direct_mode_aead (vlib_buffer_t *buffer, struct cpt_inst_s *inst, + oct_crypto_sess_t *sess, + oct_crypto_inflight_req_t *infl_req, u8 aad_len) +{ + u32 encr_offset, auth_offset, iv_offset; + u32 auth_copy_offset, iv_copy_offset; + vnet_crypto_async_frame_elt_t *elts; + union cpt_inst_w4 cpt_inst_w4; + u64 *offset_control_word; + u32 crypto_total_length; + + elts = infl_req->fe; + crypto_total_length = elts->crypto_total_length; + + ((u32 *) elts->iv)[3] = clib_host_to_net_u32 (0x1); + + offset_control_word = (void *) (elts->aad) - ROC_SE_OFF_CTRL_LEN; + encr_offset = (void *) (buffer->data + elts->crypto_start_offset) - + (void *) offset_control_word - ROC_SE_OFF_CTRL_LEN; + iv_offset = elts->iv - elts->aad; + auth_offset = encr_offset - aad_len; + + *offset_control_word = clib_host_to_net_u64 ( + ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset)); + + cpt_inst_w4.u64 = sess->cpt_ctx.template_w4.u64; + + cpt_inst_w4.s.param1 = crypto_total_length; + cpt_inst_w4.s.param2 = crypto_total_length + aad_len; + + if (sess->cpt_op == VNET_CRYPTO_OP_TYPE_DECRYPT) + cpt_inst_w4.s.dlen = encr_offset + elts->crypto_total_length + + ROC_SE_OFF_CTRL_LEN + sess->cpt_ctx.mac_len; + else + cpt_inst_w4.s.dlen = + encr_offset + elts->crypto_total_length + ROC_SE_OFF_CTRL_LEN; + + inst->dptr = (uint64_t) offset_control_word; + inst->rptr = (uint64_t) ((void *) offset_control_word + ROC_SE_OFF_CTRL_LEN); + inst->w4.u64 = cpt_inst_w4.u64; + + /* + * CPT hardware requires the AAD to be followed by the cipher packet. 
+ * Therefore, maintain a copy of the AAD and IV in the inflight request, + * and write the AAD in front of the cipher data before submission. + */ + auth_copy_offset = encr_offset - sess->cpt_ctx.mac_len; + iv_copy_offset = encr_offset - 8; + + clib_memcpy_fast (infl_req->aad, + ((void *) inst->dptr) + auth_copy_offset + 8, 8); + clib_memcpy_fast (infl_req->iv, ((void *) inst->dptr) + iv_copy_offset + 8, + 8); + clib_memcpy_fast (((void *) inst->dptr) + encr_offset + ROC_SE_OFF_CTRL_LEN - + aad_len, + elts->aad, aad_len); + + infl_req->aead_algo = true; +} + static_always_inline int oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame, const u8 is_aead, u8 aad_len, const u8 type) @@ -1422,30 +1609,35 @@ oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame, u32 crypto_total_length; oct_crypto_key_t *key; vlib_buffer_t *buffer; + void *sg_data; u16 adj_len; - int ret; + int ret = 0; /* GCM packets having 8 bytes of aad and 8 bytes of iv */ u8 aad_iv = 8 + 8; pend_q = &ocm->pend_q[vlib_get_thread_index ()]; - enq_tail = pend_q->enq_tail; - nb_infl_allowed = pend_q->n_desc - pend_q->n_crypto_inflight; - if (PREDICT_FALSE (nb_infl_allowed == 0)) + if (PREDICT_FALSE (nb_infl_allowed < frame->n_elts)) { oct_crypto_update_frame_error_status ( frame, 0, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); return -1; } - infl_req = &pend_q->req_queue[enq_tail]; - infl_req->frame = frame; + sg_data = pend_q->sg_data; for (i = 0; i < frame->n_elts; i++) { + enq_tail = pend_q->enq_tail; + infl_req = &pend_q->req_queue[enq_tail]; + infl_req->frame = frame; + infl_req->last_elts = false; + infl_req->index = i; + elts = &frame->elts[i]; + infl_req->fe = elts; buffer_index = frame->buffer_indices[i]; key = vec_elt_at_index (ocm->keys[type], elts->key_index); @@ -1459,7 +1651,13 @@ oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame, sess = key->sess; if (PREDICT_FALSE (!sess->initialised)) - oct_crypto_session_init 
(vm, sess, elts->key_index, type); + ret = oct_crypto_session_init (vm, sess, elts->key_index, type); + if (ret) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } crypto_dev = sess->crypto_dev; @@ -1469,66 +1667,86 @@ oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame, if (is_aead) { - dptr_start_ptr = - (u64) (buffer->data + (elts->crypto_start_offset - aad_iv)); - curr_ptr = (u64) (buffer->data + buffer->current_data); - adj_len = (u16) (dptr_start_ptr - curr_ptr); - - crypto_total_length = elts->crypto_total_length; - crypto_start_offset = aad_iv; - integ_start_offset = 0; - - ret = oct_crypto_fill_fc_params ( - sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, - (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i, - crypto_total_length /* cipher_len */, - crypto_start_offset /* cipher_offset */, 0 /* auth_len */, - integ_start_offset /* auth_off */, buffer, adj_len); - if (PREDICT_FALSE (ret < 0)) + if (buffer->flags & VLIB_BUFFER_NEXT_PRESENT) { - oct_crypto_update_frame_error_status ( - frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); - return -1; + dptr_start_ptr = + (u64) (buffer->data + (elts->crypto_start_offset - aad_iv)); + curr_ptr = (u64) (buffer->data + buffer->current_data); + adj_len = (u16) (dptr_start_ptr - curr_ptr); + + crypto_total_length = elts->crypto_total_length; + crypto_start_offset = aad_iv; + integ_start_offset = 0; + + ret = oct_crypto_scatter_gather_mode ( + sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, + ((oct_crypto_scatter_gather_t *) (sg_data)) + enq_tail, + crypto_total_length /* cipher_len */, + crypto_start_offset /* cipher_offset */, 0 /* auth_len */, + integ_start_offset /* auth_off */, buffer, adj_len); + + if (PREDICT_FALSE (ret < 0)) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + } + else + { + oct_crypto_direct_mode_aead (buffer, 
inst + i, sess, infl_req, + aad_len); } } else { - dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset); - - enc_auth_len = elts->crypto_total_length + elts->integ_length_adj; - - curr_ptr = (u64) (buffer->data + buffer->current_data); - adj_len = (u16) (dptr_start_ptr - curr_ptr); - - crypto_total_length = elts->crypto_total_length; - crypto_start_offset = - elts->crypto_start_offset - elts->integ_start_offset; - integ_start_offset = 0; - - ret = oct_crypto_fill_fc_params ( - sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, - (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i, - crypto_total_length /* cipher_len */, - crypto_start_offset /* cipher_offset */, - enc_auth_len /* auth_len */, integ_start_offset /* auth_off */, - buffer, adj_len); - if (PREDICT_FALSE (ret < 0)) + if (buffer->flags & VLIB_BUFFER_NEXT_PRESENT) { - oct_crypto_update_frame_error_status ( - frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); - return -1; + dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset); + + curr_ptr = (u64) (buffer->data + buffer->current_data); + adj_len = (u16) (dptr_start_ptr - curr_ptr); + + crypto_start_offset = + elts->crypto_start_offset - elts->integ_start_offset; + integ_start_offset = 0; + enc_auth_len = + elts->crypto_total_length + elts->integ_length_adj; + crypto_total_length = elts->crypto_total_length; + + ret = oct_crypto_scatter_gather_mode ( + sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts, + ((oct_crypto_scatter_gather_t *) (sg_data)) + enq_tail, + crypto_total_length /* cipher_len */, + crypto_start_offset /* cipher_offset */, + enc_auth_len /* auth_len */, integ_start_offset /* auth_off */, + buffer, adj_len); + + if (PREDICT_FALSE (ret < 0)) + { + oct_crypto_update_frame_error_status ( + frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR); + return -1; + } + } + else + { + oct_crypto_direct_mode_linked (buffer, inst + i, sess, infl_req, + aad_len); } } inst[i].w7.u64 = sess->cpt_inst_w7; - 
inst[i].res_addr = (u64) &infl_req->res[i]; + inst[i].res_addr = (u64) &infl_req->res; + OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc); } oct_crypto_burst_submit (crypto_dev, inst, frame->n_elts); - infl_req->elts = frame->n_elts; - OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc); - pend_q->n_crypto_inflight++; + infl_req->last_elts = true; + + pend_q->n_crypto_inflight += frame->n_elts; + pend_q->n_crypto_frame++; return 0; } @@ -1618,22 +1836,23 @@ oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, oct_crypto_pending_queue_t *pend_q; vnet_crypto_async_frame_t *frame; volatile union cpt_res_s *res; - int i; + bool last_elts_processed; + vlib_buffer_t *buffer; pend_q = &ocm->pend_q[vlib_get_thread_index ()]; - if (!pend_q->n_crypto_inflight) + if (!pend_q->n_crypto_frame) return NULL; - deq_head = pend_q->deq_head; - infl_req = &pend_q->req_queue[deq_head]; - frame = infl_req->frame; - - fe = frame->elts; + last_elts_processed = false; - for (i = infl_req->deq_elts; i < infl_req->elts; ++i) + for (; last_elts_processed == false;) { - res = &infl_req->res[i]; + deq_head = pend_q->deq_head; + infl_req = &pend_q->req_queue[deq_head]; + fe = infl_req->fe; + + res = &infl_req->res; if (PREDICT_FALSE (res->cn10k.compcode == CPT_COMP_NOT_DONE)) return NULL; @@ -1641,19 +1860,38 @@ oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, if (PREDICT_FALSE (res->cn10k.uc_compcode)) { if (res->cn10k.uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE) - status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + status = fe->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; else - status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR; + status = fe->status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR; + } + + buffer = + vlib_get_buffer (vm, infl_req->frame->buffer_indices[infl_req->index]); + + /* + * For AEAD, copy the AAD and IV back to their original positions. 
+ * If ESN is enabled (in case of linked algo), overwrite the ESN + * seqhi at the end of the cipher with the digest data. + */ + if (infl_req->aead_algo) + { + clib_memcpy_fast (buffer->data + fe->crypto_start_offset - 8, + infl_req->iv, 8); + clib_memcpy_fast (buffer->data + fe->crypto_start_offset - 16, + infl_req->aad, 8); } + else if (infl_req->esn_enabled) + clib_memcpy_fast (fe->digest, fe->digest + 4, infl_req->mac_len); - infl_req->deq_elts++; + clib_memset ((void *) &infl_req->res, 0, sizeof (union cpt_res_s)); + last_elts_processed = infl_req->last_elts; + OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc); } - clib_memset ((void *) infl_req->res, 0, - sizeof (union cpt_res_s) * VNET_CRYPTO_FRAME_SIZE); + frame = infl_req->frame; - OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc); - pend_q->n_crypto_inflight--; + pend_q->n_crypto_frame--; + pend_q->n_crypto_inflight -= frame->n_elts; frame->state = status == VNET_CRYPTO_OP_STATUS_COMPLETED ? VNET_CRYPTO_FRAME_STATE_SUCCESS : @@ -1662,9 +1900,6 @@ oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed, *nb_elts_processed = frame->n_elts; *enqueue_thread_idx = frame->enqueue_thread_index; - infl_req->deq_elts = 0; - infl_req->elts = 0; - return frame; } @@ -1710,9 +1945,8 @@ oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) oct_crypto_main_t *ocm = &oct_crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); extern oct_plt_init_param_t oct_plt_init_param; - oct_crypto_inflight_req_t *infl_req_queue; u32 n_inflight_req; - int i, j = 0; + int i; ocm->pend_q = oct_plt_init_param.oct_plt_zmalloc ( tm->n_vlib_mains * sizeof (oct_crypto_pending_queue_t), @@ -1727,8 +1961,7 @@ oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) * Each pending queue will get number of cpt desc / number of cores. * And that desc count is shared across inflight entries. 
*/ - n_inflight_req = - (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains) / VNET_CRYPTO_FRAME_SIZE; + n_inflight_req = (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains); for (i = 0; i < tm->n_vlib_mains; ++i) { @@ -1744,35 +1977,26 @@ oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev) goto free; } - for (j = 0; j <= ocm->pend_q[i].n_desc; ++j) + ocm->pend_q[i].sg_data = oct_plt_init_param.oct_plt_zmalloc ( + OCT_SCATTER_GATHER_BUFFER_SIZE * ocm->pend_q[i].n_desc, + CLIB_CACHE_LINE_BYTES); + if (ocm->pend_q[i].sg_data == NULL) { - infl_req_queue = &ocm->pend_q[i].req_queue[j]; - - infl_req_queue->sg_data = oct_plt_init_param.oct_plt_zmalloc ( - OCT_SCATTER_GATHER_BUFFER_SIZE * VNET_CRYPTO_FRAME_SIZE, - CLIB_CACHE_LINE_BYTES); - if (infl_req_queue->sg_data == NULL) - { - log_err (dev, "Failed to allocate crypto scatter gather memory"); - goto free; - } + log_err (dev, "Failed to allocate crypto scatter gather memory"); + goto free; } } + return 0; + free: for (; i >= 0; i--) { if (ocm->pend_q[i].req_queue == NULL) continue; - for (; j >= 0; j--) - { - infl_req_queue = &ocm->pend_q[i].req_queue[j]; - if (infl_req_queue->sg_data == NULL) - continue; + oct_plt_init_param.oct_plt_free (ocm->pend_q[i].sg_data); - oct_plt_init_param.oct_plt_free (infl_req_queue->sg_data); - } oct_plt_init_param.oct_plt_free (ocm->pend_q[i].req_queue); } oct_plt_init_param.oct_plt_free (ocm->pend_q); diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h index 0a3b796d3dd..5bd26f6b9be 100644 --- a/src/plugins/dev_octeon/crypto.h +++ b/src/plugins/dev_octeon/crypto.h @@ -49,7 +49,16 @@ _ (3DES_CBC, SHA512, 24, 32) \ _ (AES_128_CTR, SHA1, 16, 12) \ _ (AES_192_CTR, SHA1, 24, 12) \ - _ (AES_256_CTR, SHA1, 32, 12) + _ (AES_256_CTR, SHA1, 32, 12) \ + _ (AES_128_CTR, SHA256, 16, 16) \ + _ (AES_192_CTR, SHA256, 24, 16) \ + _ (AES_256_CTR, SHA256, 32, 16) \ + _ (AES_128_CTR, SHA384, 16, 24) \ + _ (AES_192_CTR, SHA384, 24, 24) \ + _ (AES_256_CTR, SHA384, 32, 24) \ + _ (AES_128_CTR, 
SHA512, 16, 32) \ + _ (AES_192_CTR, SHA512, 24, 32) \ + _ (AES_256_CTR, SHA512, 32, 32) #define OCT_MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++) @@ -114,17 +123,28 @@ typedef struct oct_crypto_scatter_gather typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - /** Result data of all entries in the frame */ - volatile union cpt_res_s res[VNET_CRYPTO_FRAME_SIZE]; - /** Scatter gather data */ - void *sg_data; + /** Result data */ + volatile union cpt_res_s res; /** Frame pointer */ vnet_crypto_async_frame_t *frame; - /** Number of async elements in frame */ - u16 elts; - /** Next read entry in frame, when dequeue */ - u16 deq_elts; -} oct_crypto_inflight_req_t; + /** Async frame element */ + vnet_crypto_async_frame_elt_t *fe; + /** AAD meta data */ + u8 aad[8]; + /** IV meta data */ + u8 iv[16]; + /** Digest len */ + u8 mac_len; + /** aead */ + bool aead_algo; + /** Set when encrypting linked algo with esn. + * To move digest data */ + bool esn_enabled; + /** Set if this is last element in frame */ + bool last_elts; + /** Index of element in frame */ + int index; +} __plt_cache_aligned oct_crypto_inflight_req_t; typedef struct { @@ -132,23 +152,34 @@ typedef struct oct_crypto_inflight_req_t *req_queue; /** Number of inflight operations in queue */ u32 n_crypto_inflight; + /** Number of frames in queue */ + u32 n_crypto_frame; /** Tail of queue to be used for enqueue */ u16 enq_tail; /** Head of queue to be used for dequeue */ u16 deq_head; /** Number of descriptors */ u16 n_desc; + /** Scatter gather data */ + void *sg_data; } oct_crypto_pending_queue_t; typedef struct { oct_crypto_dev_t *crypto_dev[OCT_MAX_N_CPT_DEV]; - oct_crypto_key_t *keys[VNET_CRYPTO_ASYNC_OP_N_TYPES]; + oct_crypto_key_t *keys[VNET_CRYPTO_OP_N_TYPES]; oct_crypto_pending_queue_t *pend_q; int n_cpt; u8 started; } oct_crypto_main_t; +static_always_inline bool +oct_hw_ctx_cache_enable (void) +{ + return roc_errata_cpt_hang_on_mixed_ctx_val () || + roc_model_is_cn10ka_b0 () || 
roc_model_is_cn10kb_a0 (); +} + extern oct_crypto_main_t oct_crypto_main; void oct_crypto_key_del_handler (vlib_main_t *vm, diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 99cadddfc24..561cbe94fed 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -211,12 +211,12 @@ oct_conf_cpt (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd, log_err (dev, "Could not add CPT IE engines"); return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add"); } - if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != ROC_CPT_DFLT_ENG_GRP_SE_IE) + if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != ROC_LEGACY_CPT_DFLT_ENG_GRP_SE_IE) { log_err (dev, "Invalid CPT IE engine group configuration"); return -1; } - if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_CPT_DFLT_ENG_GRP_SE) + if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_LEGACY_CPT_DFLT_ENG_GRP_SE) { log_err (dev, "Invalid CPT SE engine group configuration"); return -1; @@ -248,7 +248,7 @@ oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd) roc_cpt_iq_enable (cpt_lf); - if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0) < 0)) + if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0, false) < 0)) return cnx_return_roc_err (dev, rrv, "roc_cpt_lmtline_init"); return 0; @@ -344,8 +344,9 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev) return rv; } + STATIC_ASSERT (sizeof (cd->plt_pci_dev.name) == sizeof (dev->device_id), ""); strncpy ((char *) cd->plt_pci_dev.name, dev->device_id, - sizeof (cd->plt_pci_dev.name) - 1); + sizeof (dev->device_id)); switch (cd->type) { @@ -410,6 +411,15 @@ oct_plugin_init (vlib_main_t *vm) rv = roc_model_init (&oct_model); if (rv) return clib_error_return (0, "roc_model_init failed"); + +#ifdef PLATFORM_OCTEON9 + if (!roc_model_is_cn9k ()) + return clib_error_return (0, "OCTEON model is not OCTEON9"); +#else + if (!roc_model_is_cn10k ()) + return clib_error_return (0, "OCTEON model is not OCTEON10"); +#endif + return 0; } diff --git 
a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c index 0250da7cda3..c60f9c886ff 100644 --- a/src/plugins/dpdk/cryptodev/cryptodev.c +++ b/src/plugins/dpdk/cryptodev/cryptodev.c @@ -71,7 +71,7 @@ prepare_aead_xform (struct rte_crypto_sym_xform *xform, aead_xform->iv.offset = CRYPTODEV_IV_OFFSET; aead_xform->iv.length = 12; aead_xform->key.data = key->data; - aead_xform->key.length = vec_len (key->data); + aead_xform->key.length = key->length; return 0; } @@ -111,7 +111,7 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms, xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH; xforms->next = xforms + 1; - switch (key->async_alg) + switch (key->alg) { #define _(a, b, c, d, e) \ case VNET_CRYPTO_ALG_##a##_##d##_TAG##e: \ @@ -249,9 +249,9 @@ cryptodev_check_supported_vnet_alg (vnet_crypto_key_t *key) { u32 matched = 0; - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) { - switch (key->async_alg) + switch (key->alg) { #define _(a, b, c, d, e) \ case VNET_CRYPTO_ALG_##a##_##d##_TAG##e: \ @@ -453,7 +453,7 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx, rte_cryptodev_sym_session_create (sess_pool); #endif - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) ret = prepare_linked_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key); else ret = @@ -464,7 +464,7 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx, goto clear_key; } - if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) + if (key->is_link) prepare_linked_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key); else prepare_aead_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key, aad_len); diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index 7a49c5aaef2..d6eed5441b4 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -80,7 +80,9 @@ dpdk_device_setup (dpdk_device_t * xd) dpdk_device_stop (xd); } - rte_eth_dev_info_get (xd->port_id, &dev_info); + rv = 
rte_eth_dev_info_get (xd->port_id, &dev_info); + if (rv) + dpdk_device_error (xd, "rte_eth_dev_info_get", rv); dpdk_log_debug ("[%u] configuring device %U", xd->port_id, format_dpdk_rte_device, dev_info.device); @@ -443,6 +445,7 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id, enum rte_eth_event_type type, void *param) { struct rte_eth_link link; + CLIB_UNUSED (int rv); RTE_SET_USED (param); if (type != RTE_ETH_EVENT_INTR_LSC) @@ -451,7 +454,8 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id, return -1; } - rte_eth_link_get_nowait (port_id, &link); + rv = rte_eth_link_get_nowait (port_id, &link); + ASSERT (rv == 0); u8 link_up = link.link_status; if (link_up) dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id, diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c index 0ba59562838..c5abbd5f727 100644 --- a/src/plugins/dpdk/device/device.c +++ b/src/plugins/dpdk/device/device.c @@ -597,8 +597,10 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm, u16 valid_queue_count = 0; uint32_t i, j; uint32_t ret; + int __clib_unused rv; - rte_eth_dev_info_get (xd->port_id, &dev_info); + rv = rte_eth_dev_info_get (xd->port_id, &dev_info); + ASSERT (rv == 0); /* parameter check */ if (clib_bitmap_count_set_bits (bitmap) == 0) diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c index 9c368dd9038..2fde041684c 100644 --- a/src/plugins/dpdk/device/driver.c +++ b/src/plugins/dpdk/device/driver.c @@ -52,6 +52,7 @@ static dpdk_driver_t dpdk_drivers[] = { .supported_flow_actions = supported_flow_actions_intel, .use_intel_phdr_cksum = 1, .int_unmaskable = 1, + .program_vlans = 1, }, { .drivers = DPDK_DRIVERS ({ "net_e1000_igb_vf", "Intel e1000 VF" }), diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c index 635f6f37ebf..95be9e230be 100644 --- a/src/plugins/dpdk/device/flow.c +++ b/src/plugins/dpdk/device/flow.c @@ -85,6 +85,20 @@ (f->type == 
VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE)) +/* get source addr from ipv6 header */ +#if (RTE_VERSION >= RTE_VERSION_NUM(24, 11, 0, 0)) +#define IP6_SRC_ADDR(ip6) ip6.hdr.src_addr.a +#else +#define IP6_SRC_ADDR(ip6) ip6.hdr.src_addr +#endif + +/* get destination addr from ipv6 header */ +#if (RTE_VERSION >= RTE_VERSION_NUM(24, 11, 0, 0)) +#define IP6_DST_ADDR(ip6) ip6.hdr.dst_addr.a +#else +#define IP6_DST_ADDR(ip6) ip6.hdr.dst_addr +#endif + /* constant structs */ static const struct rte_flow_attr ingress = {.ingress = 1 }; @@ -342,13 +356,13 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe) } else { - clib_memcpy (ip6[0].hdr.src_addr, &ip6_ptr->src_addr.addr, + clib_memcpy (IP6_SRC_ADDR (ip6[0]), &ip6_ptr->src_addr.addr, ARRAY_LEN (ip6_ptr->src_addr.addr.as_u8)); - clib_memcpy (ip6[1].hdr.src_addr, &ip6_ptr->src_addr.mask, + clib_memcpy (IP6_SRC_ADDR (ip6[1]), &ip6_ptr->src_addr.mask, ARRAY_LEN (ip6_ptr->src_addr.mask.as_u8)); - clib_memcpy (ip6[0].hdr.dst_addr, &ip6_ptr->dst_addr.addr, + clib_memcpy (IP6_DST_ADDR (ip6[0]), &ip6_ptr->dst_addr.addr, ARRAY_LEN (ip6_ptr->dst_addr.addr.as_u8)); - clib_memcpy (ip6[1].hdr.dst_addr, &ip6_ptr->dst_addr.mask, + clib_memcpy (IP6_DST_ADDR (ip6[1]), &ip6_ptr->dst_addr.mask, ARRAY_LEN (ip6_ptr->dst_addr.mask.as_u8)); ip6[0].hdr.proto = ip6_ptr->protocol.prot; ip6[1].hdr.proto = ip6_ptr->protocol.mask; @@ -505,13 +519,13 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe) } \ else \ { \ - clib_memcpy (in_ip6[0].hdr.src_addr, &ptr->in_src_addr.addr, \ + clib_memcpy (IP6_SRC_ADDR (in_ip6[0]), &ptr->in_src_addr.addr, \ ARRAY_LEN (ptr->in_src_addr.addr.as_u8)); \ - clib_memcpy (in_ip6[1].hdr.src_addr, &ptr->in_src_addr.mask, \ + clib_memcpy (IP6_SRC_ADDR (in_ip6[1]), &ptr->in_src_addr.mask, \ ARRAY_LEN (ptr->in_src_addr.mask.as_u8)); \ - clib_memcpy (in_ip6[0].hdr.dst_addr, &ptr->in_dst_addr.addr, \ + clib_memcpy (IP6_DST_ADDR 
(in_ip6[0]), &ptr->in_dst_addr.addr, \ ARRAY_LEN (ptr->in_dst_addr.addr.as_u8)); \ - clib_memcpy (in_ip6[1].hdr.dst_addr, &ptr->in_dst_addr.mask, \ + clib_memcpy (IP6_DST_ADDR (in_ip6[1]), &ptr->in_dst_addr.mask, \ ARRAY_LEN (ptr->in_dst_addr.mask.as_u8)); \ item->spec = in_ip6; \ item->mask = in_ip6 + 1; \ diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c index c4170c20329..fd301da8ea5 100644 --- a/src/plugins/dpdk/device/format.c +++ b/src/plugins/dpdk/device/format.c @@ -423,10 +423,12 @@ format_dpdk_device (u8 * s, va_list * args) struct rte_eth_rss_conf rss_conf; int vlan_off; int retval; + int __clib_unused rv; dpdk_update_counters (xd, now); dpdk_update_link_state (xd, now); - rte_eth_dev_info_get (xd->port_id, &di); + rv = rte_eth_dev_info_get (xd->port_id, &di); + ASSERT (rv == 0); s = format (s, "%U\n%Ucarrier %U", format_dpdk_device_type, dev_instance, diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index ec9e6045de7..aaa2c1f4a68 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -1520,10 +1520,12 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now) struct rte_eth_link prev_link = xd->link; u32 hw_flags = 0; u8 hw_flags_chg = 0; + int __clib_unused rv; xd->time_last_link_update = now ? 
now : xd->time_last_link_update; clib_memset (&xd->link, 0, sizeof (xd->link)); - rte_eth_link_get_nowait (xd->port_id, &xd->link); + rv = rte_eth_link_get_nowait (xd->port_id, &xd->link); + ASSERT (rv == 0); if (LINK_STATE_ELOGS) { diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c index d5edffbd02e..ff5a3bd6b3c 100644 --- a/src/plugins/hs_apps/echo_client.c +++ b/src/plugins/hs_apps/echo_client.c @@ -96,8 +96,7 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es) svm_fifo_t *f = es->tx_fifo; rv = clib_min (svm_fifo_max_enqueue_prod (f), bytes_this_chunk); svm_fifo_enqueue_nocopy (f, rv); - session_send_io_evt_to_thread_custom ( - &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX); + session_program_tx_io_evt (es->tx_fifo->vpp_sh, SESSION_IO_EVT_TX); } else rv = @@ -132,8 +131,7 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es) hdr.lcl_port = at->lcl_port; svm_fifo_enqueue (f, sizeof (hdr), (u8 *) & hdr); svm_fifo_enqueue_nocopy (f, rv); - session_send_io_evt_to_thread_custom ( - &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX); + session_program_tx_io_evt (es->tx_fifo->vpp_sh, SESSION_IO_EVT_TX); } else { @@ -543,7 +541,7 @@ ec_ctrl_send (hs_test_cmd_t cmd) rv = svm_fifo_enqueue (s->tx_fifo, sizeof (ecm->cfg), (u8 *) &ecm->cfg); ASSERT (rv == sizeof (ecm->cfg)); - session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); return 0; } diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c index b981e775b57..dc303e2f83a 100644 --- a/src/plugins/hs_apps/echo_server.c +++ b/src/plugins/hs_apps/echo_server.c @@ -256,8 +256,7 @@ echo_server_ctrl_reply (session_t *s) rv = svm_fifo_enqueue (s->tx_fifo, sizeof (esm->cfg), (u8 *) &esm->cfg); ASSERT (rv == sizeof (esm->cfg)); - session_send_io_evt_to_thread_custom (&s->session_index, s->thread_index, - SESSION_IO_EVT_TX); + session_program_tx_io_evt (s->handle, 
SESSION_IO_EVT_TX); } static int @@ -423,8 +422,8 @@ echo_server_rx_callback (session_t * s) { /* TODO should be session_enqueue_notify(s) but quic tests seem * to fail if that's the case */ - if (session_send_io_evt_to_thread (rx_fifo, - SESSION_IO_EVT_BUILTIN_RX)) + if (session_program_transport_io_evt (s->handle, + SESSION_IO_EVT_BUILTIN_RX)) es_err ("failed to enqueue self-tap"); if (es->rx_retries == 500000) diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 3ca86d24673..531e2750c1e 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -52,7 +52,8 @@ typedef struct u32 tx_offset; u32 vpp_session_index; http_header_table_t req_headers; - http_header_t *resp_headers; + http_headers_ctx_t resp_headers; + u8 *resp_headers_buf; } hcs_session_t; typedef struct @@ -92,6 +93,7 @@ hcs_session_alloc (u32 thread_index) memset (hs, 0, sizeof (*hs)); hs->session_index = hs - hcm->sessions[thread_index]; hs->thread_index = thread_index; + vec_validate (hs->resp_headers_buf, 255); return hs; } @@ -172,21 +174,10 @@ start_send_data (hcs_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; - u8 *headers_buf = 0; int rv; - if (vec_len (hs->resp_headers)) - { - headers_buf = http_serialize_headers (hs->resp_headers); - vec_reset_length (hs->resp_headers); - msg.data.headers_offset = 0; - msg.data.headers_len = vec_len (headers_buf); - } - else - { - msg.data.headers_offset = 0; - msg.data.headers_len = 0; - } + msg.data.headers_offset = 0; + msg.data.headers_len = hs->resp_headers.tail_offset; msg.type = HTTP_MSG_REPLY; msg.code = status; @@ -201,9 +192,9 @@ start_send_data (hcs_session_t *hs, http_status_code_t status) if (msg.data.headers_len) { - rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + rv = svm_fifo_enqueue (ts->tx_fifo, msg.data.headers_len, + hs->resp_headers.buf); ASSERT (rv == msg.data.headers_len); - vec_free (headers_buf); } if (!msg.data.body_len) @@ 
-245,8 +236,7 @@ send_data_to_http (void *rpc_args) if (args->plain_text) type = HTTP_CONTENT_TEXT_PLAIN; - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (type)); start_send_data (hs, HTTP_STATUS_OK); @@ -371,7 +361,8 @@ hcs_ts_rx_callback (session_t *ts) hs = hcs_session_get (ts->thread_index, ts->opaque); hs->tx_buf = 0; - vec_reset_length (hs->resp_headers); + http_init_headers_ctx (&hs->resp_headers, hs->resp_headers_buf, + vec_len (hs->resp_headers_buf)); http_reset_header_table (&hs->req_headers); /* Read the http message header */ @@ -380,15 +371,13 @@ hcs_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET) { - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_ALLOW), + http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW, http_token_lit ("GET")); start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); goto done; } - if (msg.data.target_path_len == 0 || - msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) + if (msg.data.target_path_len == 0) { start_send_data (hs, HTTP_STATUS_BAD_REQUEST); goto done; @@ -420,14 +409,14 @@ hcs_ts_rx_callback (session_t *ts) msg.data.headers_len, hs->req_headers.buf); ASSERT (rv == msg.data.headers_len); http_build_header_table (&hs->req_headers, msg); - const http_header_t *accept = http_get_header ( + const http_token_t *accept_value = http_get_header ( &hs->req_headers, http_header_name_token (HTTP_HEADER_ACCEPT)); - if (accept) + if (accept_value) { - HCS_DBG ("client accept: %U", format_http_bytes, accept->value.base, - accept->value.len); + HCS_DBG ("client accept: %U", format_http_bytes, accept_value->base, + accept_value->len); /* just for testing purpose, we don't care about precedence */ - if (http_token_contains (accept->value.base, accept->value.len, + if (http_token_contains (accept_value->base, accept_value->len, 
http_token_lit ("text/plain"))) args.plain_text = 1; } @@ -541,7 +530,7 @@ hcs_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf) return; vec_free (hs->tx_buf); - vec_free (hs->resp_headers); + vec_free (hs->resp_headers_buf); http_free_header_table (&hs->req_headers); hcs_session_free (hs); } diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c index 35c25d207ec..20271fc4aea 100644 --- a/src/plugins/hs_apps/http_client.c +++ b/src/plugins/hs_apps/http_client.c @@ -6,7 +6,6 @@ #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> -#include <http/http_header_names.h> #include <http/http_content_types.h> #include <http/http_status_codes.h> #include <vppinfra/unix.h> @@ -34,7 +33,7 @@ typedef struct u32 thread_index; vlib_main_t *vlib_main; u8 *headers_buf; - http_header_t *req_headers; + http_headers_ctx_t req_headers; http_msg_t msg; } hc_worker_t; @@ -155,9 +154,9 @@ hc_request (session_t *s, session_error_t err) rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->target), hcm->target); ASSERT (rv == vec_len (hcm->target)); - rv = svm_fifo_enqueue (s->tx_fifo, vec_len (wrk->headers_buf), + rv = svm_fifo_enqueue (s->tx_fifo, wrk->req_headers.tail_offset, wrk->headers_buf); - ASSERT (rv == wrk->msg.data.headers_len); + ASSERT (rv == wrk->req_headers.tail_offset); if (hcm->req_method == HTTP_REQ_POST) { @@ -214,22 +213,22 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, { if (hcm->is_file) http_add_header ( - &wrk->req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); else http_add_header ( - &wrk->req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED)); } + http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1); 
vec_foreach (header, hcm->custom_header) - http_add_header (&wrk->req_headers, (const char *) header->name, - vec_len (header->name), (const char *) header->value, - vec_len (header->value)); - - wrk->headers_buf = http_serialize_headers (wrk->req_headers); - vec_free (wrk->req_headers); + http_add_custom_header ( + &wrk->req_headers, (const char *) header->name, vec_len (header->name), + (const char *) header->value, vec_len (header->value)); + clib_warning ("%U", format_http_bytes, wrk->headers_buf, + wrk->req_headers.tail_offset); wrk->msg.method_type = hcm->req_method; if (hcm->req_method == HTTP_REQ_POST) wrk->msg.data.body_len = vec_len (hcm->data); @@ -238,10 +237,9 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index, wrk->msg.type = HTTP_MSG_REQUEST; /* request target */ - wrk->msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; wrk->msg.data.target_path_len = vec_len (hcm->target); /* custom headers */ - wrk->msg.data.headers_len = vec_len (wrk->headers_buf); + wrk->msg.data.headers_len = wrk->req_headers.tail_offset; /* total length */ wrk->msg.data.len = wrk->msg.data.target_path_len + wrk->msg.data.headers_len + wrk->msg.data.body_len; @@ -508,6 +506,7 @@ hc_connect_rpc (void *rpc_args) if (rv > 0) clib_warning (0, "connect returned: %U", format_session_error, rv); + session_endpoint_free_ext_cfgs (&a->sep_ext); vec_free (a); return rv; } @@ -520,6 +519,7 @@ hc_connect () hc_worker_t *wrk; hc_session_t *hc_session; transport_endpt_ext_cfg_t *ext_cfg; + transport_endpt_cfg_http_t http_cfg = { (u32) hcm->timeout, 0 }; vec_validate (a, 0); clib_memset (a, 0, sizeof (a[0])); @@ -528,8 +528,8 @@ hc_connect () a->app_index = hcm->app_index; ext_cfg = session_endpoint_add_ext_cfg ( - &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); - ext_cfg->opaque = hcm->timeout; + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); + clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); /* allocate http session on main 
thread */ wrk = hc_worker_get (0); @@ -620,7 +620,13 @@ hc_run (vlib_main_t *vm) num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (hcm->wrk, num_threads - 1); vec_foreach (wrk, hcm->wrk) - wrk->thread_index = wrk - hcm->wrk; + { + wrk->thread_index = wrk - hcm->wrk; + /* 4k for headers should be enough */ + vec_validate (wrk->headers_buf, 4095); + http_init_headers_ctx (&wrk->req_headers, wrk->headers_buf, + vec_len (wrk->headers_buf)); + } if ((err = hc_attach ())) return clib_error_return (0, "http client attach: %U", format_clib_error, diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index 861af7f03e2..4ee3b49444c 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -16,7 +16,6 @@ #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> -#include <http/http_header_names.h> #include <http/http_content_types.h> #include <http/http_status_codes.h> @@ -37,7 +36,6 @@ typedef struct u32 vpp_session_index; u64 to_recv; u8 is_closed; - http_header_t *req_headers; } hcc_session_t; typedef struct @@ -131,9 +129,10 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, hcc_session_t *hs, *new_hs; hcc_worker_t *wrk; http_msg_t msg; - u8 *headers_buf; + u8 *headers_buf = 0; u32 new_hs_index; int rv; + http_headers_ctx_t headers; HCC_DBG ("ho hc_index: %d", hc_index); @@ -157,21 +156,19 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, HCC_DBG ("new hc_index: %d", new_hs->session_index); as->opaque = new_hs_index; - http_add_header (&new_hs->req_headers, - http_header_name_token (HTTP_HEADER_ACCEPT), + vec_validate (headers_buf, 63); + http_init_headers_ctx (&headers, headers_buf, vec_len (headers_buf)); + http_add_header (&headers, HTTP_HEADER_ACCEPT, http_content_type_token (HTTP_CONTENT_TEXT_HTML)); - headers_buf = http_serialize_headers (new_hs->req_headers); - vec_free 
(new_hs->req_headers); msg.type = HTTP_MSG_REQUEST; msg.method_type = HTTP_REQ_GET; /* request target */ - msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; msg.data.target_path_offset = 0; msg.data.target_path_len = vec_len (hcm->http_query); /* custom headers */ msg.data.headers_offset = msg.data.target_path_len; - msg.data.headers_len = vec_len (headers_buf); + msg.data.headers_len = headers.tail_offset; /* request body */ msg.data.body_len = 0; /* data type and total length */ @@ -181,7 +178,7 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, { hcm->http_query, vec_len (hcm->http_query) }, - { headers_buf, vec_len (headers_buf) } }; + { headers_buf, msg.data.headers_len } }; rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */); vec_free (headers_buf); @@ -409,9 +406,10 @@ hcc_connect () a->app_index = hcm->app_index; /* set http (response) timeout to 10 seconds */ + transport_endpt_cfg_http_t http_cfg = { 10, 0 }; ext_cfg = session_endpoint_add_ext_cfg ( - &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); - ext_cfg->opaque = 10; + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); + clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); /* allocate http session on main thread */ wrk = hcc_worker_get (0); diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c index a40a31caf63..59a0309e363 100644 --- a/src/plugins/hs_apps/http_tps.c +++ b/src/plugins/hs_apps/http_tps.c @@ -17,7 +17,6 @@ #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> -#include <http/http_header_names.h> #include <http/http_content_types.h> #define HTS_RX_BUF_SIZE (64 << 10) @@ -41,7 +40,8 @@ typedef struct }; u8 *uri; u8 *rx_buf; - http_header_t *resp_headers; + http_headers_ctx_t resp_headers; + u8 *resp_headers_buf; } hts_session_t; typedef struct hts_listen_cfg_ @@ -86,6 +86,7 
@@ hts_session_alloc (u32 thread_index) pool_get_zero (htm->sessions[thread_index], hs); hs->session_index = hs - htm->sessions[thread_index]; hs->thread_index = thread_index; + vec_validate (hs->resp_headers_buf, 255); return hs; } @@ -111,6 +112,7 @@ hts_session_free (hts_session_t *hs) clib_warning ("Freeing session %u", hs->session_index); vec_free (hs->rx_buf); + vec_free (hs->resp_headers_buf); if (CLIB_DEBUG) clib_memset (hs, 0xfa, sizeof (*hs)); @@ -233,26 +235,20 @@ hts_start_send_data (hts_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; - u8 *headers_buf = 0; u32 n_segs = 1; svm_fifo_seg_t seg[2]; int rv; - if (vec_len (hs->resp_headers)) + msg.data.headers_offset = 0; + msg.data.headers_len = 0; + + if (hs->resp_headers.tail_offset) { - headers_buf = http_serialize_headers (hs->resp_headers); - vec_free (hs->resp_headers); - msg.data.headers_offset = 0; - msg.data.headers_len = vec_len (headers_buf); - seg[1].data = headers_buf; + msg.data.headers_len = hs->resp_headers.tail_offset; + seg[1].data = hs->resp_headers_buf; seg[1].len = msg.data.headers_len; n_segs = 2; } - else - { - msg.data.headers_offset = 0; - msg.data.headers_len = 0; - } msg.type = HTTP_MSG_REPLY; msg.code = status; @@ -266,7 +262,6 @@ hts_start_send_data (hts_session_t *hs, http_status_code_t status) ts = session_get (hs->vpp_session_index, hs->thread_index); rv = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs, 0 /* allow partial */); - vec_free (headers_buf); ASSERT (rv == (sizeof (msg) + msg.data.headers_len)); if (!msg.data.body_len) @@ -320,8 +315,7 @@ try_test_file (hts_session_t *hs, u8 *target) } } - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); hts_start_send_data (hs, HTTP_STATUS_OK); @@ -380,9 +374,9 @@ hts_ts_rx_callback (session_t *ts) if (hs->left_recv == 0) { hs->data_len = 0; 
- hs->resp_headers = 0; hs->rx_buf = 0; - + http_init_headers_ctx (&hs->resp_headers, hs->resp_headers_buf, + vec_len (hs->resp_headers_buf)); /* Read the http message header */ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); ASSERT (rv == sizeof (msg)); @@ -394,15 +388,13 @@ hts_ts_rx_callback (session_t *ts) } if (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST) { - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_ALLOW), + http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW, http_token_lit ("GET, POST")); hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); goto done; } - if (msg.data.target_path_len == 0 || - msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) + if (msg.data.target_path_len == 0) { hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST); goto done; diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c index 82b904f9f1b..1bcc1e85a17 100644 --- a/src/plugins/hs_apps/proxy.c +++ b/src/plugins/hs_apps/proxy.c @@ -19,13 +19,16 @@ #include <vnet/session/application_interface.h> #include <hs_apps/proxy.h> #include <vnet/tcp/tcp.h> -#include <http/http.h> #include <http/http_header_names.h> proxy_main_t proxy_main; #define TCP_MSS 1460 +static const char masque_udp_uri_prefix[] = ".well-known/masque/udp/"; +#define MASQUE_UDP_URI_PREFIX_LEN (sizeof (masque_udp_uri_prefix) - 1) +#define MASQUE_UDP_URI_MIN_LEN (MASQUE_UDP_URI_PREFIX_LEN + 10) + #define PROXY_DEBUG 0 #if PROXY_DEBUG @@ -59,37 +62,40 @@ proxy_session_side_ctx_get (proxy_worker_t *wrk, u32 ctx_index) return pool_elt_at_index (wrk->ctx_pool, ctx_index); } -static void +static_always_inline void proxy_send_http_resp (session_t *s, http_status_code_t sc, - http_header_t *resp_headers) + http_headers_ctx_t *headers) { http_msg_t msg; int rv; - u8 *headers_buf = 0; + uword headers_ptr; + svm_fifo_seg_t seg[2]; + u32 n_segs = 1; ASSERT (s->thread_index == vlib_get_thread_index ()); - if (vec_len (resp_headers)) + + 
msg.data.headers_len = 0; + if (headers) { - headers_buf = http_serialize_headers (resp_headers); - msg.data.len = msg.data.headers_len = vec_len (headers_buf); + msg.data.headers_len = headers->tail_offset; + headers_ptr = pointer_to_uword (headers->buf); + seg[1].data = (u8 *) &headers_ptr; + seg[1].len = sizeof (headers_ptr); + n_segs = 2; } - else - msg.data.len = msg.data.headers_len = 0; - msg.type = HTTP_MSG_REPLY; msg.code = sc; - msg.data.type = HTTP_MSG_DATA_INLINE; + msg.data.type = HTTP_MSG_DATA_PTR; + msg.data.len = msg.data.headers_len; msg.data.headers_offset = 0; msg.data.body_len = 0; msg.data.body_offset = 0; - rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - if (msg.data.headers_len) - { - rv = svm_fifo_enqueue (s->tx_fifo, vec_len (headers_buf), headers_buf); - ASSERT (rv == vec_len (headers_buf)); - vec_free (headers_buf); - } + seg[0].data = (u8 *) &msg; + seg[0].len = sizeof (msg); + + rv = + svm_fifo_enqueue_segments (s->tx_fifo, seg, n_segs, 0 /* allow partial */); + ASSERT (rv == (sizeof (msg) + (n_segs == 2 ? 
sizeof (headers_ptr) : 0))); if (svm_fifo_set_event (s->tx_fifo)) session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); @@ -504,6 +510,127 @@ proxy_transport_needs_crypto (transport_proto_t proto) } static void +proxy_http_connect (session_t *s, vnet_connect_args_t *a) +{ + proxy_main_t *pm = &proxy_main; + http_msg_t msg; + http_uri_authority_t target_uri; + session_endpoint_cfg_t target_sep = SESSION_ENDPOINT_CFG_NULL; + int rv; + u8 *rx_buf = pm->rx_buf[s->thread_index]; + http_header_table_t req_headers = pm->req_headers[s->thread_index]; + + rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); + ASSERT (rv == sizeof (msg)); + + ASSERT (msg.type == HTTP_MSG_REQUEST); + + if (PREDICT_FALSE (msg.method_type != HTTP_REQ_CONNECT)) + { + PROXY_DBG ("invalid method"); + goto bad_req; + } + if (msg.data.upgrade_proto == HTTP_UPGRADE_PROTO_NA) + { + /* TCP tunnel (RFC9110 section 9.3.6) */ + PROXY_DBG ("CONNECT"); + /* get tunnel target */ + if (!msg.data.target_authority_len) + { + PROXY_DBG ("CONNECT target missing"); + goto bad_req; + } + ASSERT (msg.data.target_authority_len <= pm->rcv_buffer_size); + rv = svm_fifo_peek (s->rx_fifo, msg.data.target_authority_offset, + msg.data.target_authority_len, rx_buf); + ASSERT (rv == msg.data.target_authority_len); + rv = http_parse_authority (rx_buf, msg.data.target_authority_len, + &target_uri); + if (rv) + { + PROXY_DBG ("authority parsing failed"); + goto bad_req; + } + /* TODO reg-name resolution */ + if (target_uri.host_type == HTTP_URI_HOST_TYPE_REG_NAME) + { + PROXY_DBG ("reg-name resolution not supported"); + goto bad_req; + } + target_sep.transport_proto = TRANSPORT_PROTO_TCP; + } + else if (msg.data.upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP) + { + /* UDP tunnel (RFC9298) */ + PROXY_DBG ("CONNECT-UDP"); + /* get tunnel target */ + if (msg.data.target_path_len < MASQUE_UDP_URI_MIN_LEN) + { + PROXY_DBG ("invalid target"); + goto bad_req; + } + ASSERT (msg.data.target_path_len <= 
pm->rcv_buffer_size); + rv = svm_fifo_peek (s->rx_fifo, msg.data.target_path_offset, + msg.data.target_path_len, rx_buf); + ASSERT (rv == msg.data.target_path_len); + if (http_validate_target_syntax (rx_buf, msg.data.target_path_len, 0, 0)) + { + PROXY_DBG ("invalid target"); + goto bad_req; + } + if (memcmp (rx_buf, masque_udp_uri_prefix, MASQUE_UDP_URI_PREFIX_LEN)) + { + PROXY_DBG ("uri prefix not match"); + goto bad_req; + } + rv = http_parse_masque_host_port ( + rx_buf + MASQUE_UDP_URI_PREFIX_LEN, + msg.data.target_path_len - MASQUE_UDP_URI_PREFIX_LEN, &target_uri); + if (rv) + { + PROXY_DBG ("masque host/port parsing failed"); + goto bad_req; + } + + /* Capsule-Protocol header is optional, but need to have true value */ + http_reset_header_table (&req_headers); + http_init_header_table_buf (&req_headers, msg); + rv = svm_fifo_peek (s->rx_fifo, msg.data.headers_offset, + msg.data.headers_len, req_headers.buf); + ASSERT (rv == msg.data.headers_len); + http_build_header_table (&req_headers, msg); + const http_token_t *capsule_protocol = http_get_header ( + &req_headers, http_header_name_token (HTTP_HEADER_CAPSULE_PROTOCOL)); + if (capsule_protocol) + { + PROXY_DBG ("Capsule-Protocol header present"); + if (!http_token_is (capsule_protocol->base, capsule_protocol->len, + http_token_lit (HTTP_BOOLEAN_TRUE))) + { + PROXY_DBG ("Capsule-Protocol invalid value"); + goto bad_req; + } + } + target_sep.transport_proto = TRANSPORT_PROTO_UDP; + } + else + { + bad_req: + proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); + svm_fifo_dequeue_drop_all (s->rx_fifo); + return; + } + PROXY_DBG ("proxy target %U:%u", format_ip46_address, &target_uri.ip, + target_uri.host_type == HTTP_URI_HOST_TYPE_IP4, + clib_net_to_host_u16 (target_uri.port)); + svm_fifo_dequeue_drop (s->rx_fifo, msg.data.len); + target_sep.is_ip4 = target_uri.host_type == HTTP_URI_HOST_TYPE_IP4; + target_sep.ip = target_uri.ip; + target_sep.port = target_uri.port; + clib_memcpy (&a->sep_ext, &target_sep, 
sizeof (target_sep)); +} + +static void proxy_session_start_connect (proxy_session_side_ctx_t *sc, session_t *s) { int actual_transfer __attribute__ ((unused)); @@ -530,59 +657,7 @@ proxy_session_start_connect (proxy_session_side_ctx_t *sc, session_t *s) clib_spinlock_unlock_if_init (&pm->sessions_lock); if (tp == TRANSPORT_PROTO_HTTP) - { - http_msg_t msg; - u8 *target_buf = 0; - http_uri_t target_uri; - http_header_t *resp_headers = 0; - session_endpoint_cfg_t target_sep = SESSION_ENDPOINT_CFG_NULL; - int rv; - - rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg); - ASSERT (rv == sizeof (msg)); - - if (msg.type != HTTP_MSG_REQUEST) - { - proxy_send_http_resp (s, HTTP_STATUS_INTERNAL_ERROR, 0); - return; - } - if (msg.method_type != HTTP_REQ_CONNECT) - { - http_add_header (&resp_headers, - http_header_name_token (HTTP_HEADER_ALLOW), - http_token_lit ("CONNECT")); - proxy_send_http_resp (s, HTTP_STATUS_METHOD_NOT_ALLOWED, - resp_headers); - vec_free (resp_headers); - return; - } - - if (msg.data.target_form != HTTP_TARGET_AUTHORITY_FORM || - msg.data.target_path_len == 0) - { - proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); - return; - } - - /* read target uri */ - target_buf = vec_new (u8, msg.data.target_path_len); - rv = svm_fifo_peek (s->rx_fifo, msg.data.target_path_offset, - msg.data.target_path_len, target_buf); - ASSERT (rv == msg.data.target_path_len); - svm_fifo_dequeue_drop (s->rx_fifo, msg.data.len); - rv = http_parse_authority_form_target (target_buf, &target_uri); - vec_free (target_buf); - if (rv) - { - proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0); - return; - } - target_sep.is_ip4 = target_uri.is_ip4; - target_sep.ip = target_uri.ip; - target_sep.port = target_uri.port; - target_sep.transport_proto = TRANSPORT_PROTO_TCP; - clib_memcpy (&a->sep_ext, &target_sep, sizeof (target_sep)); - } + proxy_http_connect (s, a); else { max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo); @@ -762,7 +837,7 @@ 
active_open_alloc_session_fifos (session_t *s) * will receive data, etc. */ txf->shr->master_session_index = s->session_index; - txf->master_thread_index = s->thread_index; + txf->vpp_sh = s->handle; /* * Account for the active-open session's use of the fifos @@ -786,18 +861,35 @@ active_open_send_http_resp_rpc (void *arg) u32 ps_index = pointer_to_uword (arg); proxy_main_t *pm = &proxy_main; proxy_session_t *ps; - http_status_code_t sc; session_t *po_s; + transport_proto_t ao_tp; + int connect_failed; + + PROXY_DBG ("ps[%xlu] going to send connect response", ps_index); clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_get (ps_index); po_s = session_get_from_handle (ps->po.session_handle); - sc = ps->ao_disconnected ? HTTP_STATUS_BAD_GATEWAY : HTTP_STATUS_OK; + connect_failed = ps->ao_disconnected; - clib_spinlock_unlock_if_init (&pm->sessions_lock); + if (!connect_failed) + { + ao_tp = session_get_transport_proto ( + session_get_from_handle (ps->ao.session_handle)); + if (ao_tp == TRANSPORT_PROTO_UDP) + proxy_send_http_resp (po_s, HTTP_STATUS_SWITCHING_PROTOCOLS, + &pm->capsule_proto_header); + else + proxy_send_http_resp (po_s, HTTP_STATUS_OK, 0); + } + else + { + proxy_send_http_resp (po_s, HTTP_STATUS_BAD_GATEWAY, 0); + proxy_session_close_po (ps); + } - proxy_send_http_resp (po_s, sc, 0); + clib_spinlock_unlock_if_init (&pm->sessions_lock); } static int @@ -817,6 +909,7 @@ active_open_connected_callback (u32 app_index, u32 opaque, clib_spinlock_lock_if_init (&pm->sessions_lock); ps = proxy_session_get (opaque); + PROXY_DBG ("ps[%lu] connect failed: %d", opaque, err); ps->ao_disconnected = 1; if (ps->po.is_http) { @@ -825,7 +918,8 @@ active_open_connected_callback (u32 app_index, u32 opaque, active_open_send_http_resp_rpc, uword_to_pointer (ps->ps_index, void *)); } - proxy_session_close_po (ps); + else + proxy_session_close_po (ps); clib_spinlock_unlock_if_init (&pm->sessions_lock); @@ -935,6 +1029,7 @@ active_open_migrate_rpc (void *arg) ps 
= proxy_session_get (ps_index); sc->ps_index = ps->ps_index; + sc->state = PROXY_SC_S_ESTABLISHED; s = session_get_from_handle (ps->ao.session_handle); s->opaque = sc->sc_index; @@ -1011,13 +1106,7 @@ active_open_rx_callback (session_t * s) * Send event for server tx fifo */ if (svm_fifo_set_event (proxy_tx_fifo)) - { - u8 thread_index = proxy_tx_fifo->master_thread_index; - u32 session_index = proxy_tx_fifo->shr->master_session_index; - return session_send_io_evt_to_thread_custom (&session_index, - thread_index, - SESSION_IO_EVT_TX); - } + session_program_tx_io_evt (proxy_tx_fifo->vpp_sh, SESSION_IO_EVT_TX); if (svm_fifo_max_enqueue (proxy_tx_fifo) <= TCP_MSS) svm_fifo_add_want_deq_ntf (proxy_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); @@ -1048,9 +1137,7 @@ active_open_tx_callback (session_t * ao_s) if (sc->pair.is_http) { /* notify HTTP transport */ - session_t *po = session_get_from_handle (sc->pair.session_handle); - session_send_io_evt_to_thread_custom ( - &po->session_index, po->thread_index, SESSION_IO_EVT_RX); + session_program_rx_io_evt (sc->pair.session_handle); } else { @@ -1188,14 +1275,15 @@ proxy_server_listen () /* set http timeout for connect-proxy */ if (pm->server_sep.transport_proto == TRANSPORT_PROTO_HTTP) { + transport_endpt_cfg_http_t http_cfg = { pm->idle_timeout, + HTTP_UDP_TUNNEL_DGRAM }; transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg ( - &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); - ext_cfg->opaque = pm->idle_timeout; + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); + clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); } rv = vnet_listen (a); - if (need_crypto) - session_endpoint_free_ext_cfgs (&a->sep_ext); + session_endpoint_free_ext_cfgs (&a->sep_ext); return rv; } @@ -1224,12 +1312,14 @@ proxy_server_create (vlib_main_t * vm) proxy_worker_t *wrk; u32 num_threads; int i; + http_header_table_t empty_ht = HTTP_HEADER_TABLE_NULL; if (vlib_num_workers ()) clib_spinlock_init 
(&pm->sessions_lock); num_threads = 1 /* main thread */ + vtm->n_threads; vec_validate (pm->rx_buf, num_threads - 1); + vec_validate_init_empty (pm->req_headers, num_threads - 1, empty_ht); for (i = 0; i < num_threads; i++) vec_validate (pm->rx_buf[i], pm->rcv_buffer_size); @@ -1393,6 +1483,13 @@ proxy_main_init (vlib_main_t * vm) pm->idle_timeout = 600; /* connect-proxy default idle timeout 10 minutes */ vec_validate (pm->client_sep, TRANSPORT_N_PROTOS - 1); + vec_validate (pm->capsule_proto_header_buf, 10); + http_init_headers_ctx (&pm->capsule_proto_header, + pm->capsule_proto_header_buf, + vec_len (pm->capsule_proto_header_buf)); + http_add_header (&pm->capsule_proto_header, HTTP_HEADER_CAPSULE_PROTOCOL, + http_token_lit (HTTP_BOOLEAN_TRUE)); + return 0; } diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h index 276133ba418..f26f4bf0ea2 100644 --- a/src/plugins/hs_apps/proxy.h +++ b/src/plugins/hs_apps/proxy.h @@ -26,6 +26,8 @@ #include <vnet/session/session.h> #include <vnet/session/application_interface.h> +#include <http/http.h> + #define foreach_proxy_session_side_state \ _ (CREATED, "created") \ _ (CONNECTING, "connecting") \ @@ -80,6 +82,7 @@ typedef struct proxy_session_t *sessions; /**< session pool, shared */ clib_spinlock_t sessions_lock; /**< lock for session pool */ u8 **rx_buf; /**< intermediate rx buffers */ + http_header_table_t *req_headers; /**< HTTP request headers */ u32 server_client_index; /**< server API client handle */ u32 server_app_index; /**< server app index */ @@ -87,6 +90,9 @@ typedef struct u32 active_open_app_index; /**< active open index after attach */ u32 ckpair_index; /**< certkey pair index for tls */ + http_headers_ctx_t capsule_proto_header; + u8 *capsule_proto_header_buf; + /* * Configuration params */ diff --git a/src/plugins/hs_apps/sapi/vpp_echo.c b/src/plugins/hs_apps/sapi/vpp_echo.c index 08fd4e175e9..9ae95e6e8e9 100644 --- a/src/plugins/hs_apps/sapi/vpp_echo.c +++ 
b/src/plugins/hs_apps/sapi/vpp_echo.c @@ -69,9 +69,9 @@ echo_session_dequeue_notify (echo_session_t * s) int rv; if (!svm_fifo_set_event (s->rx_fifo)) return; - if ((rv = app_send_io_evt_to_vpp (s->vpp_evt_q, - s->rx_fifo->shr->master_session_index, - SESSION_IO_EVT_RX, SVM_Q_WAIT))) + if ((rv = + app_send_io_evt_to_vpp (s->vpp_evt_q, s->rx_fifo->vpp_session_index, + SESSION_IO_EVT_RX, SVM_Q_WAIT))) ECHO_FAIL (ECHO_FAIL_SEND_IO_EVT, "app_send_io_evt_to_vpp errored %d", rv); svm_fifo_clear_deq_ntf (s->rx_fifo); diff --git a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c index 868cc3a0591..12be20503a3 100644 --- a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c +++ b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c @@ -332,6 +332,10 @@ echo_attach_session (uword segment_handle, uword rxf_offset, uword txf_offset, s->tx_fifo = fifo_segment_alloc_fifo_w_offset (fs, txf_offset); s->rx_fifo->segment_index = fs_index; s->tx_fifo->segment_index = fs_index; + s->rx_fifo->vpp_session_index = s->rx_fifo->shr->master_session_index; + s->tx_fifo->vpp_session_index = s->tx_fifo->shr->master_session_index; + s->rx_fifo->app_session_index = s->session_index; + s->tx_fifo->app_session_index = s->session_index; s->rx_fifo->shr->client_session_index = s->session_index; s->tx_fifo->shr->client_session_index = s->session_index; diff --git a/src/plugins/hs_apps/vcl/vcl_test_protos.c b/src/plugins/hs_apps/vcl/vcl_test_protos.c index 9c81c5f17a1..fd17c7b2c54 100644 --- a/src/plugins/hs_apps/vcl/vcl_test_protos.c +++ b/src/plugins/hs_apps/vcl/vcl_test_protos.c @@ -15,7 +15,6 @@ #include <hs_apps/vcl/vcl_test.h> #include <http/http.h> -#include <http/http_header_names.h> #include <http/http_content_types.h> typedef enum vcl_test_http_state_ @@ -1087,13 +1086,6 @@ vt_process_http_server_read_msg (vcl_test_session_t *ts, void *buf, return 0; } - if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) - { - vt_http_send_reply_msg (ts, HTTP_STATUS_BAD_REQUEST); - vterr 
("error! http target not in origin form", 0); - return 0; - } - /* validate target path syntax */ if (msg.data.target_path_len) { @@ -1169,7 +1161,7 @@ vt_process_http_client_write_msg (vcl_test_session_t *ts, void *buf, uint32_t nbytes) { http_msg_t msg; - http_header_t *req_headers = 0; + http_headers_ctx_t req_headers; u8 *headers_buf = 0; u8 *target; vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque; @@ -1214,24 +1206,23 @@ vt_process_http_client_write_msg (vcl_test_session_t *ts, void *buf, else if (PREDICT_FALSE (vcl_test_http_ctx->test_state == VCL_TEST_HTTP_IDLE)) { + vec_validate (headers_buf, 63); + http_init_headers_ctx (&req_headers, headers_buf, vec_len (headers_buf)); http_add_header ( - &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + &req_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM)); - headers_buf = http_serialize_headers (req_headers); - vec_free (req_headers); memset (&msg, 0, sizeof (http_msg_t)); msg.type = HTTP_MSG_REQUEST; msg.method_type = HTTP_REQ_POST; /* target */ - msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; target = (u8 *) "/vcl_test_http\0"; msg.data.target_path_len = strlen ((char *) target); /* headers */ msg.data.headers_offset = msg.data.target_path_len; - msg.data.headers_len = vec_len (headers_buf); + msg.data.headers_len = req_headers.tail_offset; /* body */ msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len; @@ -1244,7 +1235,7 @@ vt_process_http_client_write_msg (vcl_test_session_t *ts, void *buf, vppcom_data_segment_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, { target, strlen ((char *) target) }, { headers_buf, - vec_len (headers_buf) } }; + msg.data.headers_len } }; do { diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index c5cd894cb59..69b661d0611 100644 --- a/src/plugins/http/http.c +++ b/src/plugins/http/http.c @@ -42,6 +42,13 @@ const char *http_upgrade_proto_str[] = { "", #undef _ }; +#define 
expect_char(c) \ + if (*p++ != c) \ + { \ + clib_warning ("unexpected character"); \ + return -1; \ + } + static u8 * format_http_req_state (u8 *s, va_list *va) { @@ -461,14 +468,12 @@ static const char *connection_upgrade_template = "Connection: upgrade\r\n" */ static const char *http_get_request_template = "GET %s HTTP/1.1\r\n" "Host: %v\r\n" - "User-Agent: %v\r\n" - "%s"; + "User-Agent: %v\r\n"; static const char *http_post_request_template = "POST %s HTTP/1.1\r\n" "Host: %v\r\n" "User-Agent: %v\r\n" - "Content-Length: %llu\r\n" - "%s"; + "Content-Length: %llu\r\n"; static u32 http_send_data (http_conn_t *hc, u8 *data, u32 length) @@ -619,19 +624,21 @@ http_identify_optional_query (http_req_t *req) } static int -http_get_target_form (http_req_t *req) +http_parse_target (http_req_t *req) { int i; + u8 *p, *end; - /* "*" */ + /* asterisk-form = "*" */ if ((req->rx_buf[req->target_path_offset] == '*') && (req->target_path_len == 1)) { req->target_form = HTTP_TARGET_ASTERISK_FORM; - return 0; + /* we do not support OPTIONS request */ + return -1; } - /* 1*( "/" segment ) [ "?" query ] */ + /* origin-form = 1*( "/" segment ) [ "?" query ] */ if (req->rx_buf[req->target_path_offset] == '/') { /* drop leading slash */ @@ -639,27 +646,66 @@ http_get_target_form (http_req_t *req) req->target_path_offset++; req->target_form = HTTP_TARGET_ORIGIN_FORM; http_identify_optional_query (req); - return 0; + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? -1 : 0; } - /* scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */ - i = v_find_index (req->rx_buf, req->target_path_offset, req->target_path_len, - "://"); - if (i > 0) + /* absolute-form = + * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" 
query ] */ + if (req->target_path_len > 8 && + !memcmp (req->rx_buf + req->target_path_offset, "http", 4)) { - req->target_form = HTTP_TARGET_ABSOLUTE_FORM; - http_identify_optional_query (req); - return 0; + req->scheme = HTTP_URL_SCHEME_HTTP; + p = req->rx_buf + req->target_path_offset + 4; + if (*p == 's') + { + p++; + req->scheme = HTTP_URL_SCHEME_HTTPS; + } + if (*p++ == ':') + { + expect_char ('/'); + expect_char ('/'); + req->target_form = HTTP_TARGET_ABSOLUTE_FORM; + req->target_authority_offset = p - req->rx_buf; + req->target_authority_len = 0; + end = req->rx_buf + req->target_path_offset + req->target_path_len; + while (p < end) + { + if (*p == '/') + { + p++; /* drop leading slash */ + req->target_path_offset = p - req->rx_buf; + req->target_path_len = end - p; + break; + } + req->target_authority_len++; + p++; + } + if (!req->target_path_len) + { + clib_warning ("zero length host"); + return -1; + } + http_identify_optional_query (req); + /* can't be CONNECT method */ + return req->method == HTTP_REQ_CONNECT ? -1 : 0; + } } - /* host ":" port */ + /* authority-form = host ":" port */ for (i = req->target_path_offset; i < (req->target_path_offset + req->target_path_len); i++) { if ((req->rx_buf[i] == ':') && (isdigit (req->rx_buf[i + 1]))) { + req->target_authority_len = req->target_path_len; + req->target_path_len = 0; + req->target_authority_offset = req->target_path_offset; + req->target_path_offset = 0; req->target_form = HTTP_TARGET_AUTHORITY_FORM; - return 0; + /* "authority-form" is only used for CONNECT requests */ + return req->method == HTTP_REQ_CONNECT ? 
0 : -1; } } @@ -776,7 +822,9 @@ http_parse_request_line (http_req_t *req, http_status_code_t *ec) req->target_path_len = target_len; req->target_query_offset = 0; req->target_query_len = 0; - if (http_get_target_form (req)) + req->target_authority_len = 0; + req->target_authority_offset = 0; + if (http_parse_target (req)) { clib_warning ("invalid target"); *ec = HTTP_STATUS_BAD_REQUEST; @@ -793,13 +841,6 @@ http_parse_request_line (http_req_t *req, http_status_code_t *ec) return 0; } -#define expect_char(c) \ - if (*p++ != c) \ - { \ - clib_warning ("unexpected character"); \ - return -1; \ - } - #define parse_int(val, mul) \ do \ { \ @@ -913,6 +954,7 @@ http_identify_headers (http_req_t *req, http_status_code_t *ec) req->content_len_header_index = ~0; req->connection_header_index = ~0; req->upgrade_header_index = ~0; + req->host_header_index = ~0; req->headers_offset = req->rx_buf_offset; /* check if we have any header */ @@ -954,18 +996,26 @@ http_identify_headers (http_req_t *req, http_status_code_t *ec) header_index = field_line - req->headers; /* find headers that will be used later in preprocessing */ + /* names are case-insensitive (RFC9110 section 5.1) */ if (req->content_len_header_index == ~0 && - http_token_is ((const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONTENT_LENGTH))) req->content_len_header_index = header_index; else if (req->connection_header_index == ~0 && - http_token_is ((const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_CONNECTION))) + http_token_is_case ( + (const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_CONNECTION))) req->connection_header_index = header_index; else if (req->upgrade_header_index == ~0 && - http_token_is ((const char *) name_start, name_len, - http_header_name_token (HTTP_HEADER_UPGRADE))) + http_token_is_case ( + (const char *) 
name_start, name_len, + http_header_name_token (HTTP_HEADER_UPGRADE))) req->upgrade_header_index = header_index; + else if (req->host_header_index == ~0 && + http_token_is_case ((const char *) name_start, name_len, + http_header_name_token (HTTP_HEADER_HOST))) + req->host_header_index = header_index; /* are we done? */ if (*p == '\r' && *(p + 1) == '\n') @@ -1166,8 +1216,8 @@ http_check_connection_upgrade (http_req_t *req) if (0) ; #define _(sym, str) \ - else if (http_token_is (http_field_line_value_token (upgrade, req), \ - http_token_lit (str))) req->upgrade_proto = \ + else if (http_token_is_case (http_field_line_value_token (upgrade, req), \ + http_token_lit (str))) req->upgrade_proto = \ HTTP_UPGRADE_PROTO_##sym; foreach_http_upgrade_proto #undef _ @@ -1181,6 +1231,30 @@ http_check_connection_upgrade (http_req_t *req) } } +static void +http_target_fixup (http_conn_t *hc) +{ + http_field_line_t *host; + + if (hc->req.target_form == HTTP_TARGET_ABSOLUTE_FORM) + return; + + /* scheme fixup */ + hc->req.scheme = session_get_transport_proto (session_get_from_handle ( + hc->h_tc_session_handle)) == TRANSPORT_PROTO_TLS ? 
+ HTTP_URL_SCHEME_HTTPS : + HTTP_URL_SCHEME_HTTP; + + if (hc->req.target_form == HTTP_TARGET_AUTHORITY_FORM || + hc->req.connection_header_index == ~0) + return; + + /* authority fixup */ + host = vec_elt_at_index (hc->req.headers, hc->req.connection_header_index); + hc->req.target_authority_offset = host->value_offset; + hc->req.target_authority_len = host->value_len; +} + static http_sm_result_t http_req_state_wait_transport_method (http_conn_t *hc, transport_send_params_t *sp) @@ -1215,6 +1289,7 @@ http_req_state_wait_transport_method (http_conn_t *hc, if (rv) goto error; + http_target_fixup (hc); http_check_connection_upgrade (&hc->req); rv = http_identify_message_body (&hc->req, &ec); @@ -1240,7 +1315,9 @@ http_req_state_wait_transport_method (http_conn_t *hc, msg.method_type = hc->req.method; msg.data.type = HTTP_MSG_DATA_INLINE; msg.data.len = len; - msg.data.target_form = hc->req.target_form; + msg.data.scheme = hc->req.scheme; + msg.data.target_authority_offset = hc->req.target_authority_offset; + msg.data.target_authority_len = hc->req.target_authority_len; msg.data.target_path_offset = hc->req.target_path_offset; msg.data.target_path_len = hc->req.target_path_len; msg.data.target_query_offset = hc->req.target_query_offset; @@ -1289,6 +1366,76 @@ error: return HTTP_SM_ERROR; } +static void +http_write_app_headers (http_conn_t *hc, http_msg_t *msg, u8 **tx_buf) +{ + http_main_t *hm = &http_main; + session_t *as; + u8 *app_headers, *p, *end; + u32 *tmp; + int rv; + + as = session_get_from_handle (hc->h_pa_session_handle); + + /* read app header list */ + if (msg->data.type == HTTP_MSG_DATA_PTR) + { + uword app_headers_ptr; + rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), + (u8 *) &app_headers_ptr); + ASSERT (rv == sizeof (app_headers_ptr)); + app_headers = uword_to_pointer (app_headers_ptr, u8 *); + } + else + { + app_headers = hm->app_header_lists[hc->c_thread_index]; + rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers); 
+ ASSERT (rv == msg->data.headers_len); + } + + /* serialize app headers to tx_buf */ + end = app_headers + msg->data.headers_len; + while (app_headers < end) + { + /* custom header name? */ + tmp = (u32 *) app_headers; + if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT)) + { + http_custom_token_t *name, *value; + name = (http_custom_token_t *) app_headers; + u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT; + app_headers += sizeof (http_custom_token_t) + name_len; + value = (http_custom_token_t *) app_headers; + app_headers += sizeof (http_custom_token_t) + value->len; + vec_add2 (*tx_buf, p, name_len + value->len + 4); + clib_memcpy (p, name->token, name_len); + p += name_len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, value->token, value->len); + p += value->len; + *p++ = '\r'; + *p++ = '\n'; + } + else + { + http_app_header_t *header; + header = (http_app_header_t *) app_headers; + app_headers += sizeof (http_app_header_t) + header->value.len; + http_token_t name = { http_header_name_token (header->name) }; + vec_add2 (*tx_buf, p, name.len + header->value.len + 4); + clib_memcpy (p, name.base, name.len); + p += name.len; + *p++ = ':'; + *p++ = ' '; + clib_memcpy (p, header->value.token, header->value.len); + p += header->value.len; + *p++ = '\r'; + *p++ = '\n'; + } + } +} + static http_sm_result_t http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) { @@ -1328,6 +1475,8 @@ http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) return HTTP_SM_ERROR; } + response = hm->tx_bufs[hc->c_thread_index]; + vec_reset_length (response); /* * Add "protocol layer" headers: * - current time @@ -1335,11 +1484,12 @@ http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) * - data length */ now = clib_timebase_now (&hm->timebase); - response = format (0, http_response_template, http_status_code_str[msg.code], - /* Date */ - format_clib_timebase_time, now, - /* Server */ - hc->app_name); + response 
= + format (response, http_response_template, http_status_code_str[msg.code], + /* Date */ + format_clib_timebase_time, now, + /* Server */ + hc->app_name); /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a * 2xx (Successful) response to CONNECT or with a status code of 101 @@ -1350,13 +1500,18 @@ http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) ASSERT (msg.data.body_len == 0); next_state = HTTP_REQ_STATE_TUNNEL; if (hc->req.upgrade_proto > HTTP_UPGRADE_PROTO_NA) - response = format (response, connection_upgrade_template, - http_upgrade_proto_str[hc->req.upgrade_proto]); + { + response = format (response, connection_upgrade_template, + http_upgrade_proto_str[hc->req.upgrade_proto]); + if (hc->req.upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP && + hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM) + next_state = HTTP_REQ_STATE_UDP_TUNNEL; + } /* cleanup some stuff we don't need anymore in tunnel mode */ - http_conn_timer_stop (hc); vec_free (hc->req.rx_buf); vec_free (hc->req.headers); http_buffer_free (&hc->req.tx_buf); + hc->req.to_skip = 0; } else response = format (response, content_len_template, msg.data.body_len); @@ -1365,28 +1520,10 @@ http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) if (msg.data.headers_len) { HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - if (msg.data.type == HTTP_MSG_DATA_PTR) - { - uword app_headers_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), - (u8 *) &app_headers_ptr); - ASSERT (rv == sizeof (app_headers_ptr)); - vec_append (response, uword_to_pointer (app_headers_ptr, u8 *)); - } - else - { - u32 orig_len = vec_len (response); - vec_resize (response, msg.data.headers_len); - u8 *p = response + orig_len; - rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p); - ASSERT (rv == msg.data.headers_len); - } - } - else - { - /* No headers from app */ - response = format (response, "\r\n"); + http_write_app_headers 
(hc, &msg, &response); } + /* Add empty line after headers */ + response = format (response, "\r\n"); HTTP_DBG (3, "%v", response); sent = http_send_data (hc, response, vec_len (response)); @@ -1394,10 +1531,8 @@ http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) { clib_warning ("sending status-line and headers failed!"); sc = HTTP_STATUS_INTERNAL_ERROR; - vec_free (response); goto error; } - vec_free (response); if (msg.data.body_len) { @@ -1429,6 +1564,7 @@ error: static http_sm_result_t http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) { + http_main_t *hm = &http_main; http_msg_t msg; session_t *as; u8 *target_buff = 0, *request = 0, *target; @@ -1472,6 +1608,8 @@ http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) target = target_buff; } + request = hm->tx_bufs[hc->c_thread_index]; + vec_reset_length (request); /* currently we support only GET and POST method */ if (msg.method_type == HTTP_REQ_GET) { @@ -1485,15 +1623,13 @@ http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) * - host * - user agent */ - request = format (0, http_get_request_template, + request = format (request, http_get_request_template, /* target */ target, /* Host */ hc->host, /* User-Agent */ - hc->app_name, - /* Any headers from app? */ - msg.data.headers_len ? "" : "\r\n"); + hc->app_name); next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; sm_result = HTTP_SM_STOP; @@ -1511,7 +1647,7 @@ http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) * - user agent * - content length */ - request = format (0, http_post_request_template, + request = format (request, http_post_request_template, /* target */ target, /* Host */ @@ -1519,9 +1655,7 @@ http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) /* User-Agent */ hc->app_name, /* Content-Length */ - msg.data.body_len, - /* Any headers from app? */ - msg.data.headers_len ? 
"" : "\r\n"); + msg.data.body_len); http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo, msg.data.body_len); @@ -1539,23 +1673,10 @@ http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) if (msg.data.headers_len) { HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len); - if (msg.data.type == HTTP_MSG_DATA_PTR) - { - uword app_headers_ptr; - rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr), - (u8 *) &app_headers_ptr); - ASSERT (rv == sizeof (app_headers_ptr)); - vec_append (request, uword_to_pointer (app_headers_ptr, u8 *)); - } - else - { - u32 orig_len = vec_len (request); - vec_resize (request, msg.data.headers_len); - u8 *p = request + orig_len; - rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p); - ASSERT (rv == msg.data.headers_len); - } + http_write_app_headers (hc, &msg, &request); } + /* Add empty line after headers */ + request = format (request, "\r\n"); HTTP_DBG (3, "%v", request); sent = http_send_data (hc, request, vec_len (request)); @@ -1577,7 +1698,6 @@ error: done: vec_free (target_buff); - vec_free (request); return sm_result; } @@ -1798,6 +1918,196 @@ check_fifo: return HTTP_SM_STOP; } +static http_sm_result_t +http_req_state_udp_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp) +{ + http_main_t *hm = &http_main; + u32 to_deq, capsule_size, dgram_size, n_written = 0; + int rv, n_read; + session_t *as, *ts; + app_worker_t *app_wrk; + u8 payload_offset; + u64 payload_len; + session_dgram_hdr_t hdr; + u8 *buf = 0; + + HTTP_DBG (1, "udp tunnel received data from client"); + + as = session_get_from_handle (hc->h_pa_session_handle); + ts = session_get_from_handle (hc->h_tc_session_handle); + buf = hm->rx_bufs[hc->c_thread_index]; + to_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo); + + while (to_deq > 0) + { + /* some bytes remaining to skip? 
*/ + if (PREDICT_FALSE (hc->req.to_skip)) + { + if (hc->req.to_skip >= to_deq) + { + svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); + hc->req.to_skip -= to_deq; + goto done; + } + else + { + svm_fifo_dequeue_drop (ts->rx_fifo, hc->req.to_skip); + hc->req.to_skip = 0; + } + } + n_read = + svm_fifo_peek (ts->rx_fifo, 0, HTTP_CAPSULE_HEADER_MAX_SIZE, buf); + ASSERT (n_read > 0); + rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset, + &payload_len); + HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv, + payload_offset, payload_len); + if (PREDICT_FALSE (rv != 0)) + { + if (rv < 0) + { + /* capsule datagram is invalid (session need to be aborted) */ + svm_fifo_dequeue_drop_all (ts->rx_fifo); + session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); + http_disconnect_transport (hc); + return HTTP_SM_STOP; + } + else + { + /* unknown capsule should be skipped */ + if (payload_len <= to_deq) + { + svm_fifo_dequeue_drop (ts->rx_fifo, payload_len); + to_deq -= payload_len; + continue; + } + else + { + svm_fifo_dequeue_drop (ts->rx_fifo, to_deq); + hc->req.to_skip = payload_len - to_deq; + goto done; + } + } + } + capsule_size = payload_offset + payload_len; + /* check if we have the full capsule */ + if (PREDICT_FALSE (to_deq < capsule_size)) + { + HTTP_DBG (1, "capsule not complete"); + goto done; + } + + dgram_size = sizeof (hdr) + payload_len; + if (svm_fifo_max_enqueue_prod (as->rx_fifo) < dgram_size) + { + HTTP_DBG (1, "app's rx fifo full"); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + goto done; + } + + /* read capsule payload */ + rv = svm_fifo_peek (ts->rx_fifo, payload_offset, payload_len, buf); + ASSERT (rv == payload_len); + svm_fifo_dequeue_drop (ts->rx_fifo, capsule_size); + + hdr.data_length = payload_len; + hdr.data_offset = 0; + + /* send datagram header and payload */ + svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) }, + { buf, payload_len } }; + rv = 
svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0); + ASSERT (rv > 0); + + n_written += dgram_size; + to_deq -= capsule_size; + } + +done: + HTTP_DBG (1, "written %lu bytes", n_written); + + if (n_written) + { + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); + } + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_program_rx_io_evt (session_handle (ts)); + + return HTTP_SM_STOP; +} + +static http_sm_result_t +http_req_state_udp_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp) +{ + http_main_t *hm = &http_main; + u32 to_deq, capsule_size, dgram_size, n_written = 0; + session_t *as, *ts; + int rv; + session_dgram_pre_hdr_t hdr; + u8 *buf; + u8 *payload; + + HTTP_DBG (1, "udp tunnel received data from target"); + + as = session_get_from_handle (hc->h_pa_session_handle); + ts = session_get_from_handle (hc->h_tc_session_handle); + buf = hm->tx_bufs[hc->c_thread_index]; + to_deq = svm_fifo_max_dequeue_cons (as->tx_fifo); + + while (to_deq > 0) + { + /* read datagram header */ + rv = svm_fifo_peek (as->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr); + ASSERT (rv == sizeof (hdr) && + hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN); + ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN); + dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN; + + if (svm_fifo_max_enqueue_prod (ts->tx_fifo) < + (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD)) + { + HTTP_DBG (1, "ts tx fifo full"); + goto done; + } + + /* create capsule header */ + payload = http_encap_udp_payload_datagram (buf, hdr.data_length); + capsule_size = (payload - buf) + hdr.data_length; + /* read payload */ + rv = svm_fifo_peek (as->tx_fifo, SESSION_CONN_HDR_LEN, hdr.data_length, + payload); + ASSERT (rv == hdr.data_length); + svm_fifo_dequeue_drop (as->tx_fifo, dgram_size); + /* send capsule */ + rv = svm_fifo_enqueue (ts->tx_fifo, capsule_size, buf); + ASSERT (rv == capsule_size); + + n_written += capsule_size; + to_deq -= dgram_size; 
+ } + +done: + HTTP_DBG (1, "written %lu bytes", n_written); + if (n_written) + { + if (svm_fifo_set_event (ts->tx_fifo)) + session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX); + } + + /* Deschedule and wait for deq notification if ts fifo is almost full */ + if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH) + { + svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&hc->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + + return HTTP_SM_STOP; +} + typedef http_sm_result_t (*http_sm_handler) (http_conn_t *, transport_send_params_t *sp); @@ -1810,6 +2120,7 @@ static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = { http_req_state_wait_app_reply, http_req_state_app_io_more_data, http_req_state_tunnel_tx, + http_req_state_udp_tunnel_tx, }; static_always_inline int @@ -1827,6 +2138,7 @@ static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = { 0, /* wait app reply */ 0, /* app io more data */ http_req_state_tunnel_rx, + http_req_state_udp_tunnel_rx, }; static_always_inline int @@ -1977,10 +2289,12 @@ static session_cb_vft_t http_app_cb_vft = { static clib_error_t * http_transport_enable (vlib_main_t *vm, u8 is_en) { + vlib_thread_main_t *vtm = vlib_get_thread_main (); vnet_app_detach_args_t _da, *da = &_da; vnet_app_attach_args_t _a, *a = &_a; u64 options[APP_OPTIONS_N_OPTIONS]; http_main_t *hm = &http_main; + u32 num_threads, i; if (!is_en) { @@ -1990,6 +2304,8 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) return 0; } + num_threads = 1 /* main thread */ + vtm->n_threads; + clib_memset (a, 0, sizeof (*a)); clib_memset (options, 0, sizeof (options)); @@ -2014,7 +2330,20 @@ http_transport_enable (vlib_main_t *vm, u8 is_en) if (hm->is_init) return 0; - vec_validate (hm->wrk, vlib_num_workers ()); + vec_validate (hm->wrk, num_threads - 1); + vec_validate (hm->rx_bufs, num_threads - 1); + vec_validate (hm->tx_bufs, num_threads - 1); + vec_validate (hm->app_header_lists, num_threads - 1); + 
for (i = 0; i < num_threads; i++) + { + vec_validate (hm->rx_bufs[i], + HTTP_UDP_PAYLOAD_MAX_LEN + + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD); + vec_validate (hm->tx_bufs[i], + HTTP_UDP_PAYLOAD_MAX_LEN + + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD); + vec_validate (hm->app_header_lists[i], 32 << 10); + } clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE, &vm->clib_time /* share the system clock */); @@ -2056,8 +2385,10 @@ http_transport_connect (transport_endpoint_cfg_t *tep) ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); if (ext_cfg) { - HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); - hc->timeout = ext_cfg->opaque; + transport_endpt_cfg_http_t *http_cfg = + (transport_endpt_cfg_http_t *) ext_cfg->data; + HTTP_DBG (1, "app set timeout %u", http_cfg->timeout); + hc->timeout = http_cfg->timeout; } hc->is_server = 0; @@ -2132,8 +2463,11 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP); if (ext_cfg && ext_cfg->opaque) { - HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque); - lhc->timeout = ext_cfg->opaque; + transport_endpt_cfg_http_t *http_cfg = + (transport_endpt_cfg_http_t *) ext_cfg->data; + HTTP_DBG (1, "app set timeout %u", http_cfg->timeout); + lhc->timeout = http_cfg->timeout; + lhc->udp_tunnel_mode = http_cfg->udp_tunnel_mode; } /* Grab transport connection listener and link to http listener */ @@ -2246,13 +2580,24 @@ http_app_tx_callback (void *session, transport_send_params_t *sp) if (!http_req_state_is_tx_valid (hc)) { - clib_warning ("hc [%u]%x invalid tx state: http req state " - "'%U', session state '%U'", - as->thread_index, as->connection_index, - format_http_req_state, hc->req.state, - format_http_conn_state, hc); - svm_fifo_dequeue_drop_all (as->tx_fifo); - return 0; + /* Sometimes the server apps can send the response earlier + * than expected (e.g when rejecting a bad request)*/ + if 
(hc->req.state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA && + hc->is_server) + { + svm_fifo_dequeue_drop_all (as->rx_fifo); + hc->req.state = HTTP_REQ_STATE_WAIT_APP_REPLY; + } + else + { + clib_warning ("hc [%u]%x invalid tx state: http req state " + "'%U', session state '%U'", + as->thread_index, as->connection_index, + format_http_req_state, hc->req.state, + format_http_conn_state, hc); + svm_fifo_dequeue_drop_all (as->tx_fifo); + return 0; + } } HTTP_DBG (1, "run state machine"); diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h index 7405d3d3bf7..d61ac0b08c7 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -37,6 +37,18 @@ #define HTTP_DBG(_lvl, _fmt, _args...) #endif +typedef enum http_udp_tunnel_mode_ +{ + HTTP_UDP_TUNNEL_CAPSULE, /**< app receive raw capsule */ + HTTP_UDP_TUNNEL_DGRAM, /**< convert capsule to datagram (zc proxy) */ +} http_udp_tunnel_mode_t; + +typedef struct transport_endpt_cfg_http +{ + u32 timeout; /**< HTTP session timeout in seconds */ + http_udp_tunnel_mode_t udp_tunnel_mode; /**< connect-udp mode */ +} transport_endpt_cfg_http_t; + typedef struct http_conn_id_ { union @@ -82,7 +94,8 @@ typedef enum http_conn_state_ _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \ _ (5, WAIT_APP_REPLY, "wait app reply") \ _ (6, APP_IO_MORE_DATA, "app io more data") \ - _ (7, TUNNEL, "tunnel") + _ (7, TUNNEL, "tunnel") \ + _ (8, UDP_TUNNEL, "udp tunnel") typedef enum http_req_state_ { @@ -382,11 +395,19 @@ typedef struct http_field_line_ u32 value_len; } http_field_line_t; +typedef enum http_url_scheme_ +{ + HTTP_URL_SCHEME_HTTP, + HTTP_URL_SCHEME_HTTPS, +} http_url_scheme_t; + typedef struct http_msg_data_ { http_msg_data_type_t type; u64 len; - http_target_form_t target_form; + http_url_scheme_t scheme; + u32 target_authority_offset; + u32 target_authority_len; u32 target_path_offset; u32 target_path_len; u32 target_query_offset; @@ -424,7 +445,11 @@ typedef struct http_req_ u32 rx_buf_offset; /* current offset 
during parsing */ u32 control_data_len; /* start line + headers + empty line */ - u64 to_recv; /* remaining bytes of message body to receive from transport */ + union + { + u64 to_recv; /* remaining bytes of body to receive from transport */ + u64 to_skip; /* remaining bytes of capsule to skip */ + }; u8 is_tunnel; @@ -438,6 +463,9 @@ typedef struct http_req_ }; http_target_form_t target_form; + http_url_scheme_t scheme; + u32 target_authority_offset; + u32 target_authority_len; u32 target_path_offset; u32 target_path_len; u32 target_query_offset; @@ -453,6 +481,7 @@ typedef struct http_req_ uword content_len_header_index; uword connection_header_index; uword upgrade_header_index; + uword host_header_index; http_upgrade_proto_t upgrade_proto; } http_req_t; @@ -477,6 +506,7 @@ typedef struct http_tc_ u8 *app_name; u8 *host; u8 is_server; + http_udp_tunnel_mode_t udp_tunnel_mode; http_req_t req; } http_conn_t; @@ -493,6 +523,10 @@ typedef struct http_main_ http_conn_t *ho_conn_pool; u32 app_index; + u8 **rx_bufs; + u8 **tx_bufs; + u8 **app_header_lists; + clib_timebase_t timebase; u16 *sc_by_u16; @@ -532,7 +566,8 @@ format_http_bytes (u8 *s, va_list *va) } always_inline int -_validate_target_syntax (u8 *target, u32 len, int is_query, int *is_encoded) +http_validate_target_syntax (u8 *target, u32 len, int is_query, + int *is_encoded) { int encoded = 0; u32 i; @@ -584,13 +619,13 @@ _validate_target_syntax (u8 *target, u32 len, int is_query, int *is_encoded) always_inline int http_validate_abs_path_syntax (u8 *path, int *is_encoded) { - return _validate_target_syntax (path, vec_len (path), 0, is_encoded); + return http_validate_target_syntax (path, vec_len (path), 0, is_encoded); } /** * A "query" rule validation (RFC3986 section 2.1). * - * @param query Vector of target query to validate. + * @param query Target query to validate. * @param is_encoded Return flag that indicates if percent-encoded (optional). * * @return @c 0 on success. 
@@ -598,7 +633,7 @@ http_validate_abs_path_syntax (u8 *path, int *is_encoded) always_inline int http_validate_query_syntax (u8 *query, int *is_encoded) { - return _validate_target_syntax (query, vec_len (query), 1, is_encoded); + return http_validate_target_syntax (query, vec_len (query), 1, is_encoded); } #define htoi(x) (isdigit (x) ? (x - '0') : (tolower (x) - 'a' + 10)) @@ -827,7 +862,7 @@ typedef struct typedef struct { - http_header_t *headers; + http_token_t *values; uword *value_by_name; u8 *buf; char **concatenated_values; @@ -835,7 +870,7 @@ typedef struct #define HTTP_HEADER_TABLE_NULL \ { \ - .headers = 0, .value_by_name = 0, .buf = 0, .concatenated_values = 0, \ + .values = 0, .value_by_name = 0, .buf = 0, .concatenated_values = 0, \ } always_inline u8 @@ -848,17 +883,52 @@ http_token_is (const char *actual, uword actual_len, const char *expected, return memcmp (actual, expected, expected_len) == 0 ? 1 : 0; } +/* Based on searching for a value in a given range from Hacker's Delight */ +always_inline uword +http_tolower_word (uword x) +{ +#if uword_bits == 64 + uword all_bytes = 0x0101010101010101; +#else + uword all_bytes = 0x01010101; +#endif + uword d, y; + d = (x | (0x80 * all_bytes)) - (0x41 * all_bytes); + d = ~((x | (0x7F * all_bytes)) ^ d); + y = (d & (0x7F * all_bytes)) + (0x66 * all_bytes); + y = y | d; + y = y | (0x7F * all_bytes); + y = ~y; + y = (y >> 2) & (0x20 * all_bytes); + return (x | y); +} + always_inline u8 http_token_is_case (const char *actual, uword actual_len, const char *expected, uword expected_len) { - uword i; + uword i, last_a = 0, last_e = 0; + uword *a, *e; ASSERT (actual != 0); if (actual_len != expected_len) return 0; - for (i = 0; i < expected_len; i++) + + i = expected_len; + a = (uword *) actual; + e = (uword *) expected; + while (i >= sizeof (uword)) + { + if (http_tolower_word (*a) != http_tolower_word (*e)) + return 0; + a++; + e++; + i -= sizeof (uword); + } + if (i > 0) { - if (tolower (actual[i]) != 
expected[i]) + clib_memcpy_fast (&last_a, a, i); + clib_memcpy_fast (&last_e, e, i); + if (http_tolower_word (last_a) != http_tolower_word (last_e)) return 0; } return 1; @@ -893,7 +963,7 @@ http_reset_header_table (http_header_table_t *ht) for (i = 0; i < vec_len (ht->concatenated_values); i++) vec_free (ht->concatenated_values[i]); vec_reset_length (ht->concatenated_values); - vec_reset_length (ht->headers); + vec_reset_length (ht->values); vec_reset_length (ht->buf); hash_free (ht->value_by_name); } @@ -921,7 +991,7 @@ http_free_header_table (http_header_table_t *ht) for (i = 0; i < vec_len (ht->concatenated_values); i++) vec_free (ht->concatenated_values[i]); vec_free (ht->concatenated_values); - vec_free (ht->headers); + vec_free (ht->values); vec_free (ht->buf); hash_free (ht->value_by_name); } @@ -930,7 +1000,37 @@ static uword _http_ht_hash_key_sum (hash_t *h, uword key) { http_token_t *name = uword_to_pointer (key, http_token_t *); - return hash_memory (name->base, name->len, 0); + uword last[3] = {}; + uwordu *q = (uword *) name->base; + u64 a, b, c, n; + + a = b = (uword_bits == 64) ? 
0x9e3779b97f4a7c13LL : 0x9e3779b9; + c = 0; + n = name->len; + + while (n >= 3 * sizeof (uword)) + { + a += http_tolower_word (q[0]); + b += http_tolower_word (q[1]); + c += http_tolower_word (q[2]); + hash_mix (a, b, c); + n -= 3 * sizeof (uword); + q += 3; + } + + c += name->len; + + if (n > 0) + { + clib_memcpy_fast (&last, q, n); + a += http_tolower_word (last[0]); + b += http_tolower_word (last[1]); + c += http_tolower_word (last[2]); + } + + hash_mix (a, b, c); + + return c; } static uword @@ -939,7 +1039,22 @@ _http_ht_hash_key_equal (hash_t *h, uword key1, uword key2) http_token_t *name1 = uword_to_pointer (key1, http_token_t *); http_token_t *name2 = uword_to_pointer (key2, http_token_t *); return name1 && name2 && - http_token_is (name1->base, name1->len, name2->base, name2->len); + http_token_is_case (name1->base, name1->len, name2->base, name2->len); +} + +static u8 * +_http_ht_format_pair (u8 *s, va_list *args) +{ + http_header_table_t *ht = va_arg (*args, http_header_table_t *); + void *CLIB_UNUSED (*v) = va_arg (*args, void *); + hash_pair_t *p = va_arg (*args, hash_pair_t *); + http_token_t *name = uword_to_pointer (p->key, http_token_t *); + http_token_t *value = vec_elt_at_index (ht->values, p->value[0]); + + s = format (s, "%U: %U", format_http_bytes, name->base, name->len, + format_http_bytes, value->base, value->len); + + return s; } /** @@ -954,16 +1069,15 @@ _http_ht_hash_key_equal (hash_t *h, uword key1, uword key2) always_inline void http_build_header_table (http_header_table_t *ht, http_msg_t msg) { - http_token_t name; - http_header_t *header; + http_token_t name, *value; http_field_line_t *field_lines, *field_line; uword *p; ASSERT (ht); field_lines = uword_to_pointer (msg.data.headers_ctx, http_field_line_t *); - ht->value_by_name = - hash_create2 (0, 0, sizeof (uword), _http_ht_hash_key_sum, - _http_ht_hash_key_equal, 0, 0); + ht->value_by_name = hash_create2 ( + 0, sizeof (http_token_t), sizeof (uword), _http_ht_hash_key_sum, + 
_http_ht_hash_key_equal, _http_ht_format_pair, ht); vec_foreach (field_line, field_lines) { @@ -974,29 +1088,25 @@ http_build_header_table (http_header_table_t *ht, http_msg_t msg) if (p) { char *new_value = 0; - header = vec_elt_at_index (ht->headers, p[0]); - u32 new_len = header->value.len + field_line->value_len + 2; + value = vec_elt_at_index (ht->values, p[0]); + u32 new_len = value->len + field_line->value_len + 2; vec_validate (new_value, new_len - 1); - clib_memcpy (new_value, header->value.base, header->value.len); - new_value[header->value.len] = ','; - new_value[header->value.len + 1] = ' '; - clib_memcpy (new_value + header->value.len + 2, + clib_memcpy (new_value, value->base, value->len); + new_value[value->len] = ','; + new_value[value->len + 1] = ' '; + clib_memcpy (new_value + value->len + 2, ht->buf + field_line->value_offset, field_line->value_len); vec_add1 (ht->concatenated_values, new_value); - header->value.base = new_value; - header->value.len = new_len; + value->base = new_value; + value->len = new_len; continue; } /* or create new record */ - vec_add2 (ht->headers, header, 1); - header->name.base = name.base; - header->name.len = name.len; - header->value.base = (char *) (ht->buf + field_line->value_offset); - header->value.len = field_line->value_len; - HTTP_DBG (1, "value: %U", format_http_bytes, header->value.base, - header->value.len); - hash_set_mem (ht->value_by_name, &header->name, header - ht->headers); + vec_add2 (ht->values, value, 1); + value->base = (char *) (ht->buf + field_line->value_offset); + value->len = field_line->value_len; + hash_set_mem_alloc (&ht->value_by_name, &name, value - ht->values); } } @@ -1006,158 +1116,254 @@ http_build_header_table (http_header_table_t *ht, http_msg_t msg) * @param header_table Header table to search. * @param name Header name to match. * - * @return Header's value in case of success, @c 0 otherwise. + * @return Header value in case of success, @c 0 otherwise. 
*/ -always_inline const http_header_t * +always_inline const http_token_t * http_get_header (http_header_table_t *header_table, const char *name, uword name_len) { uword *p; - http_header_t *header; + http_token_t *value; http_token_t name_token = { (char *) name, name_len }; p = hash_get_mem (header_table->value_by_name, &name_token); if (p) { - header = vec_elt_at_index (header_table->headers, p[0]); - return header; + value = vec_elt_at_index (header_table->values, p[0]); + return value; } return 0; } -/** - * Add header to the list. - * - * @param headers Header list. - * @param name Pointer to header's name buffer. - * @param name_len Length of the name. - * @param value Pointer to header's value buffer. - * @param value_len Length of the value. - * - * @note Headers added at protocol layer: Date, Server, Content-Length - */ +typedef struct +{ + u32 len; /**< length of the header data buffer */ + u32 tail_offset; /**< current tail in header data */ + u8 *buf; /**< start of header data */ +} http_headers_ctx_t; + +typedef struct +{ + u32 len; + u8 token[0]; +} http_custom_token_t; + +typedef struct +{ + u32 name; + http_custom_token_t value; +} http_app_header_t; + +/* Use high bit of header name length as custom header name bit. */ +#define HTTP_CUSTOM_HEADER_NAME_BIT (1 << 31) + always_inline void -http_add_header (http_header_t **headers, const char *name, uword name_len, - const char *value, uword value_len) +http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len) { - http_header_t *header; - vec_add2 (*headers, header, 1); - header->name.base = (char *) name; - header->name.len = name_len; - header->value.base = (char *) value; - header->value.len = value_len; + ctx->len = len; + ctx->tail_offset = 0; + ctx->buf = buf; } -/** - * Serialize the header list. - * - * @param headers Header list to serialize. - * - * @return New vector with serialized headers. - * - * The caller is always responsible to free the returned vector. 
- */ -always_inline u8 * -http_serialize_headers (http_header_t *headers) +always_inline void +http_add_header (http_headers_ctx_t *ctx, http_header_name_t name, + const char *value, uword value_len) { - u8 *headers_buf = 0, *dst; - u32 headers_buf_len = 2; - http_header_t *header; + http_app_header_t *header; - vec_foreach (header, headers) - headers_buf_len += header->name.len + header->value.len + 4; + ASSERT ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) < + ctx->len); - vec_validate (headers_buf, headers_buf_len - 1); - dst = headers_buf; + header = (http_app_header_t *) (ctx->buf + ctx->tail_offset); + header->name = (u32) name; + header->value.len = (u32) value_len; + clib_memcpy (header->value.token, (u8 *) value, value_len); + ctx->tail_offset += sizeof (http_app_header_t) + value_len; +} - vec_foreach (header, headers) - { - clib_memcpy (dst, header->name.base, header->name.len); - dst += header->name.len; - *dst++ = ':'; - *dst++ = ' '; - clib_memcpy (dst, header->value.base, header->value.len); - dst += header->value.len; - *dst++ = '\r'; - *dst++ = '\n'; - } - *dst++ = '\r'; - *dst = '\n'; - return headers_buf; +always_inline void +http_add_custom_header (http_headers_ctx_t *ctx, const char *name, + uword name_len, const char *value, uword value_len) +{ + http_custom_token_t *token; + + ASSERT ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len + + value_len) < ctx->len); + + /* name */ + token = (http_custom_token_t *) (ctx->buf + ctx->tail_offset); + token->len = (u32) name_len; + clib_memcpy (token->token, (u8 *) name, token->len); + token->len |= HTTP_CUSTOM_HEADER_NAME_BIT; + ctx->tail_offset += sizeof (http_custom_token_t) + name_len; + /* value */ + token = (http_custom_token_t *) (ctx->buf + ctx->tail_offset); + token->len = (u32) value_len; + clib_memcpy (token->token, (u8 *) value, token->len); + ctx->tail_offset += sizeof (http_custom_token_t) + value_len; } +typedef enum http_uri_host_type_ +{ + 
HTTP_URI_HOST_TYPE_IP4, + HTTP_URI_HOST_TYPE_IP6, + HTTP_URI_HOST_TYPE_REG_NAME +} http_uri_host_type_t; + typedef struct { - ip46_address_t ip; + http_uri_host_type_t host_type; + union + { + ip46_address_t ip; + http_token_t reg_name; + }; u16 port; - u8 is_ip4; -} http_uri_t; +} http_uri_authority_t; always_inline int -http_parse_authority_form_target (u8 *target, http_uri_t *authority) +_http_parse_ip4 (u8 **p, u8 *end, ip4_address_t *ip4) { - unformat_input_t input; - u32 port; + u8 n_octets = 0, digit, n_digits = 0; + u16 dec_octet = 0; int rv = 0; - unformat_init_vector (&input, vec_dup (target)); - if (unformat (&input, "[%U]:%d", unformat_ip6_address, &authority->ip.ip6, - &port)) - { - authority->port = clib_host_to_net_u16 (port); - authority->is_ip4 = 0; - } - else if (unformat (&input, "%U:%d", unformat_ip4_address, &authority->ip.ip4, - &port)) - { - authority->port = clib_host_to_net_u16 (port); - authority->is_ip4 = 1; - } - /* TODO reg-name resolution */ - else + while (*p != end) { - clib_warning ("unsupported format '%v'", target); - rv = -1; + if (**p >= '0' && **p <= '9') + { + digit = **p - '0'; + dec_octet = dec_octet * 10 + digit; + n_digits++; + /* must fit in 8 bits */ + if (dec_octet > 255) + return -1; + } + else if (**p == '.' 
&& n_digits) + { + ip4->as_u8[n_octets++] = (u8) dec_octet; + dec_octet = 0; + n_digits = 0; + /* too many octets */ + if (n_octets >= ARRAY_LEN (ip4->as_u8)) + return -1; + } + else + { + /* probably more data (delimiter) after IPv4 address */ + rv = **p; + break; + } + + (*p)++; } - unformat_free (&input); + + /* must end with octet */ + if (!n_digits) + return -1; + + ip4->as_u8[n_octets++] = (u8) dec_octet; + + /* too few octets */ + if (n_octets < ARRAY_LEN (ip4->as_u8)) + return -1; + return rv; } -always_inline u8 * -http_serialize_authority_form_target (http_uri_t *authority) +/* modified unformat_ip6_address */ +always_inline int +_http_parse_ip6 (u8 **p, u8 *end, ip6_address_t *ip6) { - u8 *s; + u8 n_hex_digits = 0, n_colon = 0, n_hex_quads = 0; + u8 double_colon_index = ~0, i; + u16 hex_digit; + u32 hex_quad = 0; + int rv = 0; - if (authority->is_ip4) - s = format (0, "%U:%d", format_ip4_address, &authority->ip.ip4, - clib_net_to_host_u16 (authority->port)); - else - s = format (0, "[%U]:%d", format_ip6_address, &authority->ip.ip6, - clib_net_to_host_u16 (authority->port)); + while (*p != end) + { + hex_digit = 16; + if (**p >= '0' && **p <= '9') + hex_digit = **p - '0'; + else if (**p >= 'a' && **p <= 'f') + hex_digit = **p + 10 - 'a'; + else if (**p >= 'A' && **p <= 'F') + hex_digit = **p + 10 - 'A'; + else if (**p == ':' && n_colon < 2) + n_colon++; + else + { + /* probably more data (delimiter) after IPv6 address */ + rv = **p; + break; + } - return s; -} + /* too many hex quads */ + if (n_hex_quads >= ARRAY_LEN (ip6->as_u16)) + return -1; -typedef enum http_url_scheme_ -{ - HTTP_URL_SCHEME_HTTP, - HTTP_URL_SCHEME_HTTPS, -} http_url_scheme_t; + if (hex_digit < 16) + { + hex_quad = (hex_quad << 4) | hex_digit; -typedef struct -{ - http_url_scheme_t scheme; - u16 port; - u32 host_offset; - u32 host_len; - u32 path_offset; - u32 path_len; - u8 host_is_ip6; -} http_url_t; + /* must fit in 16 bits */ + if (n_hex_digits >= 4) + return -1; + + n_colon = 0; 
+ n_hex_digits++; + } + + /* save position of :: */ + if (n_colon == 2) + { + /* more than one :: ? */ + if (double_colon_index < ARRAY_LEN (ip6->as_u16)) + return -1; + double_colon_index = n_hex_quads; + } + + if (n_colon > 0 && n_hex_digits > 0) + { + ip6->as_u16[n_hex_quads++] = clib_host_to_net_u16 ((u16) hex_quad); + hex_quad = 0; + n_hex_digits = 0; + } + + (*p)++; + } + + if (n_hex_digits > 0) + ip6->as_u16[n_hex_quads++] = clib_host_to_net_u16 ((u16) hex_quad); + + /* expand :: to appropriate number of zero hex quads */ + if (double_colon_index < ARRAY_LEN (ip6->as_u16)) + { + u8 n_zero = ARRAY_LEN (ip6->as_u16) - n_hex_quads; + + for (i = n_hex_quads - 1; i >= double_colon_index; i--) + ip6->as_u16[n_zero + i] = ip6->as_u16[i]; + + for (i = 0; i < n_zero; i++) + { + ASSERT ((double_colon_index + i) < ARRAY_LEN (ip6->as_u16)); + ip6->as_u16[double_colon_index + i] = 0; + } + + n_hex_quads = ARRAY_LEN (ip6->as_u16); + } + + /* too few hex quads */ + if (n_hex_quads < ARRAY_LEN (ip6->as_u16)) + return -1; + + return rv; +} always_inline int -_parse_port (u8 **pos, u8 *end, u16 *port) +_http_parse_port (u8 **pos, u8 *end, u16 *port) { u32 value = 0; u8 *p = *pos; @@ -1182,18 +1388,20 @@ _parse_port (u8 **pos, u8 *end, u16 *port) } /** - * An "absolute-form" URL parsing. + * Parse authority to components. * - * @param url Vector of target URL to validate. - * @param parsed Parsed URL metadata in case of success. + * @param authority Target URL to parse. + * @param authority_len Length of URL. + * @param parsed Parsed authority (port is se to 0 if not present). * * @return @c 0 on success. 
*/ always_inline int -http_parse_absolute_form (u8 *url, http_url_t *parsed) +http_parse_authority (u8 *authority, u32 authority_len, + http_uri_authority_t *parsed) { - u8 *token_start, *token_end, *end; - int is_encoded = 0; + u8 *token_start, *p, *end; + int rv; static uword valid_chars[4] = { /* -.0123456789 */ @@ -1204,111 +1412,102 @@ http_parse_absolute_form (u8 *url, http_url_t *parsed) 0x0000000000000000, }; - if (vec_len (url) < 9) - { - clib_warning ("uri too short"); - return -1; - } - - clib_memset (parsed, 0, sizeof (*parsed)); + /* reg-name max 255 chars + colon + port max 5 chars */ + if (authority_len > 261) + return -1; - end = url + vec_len (url); + end = authority + authority_len; + token_start = authority; + parsed->port = 0; - /* parse scheme */ - if (!memcmp (url, "http:// ", 7)) - { - parsed->scheme = HTTP_URL_SCHEME_HTTP; - parsed->port = clib_host_to_net_u16 (80); - parsed->host_offset = 7; - } - else if (!memcmp (url, "https:// ", 8)) + /* parse host */ + if (*token_start == '[') { - parsed->scheme = HTTP_URL_SCHEME_HTTPS; - parsed->port = clib_host_to_net_u16 (443); - parsed->host_offset = 8; + /* IPv6 address */ + if (authority_len < 4) + return -1; + + p = ++token_start; + rv = _http_parse_ip6 (&p, end, &parsed->ip.ip6); + if (rv != ']') + return -1; + + parsed->host_type = HTTP_URI_HOST_TYPE_IP6; + token_start = ++p; } - else + else if (isdigit (*token_start)) { - clib_warning ("invalid scheme"); - return -1; - } - token_start = url + parsed->host_offset; + /* maybe IPv4 address */ + p = token_start; - /* parse host */ - if (*token_start == '[') - /* IPv6 address */ - { - parsed->host_is_ip6 = 1; - parsed->host_offset++; - token_end = ++token_start; - while (1) + if (authority_len < 7) + goto reg_name; + + rv = _http_parse_ip4 (&p, end, &parsed->ip.ip4); + if (rv == 0 || rv == ':') { - if (token_end == end) - { - clib_warning ("invalid host, IPv6 addr not terminated with ']'"); - return -1; - } - else if (*token_end == ']') - { - 
parsed->host_len = token_end - token_start; - token_start = token_end + 1; - break; - } - else if (*token_end != ':' && *token_end != '.' && - !isxdigit (*token_end)) - { - clib_warning ("invalid character '%u'", *token_end); - return -1; - } - token_end++; + parsed->host_type = HTTP_URI_HOST_TYPE_IP4; + token_start = p; } + else + goto reg_name; } else { - token_end = token_start; - while (token_end != end && *token_end != ':' && *token_end != '/') + /* registered name */ + p = token_start; + reg_name: + while (p != end && *p != ':') { - if (!clib_bitmap_get_no_check (valid_chars, *token_end)) + if (!clib_bitmap_get_no_check (valid_chars, *p)) { - clib_warning ("invalid character '%u'", *token_end); + clib_warning ("invalid character '%u'", *p); return -1; } - token_end++; + p++; } - parsed->host_len = token_end - token_start; - token_start = token_end; - } - - if (!parsed->host_len) - { - clib_warning ("zero length host"); - return -1; + parsed->reg_name.len = p - token_start; + if (parsed->reg_name.len > 255) + { + clib_warning ("reg-name too long"); + return -1; + } + parsed->host_type = HTTP_URI_HOST_TYPE_REG_NAME; + parsed->reg_name.base = (char *) token_start; + token_start = p; } /* parse port, if any */ - if (token_start != end && *token_start == ':') + if ((end - token_start) > 1 && *token_start == ':') { - token_end = ++token_start; - if (_parse_port (&token_end, end, &parsed->port)) + token_start++; + if (_http_parse_port (&token_start, end, &parsed->port)) { clib_warning ("invalid port"); return -1; } - token_start = token_end; } - if (token_start == end) - return 0; + return token_start == end ? 
0 : -1; +} + +always_inline u8 * +http_serialize_authority (http_uri_authority_t *authority) +{ + u8 *s; - token_start++; /* drop leading slash */ - parsed->path_offset = token_start - url; - parsed->path_len = end - token_start; + if (authority->host_type == HTTP_URI_HOST_TYPE_IP4) + s = format (0, "%U", format_ip4_address, &authority->ip.ip4); + else if (authority->host_type == HTTP_URI_HOST_TYPE_IP6) + s = format (0, "[%U]", format_ip6_address, &authority->ip.ip6); + else + s = format (0, "%U", format_http_bytes, authority->reg_name.base, + authority->reg_name.len); - if (parsed->path_len) - return _validate_target_syntax (token_start, parsed->path_len, 0, - &is_encoded); + if (authority->port) + s = format (s, ":%d", clib_net_to_host_u16 (authority->port)); - return 0; + return s; } /** @@ -1323,11 +1522,11 @@ http_parse_absolute_form (u8 *url, http_url_t *parsed) * @note Only IPv4 literals and IPv6 literals supported. */ always_inline int -http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed) +http_parse_masque_host_port (u8 *path, u32 path_len, + http_uri_authority_t *parsed) { - u8 *p, *end, *decoded_host; + u8 *p, *end, *decoded_host, *p4, *p6; u32 host_len; - unformat_input_t input; p = path; end = path + path_len; @@ -1340,21 +1539,22 @@ http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed) if (!host_len || (host_len == path_len) || (host_len + 1 == path_len)) return -1; decoded_host = http_percent_decode (path, host_len); - unformat_init_vector (&input, decoded_host); - if (unformat (&input, "%U", unformat_ip4_address, &parsed->ip.ip4)) - parsed->is_ip4 = 1; - else if (unformat (&input, "%U", unformat_ip6_address, &parsed->ip.ip6)) - parsed->is_ip4 = 0; + p4 = p6 = decoded_host; + if (0 == _http_parse_ip6 (&p6, p6 + vec_len (decoded_host), &parsed->ip.ip6)) + parsed->host_type = HTTP_URI_HOST_TYPE_IP6; + else if (0 == + _http_parse_ip4 (&p4, p4 + vec_len (decoded_host), &parsed->ip.ip4)) + parsed->host_type = 
HTTP_URI_HOST_TYPE_IP4; else { - unformat_free (&input); + vec_free (decoded_host); clib_warning ("unsupported target_host format"); return -1; } - unformat_free (&input); + vec_free (decoded_host); p++; - if (_parse_port (&p, end, &parsed->port)) + if (_http_parse_port (&p, end, &parsed->port)) { clib_warning ("invalid port"); return -1; @@ -1367,6 +1567,7 @@ http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed) } #define HTTP_INVALID_VARINT ((u64) ~0) +#define HTTP_CAPSULE_HEADER_MAX_SIZE 8 #define HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD 5 #define HTTP_UDP_PAYLOAD_MAX_LEN 65527 diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst index bf414cf96ef..995e55e6f0f 100644 --- a/src/plugins/http/http_plugin.rst +++ b/src/plugins/http/http_plugin.rst @@ -16,10 +16,10 @@ Usage The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``, ``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_build_header_table``, ``http_get_header``, -``http_reset_header_table``, ``http_free_header_table``, ``http_add_header``, -``http_serialize_headers``, ``http_parse_authority_form_target``, ``http_serialize_authority_form_target``, -``http_parse_absolute_form``, ``http_parse_masque_host_port``, ``http_decap_udp_payload_datagram``, -``http_encap_udp_payload_datagram``. ``http_token_is``, ``http_token_is_case``, ``http_token_contains`` +``http_reset_header_table``, ``http_free_header_table``, ``http_init_headers_ctx``, ``http_add_header``, +``http_add_custom_header``, ``http_validate_target_syntax``, ``http_parse_authority``, ``http_serialize_authority``, +``http_parse_masque_host_port``, ``http_decap_udp_payload_datagram``, ``http_encap_udp_payload_datagram``, +``http_token_is``, ``http_token_is_case``, ``http_token_contains`` It relies on the hoststack constructs and uses ``http_msg_data_t`` data structure for passing metadata to/from applications. 
@@ -36,7 +36,8 @@ HTTP plugin sends message header with metadata for parsing, in form of offset an Application will get pre-parsed following items: * HTTP method -* target form +* scheme (HTTP/HTTPS) +* target authority offset and length * target path offset and length * target query offset and length * header section offset and length @@ -65,30 +66,31 @@ Now application can start reading HTTP data. First let's read the target path: .. code-block:: C u8 *target_path; + if (msg.data.target_path_len == 0) + { + /* your error handling */ + } vec_validate (target_path, msg.data.target_path_len - 1); rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, msg.data.target_path_len, target_path); ASSERT (rv == msg.data.target_path_len); -Application might also want to know target form which is stored in ``msg.data.target_form``, you can read more about target forms in RFC9112 section 3.2. -In case of origin form HTTP plugin always sets ``target_path_offset`` after leading slash character. +Target path might be empty in some cases (e.g. CONNECT method); you can read more about target forms in RFC9112 section 3.2. +In case of origin and absolute form, the HTTP plugin always sets ``target_path_offset`` after the leading slash character. -Example bellow validates "absolute-path" rule, as described in RFC9110 section 4.1, in case of target in origin form, additionally application can get information if percent encoding is used and decode path: +The example below validates the "absolute-path" rule, as described in RFC9110 section 4.1; additionally, the application can find out whether percent encoding is used and decode the path: ..
code-block:: C int is_encoded = 0; - if (msg.data.target_form == HTTP_TARGET_ORIGIN_FORM) + if (http_validate_abs_path_syntax (target_path, &is_encoded)) { - if (http_validate_abs_path_syntax (target_path, &is_encoded)) - { - /* your error handling */ - } - if (is_encoded) - { - u8 *decoded = http_percent_decode (target_path, vec_len (target_path)); - vec_free (target_path); - target_path = decoded; - } + /* your error handling */ + } + if (is_encoded) + { + u8 *decoded = http_percent_decode (target_path, vec_len (target_path)); + vec_free (target_path); + target_path = decoded; } More on topic when to decode in RFC3986 section 2.4. @@ -128,17 +130,25 @@ Following example shows how to parse headers: if (msg.data.headers_len) { http_header_table_t ht = HTTP_HEADER_TABLE_NULL; + /* initialize header table buffer */ http_init_header_table_buf (&ht, msg); + /* read raw headers into buffer */ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, msg.data.headers_len, ht.buf); ASSERT (rv == msg.data.headers_len); + /* build header table */ http_build_header_table (&ht, msg); /* get Accept header */ - const http_header_t *accept = http_get_header (&ht, http_header_name_token (HTTP_HEADER_ACCEPT)); + const http_token_t *accept_value = http_get_header (&ht, + http_header_name_token (HTTP_HEADER_ACCEPT)); if (accept_value) { - /* do something interesting */ + if (http_token_contains (accept_value->base, accept_value->len, http_token_lit ("text/plain"))) + { + /* do something interesting */ + } } + /* free header table */ http_free_header_table (&ht); } @@ -166,17 +176,22 @@ Modified example above: .. code-block:: C #include <http/http_header_names.h> + /* reset header table before reuse */ http_reset_header_table (&ctx->ht); /* ... 
*/ if (msg.data.headers_len) { + /* initialize header table buffer */ http_init_header_table_buf (&ctx->ht, msg); + /* read raw headers into buffer */ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, msg.data.headers_len, ctx->ht.buf); ASSERT (rv == msg.data.headers_len); + /* build header table */ http_build_header_table (&ctx->ht, msg); /* get Accept header */ - const http_header_t *accept = http_get_header (&ctx->ht, http_header_name_token (HTTP_HEADER_ACCEPT)); + const http_token_t *accept_value = http_get_header (&ctx->ht, + http_header_name_token (HTTP_HEADER_ACCEPT)); if (accept_value) { /* do something interesting */ @@ -235,17 +250,15 @@ When server application sends response back to HTTP layer it starts with message Application should set following items: * Status code -* target form * header section offset and length * body offset and length -Application could pass headers back to HTTP layer. Header list is created dynamically as vector of ``http_header_t``, -where we store only pointers to buffers (zero copy). -Well known header names are predefined. -The list is serialized just before you send buffer to HTTP layer. +Application could pass headers back to HTTP layer. Header list is created dynamically using ``http_headers_ctx_t``, which must be initialized with a preallocated buffer. +Well-known header names are predefined and are added using ``http_add_header``; for headers with custom names use ``http_add_custom_header``. +The header list buffer is sent to the HTTP layer as is (raw); its current length is stored in the ``tail_offset`` member of ``http_headers_ctx_t``. ..
note:: - Following headers are added at protocol layer and **MUST NOT** be set by application: Date, Server, Content-Length + Following headers are added at protocol layer and **MUST NOT** be set by application: Date, Server, Content-Length, Connection, Upgrade Following example shows how to create headers section: @@ -254,18 +267,20 @@ Following example shows how to create headers section: #include <http/http.h> #include <http/http_header_names.h> #include <http/http_content_types.h> - http_header_t *resp_headers = 0; + http_headers_ctx_t resp_headers; u8 *headers_buf = 0; - http_add_header (resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + /* allocate buffer for response header list */ + vec_validate (headers_buf, 1023); + /* initialize header list context */ + http_init_headers_ctx (&resp_headers, headers_buf, vec_len (headers_buf)); + /* add headers to the list */ + http_add_header (&resp_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (HTTP_CONTENT_TEXT_HTML)); - http_add_header (resp_headers, - http_header_name_token (HTTP_HEADER_CACHE_CONTROL), + http_add_header (&resp_headers, HTTP_HEADER_CACHE_CONTROL, http_token_lit ("max-age=600")); - http_add_header (resp_headers, - http_header_name_token (HTTP_HEADER_LOCATION), - (const char *) redirect, vec_len (redirect)); - headers_buf = http_serialize_headers (resp_headers); + http_add_custom_header (&resp_headers, + http_token_lit ("X-Frame-Options"), + (const char *) x_frame_opt, vec_len (x_frame_opt)); The example below show how to create and send response HTTP message metadata: @@ -275,7 +290,7 @@ The example below show how to create and send response HTTP message metadata: msg.type = HTTP_MSG_REPLY; msg.code = HTTP_STATUS_MOVED msg.data.headers_offset = 0; - msg.data.headers_len = vec_len (headers_buf); + msg.data.headers_len = resp_headers.tail_offset; msg.data.type = HTTP_MSG_DATA_INLINE; msg.data.body_len = vec_len (tx_buf); msg.data.body_offset = msg.data.headers_len; @@ 
-284,11 +299,11 @@ The example below show how to create and send response HTTP message metadata: rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg); ASSERT (rv == sizeof (msg)); -Next you will send your serialized headers: +Next you will send your headers: .. code-block:: C - rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + rv = svm_fifo_enqueue (ts->tx_fifo, msg.data.headers_len, headers_buf); ASSERT (rv == msg.data.headers_len); vec_free (headers_buf); @@ -343,7 +358,7 @@ When client application sends message to HTTP layer it starts with message metad Application should set following items: * HTTP method -* target form, offset and length +* target offset and length * header section offset and length * body offset and length @@ -363,13 +378,12 @@ The example below shows how to create headers section: #include <http/http.h> #include <http/http_header_names.h> #include <http/http_content_types.h> - http_header_t *req_headers = 0; + http_headers_ctx_t req_headers; u8 *headers_buf = 0; - http_add_header (req_headers, - http_header_name_token (HTTP_HEADER_ACCEPT), + vec_validate (headers_buf, 63); + http_init_headers_ctx (&req_headers, headers_buf, vec_len (headers_buf)); + http_add_header (&req_headers, HTTP_HEADER_ACCEPT, http_content_type_token (HTTP_CONTENT_TEXT_HTML)); - headers_buf = http_serialize_headers (req_headers); - vec_free (hs->req_headers); Following example shows how to set message metadata: @@ -380,12 +394,11 @@ Following example shows how to set message metadata: msg.method_type = HTTP_REQ_GET; msg.data.headers_offset = 0; /* request target */ - msg.data.target_form = HTTP_TARGET_ORIGIN_FORM; msg.data.target_path_offset = 0; msg.data.target_path_len = vec_len (target); /* custom headers */ msg.data.headers_offset = msg.data.target_path_len; - msg.data.headers_len = vec_len (headers_buf); + msg.data.headers_len = req_headers.tail_offset; /* no request body because we are doing GET request */ msg.data.body_len = 0; /*
data type and total length */ @@ -398,7 +411,7 @@ Finally application sends everything to HTTP layer: svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, /* message metadata */ { target, vec_len (target) }, /* request target */ - { headers_buf, vec_len (headers_buf) } }; /* serialized headers */ + { headers_buf, msg.data.headers_len } }; /* headers */ rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */); vec_free (headers_buf); if (rv < 0 || rv != sizeof (msg) + msg.data.len) @@ -476,17 +489,22 @@ Following example shows how to parse headers: if (msg.data.headers_len) { http_header_table_t ht = HTTP_HEADER_TABLE_NULL; + /* initialize header table buffer */ http_init_header_table_buf (&ht, msg); + /* read raw headers into buffer */ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset, msg.data.headers_len, ht.buf); ASSERT (rv == msg.data.headers_len); + /* build header table */ http_build_header_table (&ht, msg); /* get Content-Type header */ - const http_header_t *content_type = http_get_header (&ht, http_header_name_token (HTTP_HEADER_CONTENT_TYPE)); + const http_token_t *content_type = http_get_header (&ht, + http_header_name_token (HTTP_HEADER_CONTENT_TYPE)); if (content_type) { /* do something interesting */ } + /* free header table */ http_free_header_table (&ht); } @@ -546,23 +564,23 @@ HTTP timeout HTTP plugin sets session inactivity timeout by default to 60 seconds. Client and server applications can pass custom timeout value (in seconds) using extended configuration when doing connect or start listening respectively. You just need to add extended configuration to session endpoint configuration which is part of ``vnet_connect_args_t`` and ``vnet_listen_args_t``. -HTTP plugin use ``opaque`` member of ``transport_endpt_ext_cfg_t``, unsigned 32bit integer seems to be sufficient (allowing the timeout to be set up to 136 years). 
+HTTP plugin use ``timeout`` member of ``transport_endpt_cfg_http_t``, unsigned 32bit integer seems to be sufficient (allowing the timeout to be set up to 136 years). The example below sets HTTP session timeout to 30 seconds (server application): .. code-block:: C vnet_listen_args_t _a, *a = &_a; - session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; transport_endpt_ext_cfg_t *ext_cfg; int rv; clib_memset (a, 0, sizeof (*a)); clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); + /* your custom timeout value in seconds, unused parameters are set to zero */ + transport_endpt_cfg_http_t http_cfg = { 30, 0 }; /* add new extended config entry */ ext_cfg = session_endpoint_add_ext_cfg ( - &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); - /* your custom timeout value in seconds */ - ext_cfg->opaque = 30; + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); + clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); /* rest of the settings omitted for brevity */ rv = vnet_listen (a); /* don't forget to free extended config */ diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c index 40fd4463b61..bfaa285eb35 100644 --- a/src/plugins/http/test/http_test.c +++ b/src/plugins/http/test/http_test.c @@ -5,6 +5,7 @@ #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> #include <http/http.h> +#include <http/http_header_names.h> #define HTTP_TEST_I(_cond, _comment, _args...) 
\ ({ \ @@ -29,220 +30,175 @@ } static int -http_test_authority_form (vlib_main_t *vm) +http_test_parse_authority (vlib_main_t *vm) { - u8 *target = 0, *formated_target = 0; - http_uri_t authority; + u8 *authority = 0, *formated = 0; + http_uri_authority_t parsed; int rv; - target = format (0, "10.10.2.45:20"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv == 0), "'%v' should be valid", target); - formated_target = http_serialize_authority_form_target (&authority); - rv = vec_cmp (target, formated_target); - HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); - vec_free (target); - vec_free (formated_target); - - target = format (0, "[dead:beef::1234]:443"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv == 0), "'%v' should be valid", target); - formated_target = http_serialize_authority_form_target (&authority); - rv = vec_cmp (target, formated_target); - HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target); - vec_free (target); - vec_free (formated_target); - - target = format (0, "example.com:80"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv != 0), "'%v' reg-name not supported", target); - vec_free (target); - - target = format (0, "10.10.2.45"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv != 0), "'%v' should be invalid", target); - vec_free (target); - - target = format (0, "1000.10.2.45:20"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv != 0), "'%v' should be invalid", target); - vec_free (target); - - target = format (0, "[xyz0::1234]:443"); - rv = http_parse_authority_form_target (target, &authority); - HTTP_TEST ((rv != 0), "'%v' should be invalid", target); - vec_free (target); - - return 0; -} - -static int -http_test_absolute_form (vlib_main_t *vm) -{ - u8 *url = 0; - http_url_t parsed_url; - int rv; - - url = format (0, 
"https://example.org/.well-known/masque/udp/1.2.3.4/123/"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv == 0), "'%v' should be valid", url); - HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), - "scheme should be https"); - HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", - parsed_url.host_is_ip6); - HTTP_TEST ((parsed_url.host_offset == strlen ("https://")), - "host_offset=%u should be %u", parsed_url.host_offset, - strlen ("https://")); - HTTP_TEST ((parsed_url.host_len == strlen ("example.org")), - "host_len=%u should be %u", parsed_url.host_len, - strlen ("example.org")); - HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443), - "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); - HTTP_TEST ((parsed_url.path_offset == strlen ("https://example.org/")), - "path_offset=%u should be %u", parsed_url.path_offset, - strlen ("https://example.org/")); - HTTP_TEST ( - (parsed_url.path_len == strlen (".well-known/masque/udp/1.2.3.4/123/")), - "path_len=%u should be %u", parsed_url.path_len, - strlen (".well-known/masque/udp/1.2.3.4/123/")); - vec_free (url); - - url = format (0, "http://vpp-example.org"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv == 0), "'%v' should be valid", url); - HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), - "scheme should be http"); - HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", - parsed_url.host_is_ip6); - HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), - "host_offset=%u should be %u", parsed_url.host_offset, - strlen ("http://")); - HTTP_TEST ((parsed_url.host_len == strlen ("vpp-example.org")), - "host_len=%u should be %u", parsed_url.host_len, - strlen ("vpp-example.org")); - HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 80), - "port=%u should be 80", clib_net_to_host_u16 (parsed_url.port)); - HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", - parsed_url.path_len); - 
vec_free (url); - - url = format (0, "http://1.2.3.4:8080/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv == 0), "'%v' should be valid", url); - HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), - "scheme should be http"); - HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0", - parsed_url.host_is_ip6); - HTTP_TEST ((parsed_url.host_offset == strlen ("http://")), - "host_offset=%u should be %u", parsed_url.host_offset, - strlen ("http://")); - HTTP_TEST ((parsed_url.host_len == strlen ("1.2.3.4")), - "host_len=%u should be %u", parsed_url.host_len, - strlen ("1.2.3.4")); - HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), - "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); - HTTP_TEST ((parsed_url.path_offset == strlen ("http://1.2.3.4:8080/")), - "path_offset=%u should be %u", parsed_url.path_offset, - strlen ("http://1.2.3.4:8080/")); - HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), - "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); - vec_free (url); - - url = format (0, "https://[dead:beef::1234]/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv == 0), "'%v' should be valid", url); - HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS), - "scheme should be https"); - HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", - parsed_url.host_is_ip6); - HTTP_TEST ((parsed_url.host_offset == strlen ("https://[")), - "host_offset=%u should be %u", parsed_url.host_offset, - strlen ("https://[")); - HTTP_TEST ((parsed_url.host_len == strlen ("dead:beef::1234")), - "host_len=%u should be %u", parsed_url.host_len, - strlen ("dead:beef::1234")); - HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443), - "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port)); - HTTP_TEST ((parsed_url.path_offset == strlen ("https://[dead:beef::1234]/")), - "path_offset=%u should be %u", parsed_url.path_offset, - strlen 
("https://[dead:beef::1234]/")); - HTTP_TEST ((parsed_url.path_len == strlen ("abcd")), - "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd")); - vec_free (url); - - url = format (0, "http://[::ffff:192.0.2.128]:8080/"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv == 0), "'%v' should be valid", url); - HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP), - "scheme should be http"); - HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1", - parsed_url.host_is_ip6); - HTTP_TEST ((parsed_url.host_offset == strlen ("http://[")), - "host_offset=%u should be %u", parsed_url.host_offset, - strlen ("http://[")); - HTTP_TEST ((parsed_url.host_len == strlen ("::ffff:192.0.2.128")), - "host_len=%u should be %u", parsed_url.host_len, - strlen ("::ffff:192.0.2.128")); - HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080), - "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port)); - HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0", - parsed_url.path_len); - vec_free (url); - - url = format (0, "http://[dead:beef::1234/abc"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://[dead|beef::1234]/abc"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http:example.org:8080/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "htt://example.org:8080/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http:///abcd"); - rv = 
http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://example.org:808080/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://example.org/a%%3Xbcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://example.org/a%%3"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://example.org/a[b]cd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); - - url = format (0, "http://exa[m]ple.org/abcd"); - rv = http_parse_absolute_form (url, &parsed_url); - HTTP_TEST ((rv != 0), "'%v' should be invalid", url); - vec_free (url); + /* IPv4 address */ + authority = format (0, "10.10.2.45:20"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_IP4), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_IP4); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 20), + "port=%u should be 20", clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + authority = format (0, "10.255.2.1"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_IP4), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_IP4); + HTTP_TEST ((parsed.port 
== 0), "port=%u should be 0", parsed.port); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + /* IPv6 address */ + authority = format (0, "[dead:beef::1234]:443"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_IP6), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_IP6); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 443), + "port=%u should be 443", clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + /* registered name */ + authority = format (0, "example.com:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_REG_NAME), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_REG_NAME); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + authority = format (0, "3xample.com:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_REG_NAME), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_REG_NAME); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 80), + "port=%u should be 80", 
clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + /* 'invalid IPv4 address' is recognized as registered name */ + authority = format (0, "1000.10.2.45:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_REG_NAME), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_REG_NAME); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + authority = format (0, "10.10.20:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_REG_NAME), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_REG_NAME); + HTTP_TEST ((clib_net_to_host_u16 (parsed.port) == 80), + "port=%u should be 80", clib_net_to_host_u16 (parsed.port)); + formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + authority = format (0, "10.10.10.10.2"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == 0), "'%v' should be valid", authority); + HTTP_TEST ((parsed.host_type == HTTP_URI_HOST_TYPE_REG_NAME), + "host_type=%d should be %d", parsed.host_type, + HTTP_URI_HOST_TYPE_REG_NAME); + HTTP_TEST ((parsed.port == 0), "port=%u should be 0", parsed.port); + 
formated = http_serialize_authority (&parsed); + rv = vec_cmp (authority, formated); + HTTP_TEST ((rv == 0), "'%v' should match '%v'", authority, formated); + vec_free (authority); + vec_free (formated); + + /* invalid port */ + authority = format (0, "example.com:80000000"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* no port after colon */ + authority = format (0, "example.com:"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid character in registered name */ + authority = format (0, "bad#example.com"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid IPv6 address not terminated with ']' */ + authority = format (0, "[dead:beef::1234"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* empty IPv6 address */ + authority = format (0, "[]"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid IPv6 address too few hex quads */ + authority = format (0, "[dead:beef]:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid IPv6 address more than one :: */ + authority = format (0, "[dead::beef::1]:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid IPv6 address too much hex quads */ + authority = format (0, "[d:e:a:d:b:e:e:f:1:2]:80"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid character in 
IPv6 address */ + authority = format (0, "[xyz0::1234]:443"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); + + /* invalid IPv6 address */ + authority = format (0, "[deadbeef::1234"); + rv = http_parse_authority (authority, vec_len (authority), &parsed); + HTTP_TEST ((rv == -1), "'%v' should be invalid", authority); return 0; } @@ -251,13 +207,15 @@ static int http_test_parse_masque_host_port (vlib_main_t *vm) { u8 *path = 0; - http_uri_t target; + http_uri_authority_t target; int rv; path = format (0, "10.10.2.45/443/"); rv = http_parse_masque_host_port (path, vec_len (path), &target); HTTP_TEST ((rv == 0), "'%v' should be valid", path); - HTTP_TEST ((target.is_ip4 == 1), "is_ip4=%d should be 1", target.is_ip4); + HTTP_TEST ((target.host_type == HTTP_URI_HOST_TYPE_IP4), + "host_type=%d should be %d", target.host_type, + HTTP_URI_HOST_TYPE_IP4); HTTP_TEST ((clib_net_to_host_u16 (target.port) == 443), "port=%u should be 443", clib_net_to_host_u16 (target.port)); HTTP_TEST ((target.ip.ip4.data[0] == 10 && target.ip.ip4.data[1] == 10 && @@ -269,7 +227,9 @@ http_test_parse_masque_host_port (vlib_main_t *vm) path = format (0, "dead%%3Abeef%%3A%%3A1234/80/"); rv = http_parse_masque_host_port (path, vec_len (path), &target); HTTP_TEST ((rv == 0), "'%v' should be valid", path); - HTTP_TEST ((target.is_ip4 == 0), "is_ip4=%d should be 0", target.is_ip4); + HTTP_TEST ((target.host_type == HTTP_URI_HOST_TYPE_IP6), + "host_type=%d should be %d", target.host_type, + HTTP_URI_HOST_TYPE_IP6); HTTP_TEST ((clib_net_to_host_u16 (target.port) == 80), "port=%u should be 80", clib_net_to_host_u16 (target.port)); HTTP_TEST ((clib_net_to_host_u16 (target.ip.ip6.as_u16[0]) == 0xdead && @@ -391,6 +351,188 @@ http_test_udp_payload_datagram (vlib_main_t *vm) return 0; } +static int +http_test_http_token_is_case (vlib_main_t *vm) +{ + static const char eq_1[] = "content-length"; + static const char eq_2[] = 
"CONtENT-lenGth"; + static const char eq_3[] = "caPsulE-ProtOcol"; + static const char eq_4[] = "ACCESS-CONTROL-REQUEST-METHOD"; + static const char ne_1[] = "content_length"; + static const char ne_2[] = "content-lengXh"; + static const char ne_3[] = "coNtent-lengXh"; + static const char ne_4[] = "content-len"; + static const char ne_5[] = "comtent-length"; + static const char ne_6[] = "content-lengtR"; + u8 rv; + + rv = http_token_is_case ( + eq_1, strlen (eq_1), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 1), "'%s' and '%s' are equal", eq_1, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = http_token_is_case ( + eq_2, strlen (eq_2), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 1), "'%s' and '%s' are equal", eq_2, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = + http_token_is_case (eq_3, strlen (eq_3), + http_header_name_token (HTTP_HEADER_CAPSULE_PROTOCOL)); + HTTP_TEST ((rv == 1), "'%s' and '%s' are equal", eq_3, + http_header_name_str (HTTP_HEADER_CAPSULE_PROTOCOL)) + + rv = http_token_is_case ( + eq_4, strlen (eq_4), + http_header_name_token (HTTP_HEADER_ACCESS_CONTROL_REQUEST_METHOD)); + HTTP_TEST ((rv == 1), "'%s' and '%s' are equal", eq_4, + http_header_name_str (HTTP_HEADER_ACCESS_CONTROL_REQUEST_METHOD)) + + rv = http_token_is_case ( + ne_1, strlen (ne_1), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_1, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = http_token_is_case ( + ne_2, strlen (ne_2), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_2, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = http_token_is_case ( + ne_3, strlen (ne_3), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_3, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = 
http_token_is_case ( + ne_4, strlen (ne_4), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_4, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = http_token_is_case ( + ne_5, strlen (ne_5), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_5, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + rv = http_token_is_case ( + ne_6, strlen (ne_6), http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)); + HTTP_TEST ((rv == 0), "'%s' and '%s' are not equal", ne_6, + http_header_name_str (HTTP_HEADER_CONTENT_LENGTH)) + + return 0; +} + +static int +http_test_http_header_table (vlib_main_t *vm) +{ + http_header_table_t ht = HTTP_HEADER_TABLE_NULL; + const char buf[] = "daTe: Wed, 15 Jan 2025 16:17:33 GMT" + "conTent-tYpE: text/html; charset=utf-8" + "STRICT-transport-security: max-age=31536000" + "sAnDwich: Eggs" + "CONTENT-ENCODING: GZIP" + "sandwich: Spam"; + http_msg_t msg = {}; + http_field_line_t *headers = 0, *field_line; + const http_token_t *value; + u8 rv; + + /* daTe */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 0; + field_line->name_len = 4; + field_line->value_offset = 6; + field_line->value_len = 29; + /* conTent-tYpE */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 35; + field_line->name_len = 12; + field_line->value_offset = 49; + field_line->value_len = 24; + /* STRICT-transport-security */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 73; + field_line->name_len = 25; + field_line->value_offset = 100; + field_line->value_len = 16; + /* sAnDwich */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 116; + field_line->name_len = 8; + field_line->value_offset = 126; + field_line->value_len = 4; + /* CONTENT-ENCODING */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 130; + field_line->name_len = 16; + field_line->value_offset = 148; + 
field_line->value_len = 4; + /* sandwich */ + vec_add2 (headers, field_line, 1); + field_line->name_offset = 152; + field_line->name_len = 8; + field_line->value_offset = 162; + field_line->value_len = 4; + + msg.data.headers_ctx = pointer_to_uword (headers); + msg.data.headers_len = strlen (buf); + + http_init_header_table_buf (&ht, msg); + memcpy (ht.buf, buf, strlen (buf)); + http_build_header_table (&ht, msg); + + vlib_cli_output (vm, "%U", format_hash, ht.value_by_name, 1); + + value = http_get_header ( + &ht, http_header_name_token (HTTP_HEADER_CONTENT_ENCODING)); + HTTP_TEST ((value != 0), "'%s' is in headers", + http_header_name_str (HTTP_HEADER_CONTENT_ENCODING)); + rv = http_token_is (value->base, value->len, http_token_lit ("GZIP")); + HTTP_TEST ((rv == 1), "header value '%U' should be 'GZIP'", + format_http_bytes, value->base, value->len); + + value = + http_get_header (&ht, http_header_name_token (HTTP_HEADER_CONTENT_TYPE)); + HTTP_TEST ((value != 0), "'%s' is in headers", + http_header_name_str (HTTP_HEADER_CONTENT_TYPE)); + + value = http_get_header (&ht, http_header_name_token (HTTP_HEADER_DATE)); + HTTP_TEST ((value != 0), "'%s' is in headers", + http_header_name_str (HTTP_HEADER_DATE)); + + value = http_get_header ( + &ht, http_header_name_token (HTTP_HEADER_STRICT_TRANSPORT_SECURITY)); + HTTP_TEST ((value != 0), "'%s' is in headers", + http_header_name_str (HTTP_HEADER_STRICT_TRANSPORT_SECURITY)); + + value = http_get_header (&ht, http_token_lit ("DATE")); + HTTP_TEST ((value != 0), "'DATE' is in headers"); + + value = http_get_header (&ht, http_token_lit ("date")); + HTTP_TEST ((value != 0), "'date' is in headers"); + + /* repeated header: lookup must return both values as "Eggs, Spam" */ + value = http_get_header (&ht, http_token_lit ("sandwich")); + HTTP_TEST ((value != 0), "'sandwich' is in headers"); + rv = http_token_is (value->base, value->len, http_token_lit ("Eggs, Spam")); + HTTP_TEST ((rv == 1), "header value '%U' should be 'Eggs, Spam'", + format_http_bytes, value->base, value->len);
+ + value = http_get_header (&ht, http_token_lit ("Jade")); + HTTP_TEST ((value == 0), "'Jade' is not in headers"); + + value = http_get_header (&ht, http_token_lit ("CONTENT")); + HTTP_TEST ((value == 0), "'CONTENT' is not in headers"); + + value = + http_get_header (&ht, http_header_name_token (HTTP_HEADER_ACCEPT_CHARSET)); + HTTP_TEST ((value == 0), "'%s' is not in headers", + http_header_name_str (HTTP_HEADER_ACCEPT_CHARSET)); + + http_free_header_table (&ht); + vec_free (headers); + return 0; +} + static clib_error_t * test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) @@ -398,24 +540,28 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input, int res = 0; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "authority-form")) - res = http_test_authority_form (vm); - else if (unformat (input, "absolute-form")) - res = http_test_absolute_form (vm); + if (unformat (input, "parse-authority")) + res = http_test_parse_authority (vm); else if (unformat (input, "parse-masque-host-port")) res = http_test_parse_masque_host_port (vm); else if (unformat (input, "udp-payload-datagram")) res = http_test_udp_payload_datagram (vm); + else if (unformat (input, "token-is-case")) + res = http_test_http_token_is_case (vm); + else if (unformat (input, "header-table")) + res = http_test_http_header_table (vm); else if (unformat (input, "all")) { - if ((res = http_test_authority_form (vm))) - goto done; - if ((res = http_test_absolute_form (vm))) + if ((res = http_test_parse_authority (vm))) goto done; if ((res = http_test_parse_masque_host_port (vm))) goto done; if ((res = http_test_udp_payload_datagram (vm))) goto done; + if ((res = http_test_http_token_is_case (vm))) + goto done; + if ((res = http_test_http_header_table (vm))) + goto done; } else break; diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api index 60c0369848d..bd0cebc45d2 100644 --- 
a/src/plugins/http_static/http_static.api +++ b/src/plugins/http_static/http_static.api @@ -3,7 +3,7 @@ This file defines static http server control-plane API messages */ -option version = "2.3.0"; +option version = "2.4.0"; /** \brief Configure and enable the static http server @param client_index - opaque cookie to identify the sender @@ -74,3 +74,39 @@ autoreply define http_static_enable_v3 { /* The bind URI */ string uri[256]; }; + +/** \brief Configure and enable the static http server + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param fifo_size - size (in bytes) of the session FIFOs + @param cache_size_limit - size (in bytes) of the in-memory file data cache + @param max_age - how long a response is considered fresh (in seconds) + @param keepalive_timeout - timeout during which client connection will stay open (in seconds) + @param max_body_size - maximum size of a request body (in bytes) + @param prealloc_fifos - number of preallocated fifos (usually 0) + @param private_segment_size - fifo segment size (usually 0) + @param www_root - html root path + @param uri - bind URI, defaults to "tcp://0.0.0.0/80" +*/ + +autoreply define http_static_enable_v4 { + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + /* Typical options */ + u32 fifo_size; + u32 cache_size_limit; + u32 max_age [default=600]; + u32 keepalive_timeout [default=60]; + u64 max_body_size [default=8192]; + /* Unusual options */ + u32 prealloc_fifos; + u32 private_segment_size; + + /* Root of the html path */ + string www_root[256]; + /* The bind URI */ + string uri[256]; +}; diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c index 464fd27e90d..7a12f37b8d3 100644 --- a/src/plugins/http_static/http_static.c +++ b/src/plugins/http_static/http_static.c @@ -67,7 +67,7 @@
hss_register_url_handler (hss_url_handler_fn fp, const char *url, static int hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age, - u32 keepalive_timeout) + u32 keepalive_timeout, u64 max_body_size) { hss_main_t *hsm = &hss_main; int rv; @@ -79,6 +79,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos, hsm->www_root = format (0, "%s%c", www_root, 0); hsm->uri = format (0, "%s%c", uri, 0); hsm->max_age = max_age; + hsm->max_body_size = max_body_size; hsm->keepalive_timeout = keepalive_timeout; if (vec_len (hsm->www_root) < 2) @@ -119,7 +120,8 @@ vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp) rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, mp->uri, - ntohl (mp->max_age), HSS_DEFAULT_KEEPALIVE_TIMEOUT); + ntohl (mp->max_age), HSS_DEFAULT_KEEPALIVE_TIMEOUT, + HSS_DEFAULT_MAX_BODY_SIZE); REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY); } @@ -138,11 +140,32 @@ vl_api_http_static_enable_v3_t_handler (vl_api_http_static_enable_v3_t *mp) rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root, mp->uri, - ntohl (mp->max_age), ntohl (mp->keepalive_timeout)); + ntohl (mp->max_age), ntohl (mp->keepalive_timeout), + HSS_DEFAULT_MAX_BODY_SIZE); REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V3_REPLY); } +/* API message handler */ +static void +vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp) +{ + vl_api_http_static_enable_v4_reply_t *rmp; + hss_main_t *hsm = &hss_main; + int rv; + + mp->uri[ARRAY_LEN (mp->uri) - 1] = 0; + mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0; + + rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit), + ntohl (mp->prealloc_fifos), + ntohl (mp->private_segment_size), mp->www_root, mp->uri, + ntohl 
(mp->max_age), ntohl (mp->keepalive_timeout), + clib_net_to_host_u64 (mp->max_body_size)); + + REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V4_REPLY); +} + #include <http_static/http_static.api.c> static clib_error_t * hss_api_init (vlib_main_t *vm) diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h index 5e0654fae24..e158a32dbc9 100644 --- a/src/plugins/http_static/http_static.h +++ b/src/plugins/http_static/http_static.h @@ -24,6 +24,7 @@ #include <http_static/http_cache.h> #define HSS_DEFAULT_MAX_AGE 600 +#define HSS_DEFAULT_MAX_BODY_SIZE 8192 #define HSS_DEFAULT_KEEPALIVE_TIMEOUT 60 /** @file http_static.h @@ -53,9 +54,9 @@ typedef struct int free_data; /** File cache pool index */ u32 cache_pool_index; - /** Response header list */ - http_header_t *resp_headers; - /** Serialized headers to send */ + /** Response header ctx */ + http_headers_ctx_t resp_headers; + /** Response header buffer */ u8 *headers_buf; } hss_session_t; @@ -161,6 +162,8 @@ typedef struct u64 cache_size; /** How long a response is considered fresh (in seconds) */ u32 max_age; + /** Maximum size of a request body (in bytes) */ + u64 max_body_size; /** Formatted max_age: "max-age=xyz" */ u8 *max_age_formatted; /** Timeout during which client connection will stay open */ diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c index edb016f9e05..56487893220 100644 --- a/src/plugins/http_static/http_static_test.c +++ b/src/plugins/http_static/http_static_test.c @@ -214,6 +214,104 @@ api_http_static_enable_v3 (vat_main_t *vam) mp->private_segment_size = ntohl (private_segment_size); mp->max_age = ntohl (max_age); mp->keepalive_timeout = ntohl (keepalive_timeout); + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static int +api_http_static_enable_v4 (vat_main_t *vam) +{ + unformat_input_t *line_input = vam->input; + vl_api_http_static_enable_v4_t *mp; + u64 tmp; + u8 *www_root = 0; + u8 *uri = 0; + u32 prealloc_fifos = 0; + u32 private_segment_size = 0; + u32 fifo_size = 8 << 10; + u32 cache_size_limit = 1 << 20; + u32 max_age = HSS_DEFAULT_MAX_AGE; + u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; + u64 max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "www-root %s", &www_root)) + ; + else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos)) + ; + else if (unformat (line_input, "private-segment-size %U", + unformat_memory_size, &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("private segment size %llu, too large", tmp); + return -99; + } + private_segment_size = (u32) tmp; + } + else if (unformat (line_input, "fifo-size %U", unformat_memory_size, + &tmp)) + { + if (tmp >= 0x100000000ULL) + { + errmsg ("fifo-size %llu, too large", tmp); + return -99; + } + fifo_size = (u32) tmp; + } + else if (unformat (line_input, "cache-size %U", unformat_memory_size, + &tmp)) + { + if (tmp < (128ULL << 10)) + { + errmsg ("cache-size must be at least 128kb"); + return -99; + } + cache_size_limit = (u32) tmp; + } + else if (unformat (line_input, "max-age %d", &max_age)) + ; + else if (unformat (line_input, "keepalive-timeout %d", + &keepalive_timeout)) + ; + else if (unformat (line_input, "uri %s", &uri)) + ; + else if (unformat (line_input, "max-body-size %llu", &max_body_size)) + ; + else + { + errmsg ("unknown input `%U'", format_unformat_error, line_input); + return -99; + } + } + + if (www_root == 0) + { + errmsg ("Must specify www-root"); + return -99; + } + + if (uri == 0) + uri = format (0, "tcp://0.0.0.0/80%c", 0); + + /* Construct the API message */ + M
(HTTP_STATIC_ENABLE_V4, mp); + strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256); + strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256); + mp->fifo_size = ntohl (fifo_size); + mp->cache_size_limit = ntohl (cache_size_limit); + mp->prealloc_fifos = ntohl (prealloc_fifos); + mp->private_segment_size = ntohl (private_segment_size); + mp->max_age = ntohl (max_age); + mp->keepalive_timeout = ntohl (keepalive_timeout); + mp->max_body_size = clib_host_to_net_u64 (max_body_size); /* send it... */ S (mp); diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 9cc3f5dd658..074416873e3 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -19,7 +19,6 @@ #include <sys/stat.h> #include <unistd.h> -#include <http/http_header_names.h> #include <http/http_content_types.h> /** @file static_server.c @@ -41,6 +40,8 @@ hss_session_alloc (u32 thread_index) hs->session_index = hs - hsm->sessions[thread_index]; hs->thread_index = thread_index; hs->cache_pool_index = ~0; + /* 1kB for headers should be enough for now */ + vec_validate (hs->headers_buf, 1023); return hs; } @@ -86,29 +87,17 @@ start_send_data (hss_session_t *hs, http_status_code_t status) { http_msg_t msg; session_t *ts; - u8 *headers_buf = 0; u32 n_enq; u64 to_send; int rv; ts = session_get (hs->vpp_session_index, hs->thread_index); - if (vec_len (hs->resp_headers)) - { - headers_buf = http_serialize_headers (hs->resp_headers); - vec_free (hs->resp_headers); - msg.data.headers_offset = 0; - msg.data.headers_len = vec_len (headers_buf); - } - else - { - msg.data.headers_offset = 0; - msg.data.headers_len = 0; - } - msg.type = HTTP_MSG_REPLY; msg.code = status; msg.data.body_len = hs->data_len; + msg.data.headers_offset = 0; + msg.data.headers_len = hs->resp_headers.tail_offset; msg.data.len = msg.data.body_len + msg.data.headers_len; if (msg.data.len > hss_main.use_ptr_thresh) @@ -119,7 +108,6 @@ start_send_data
(hss_session_t *hs, http_status_code_t status) if (msg.data.headers_len) { - hs->headers_buf = headers_buf; uword headers = pointer_to_uword (hs->headers_buf); rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (headers), (u8 *) &headers); @@ -144,9 +132,9 @@ start_send_data (hss_session_t *hs, http_status_code_t status) if (msg.data.headers_len) { - rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf); + rv = + svm_fifo_enqueue (ts->tx_fifo, msg.data.headers_len, hs->headers_buf); ASSERT (rv == msg.data.headers_len); - vec_free (headers_buf); } if (!msg.data.body_len) @@ -187,11 +175,8 @@ hss_session_send_data (hss_url_handler_args_t *args) /* Set content type only if we have some response data */ if (hs->data_len) - { - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), - http_content_type_token (args->ct)); - } + http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (args->ct)); start_send_data (hs, args->sc); } @@ -320,11 +305,8 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, /* Set content type only if we have some response data */ if (hs->data_len) - { - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), - http_content_type_token (args.ct)); - } + http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, + http_content_type_token (args.ct)); start_send_data (hs, sc); @@ -401,10 +383,9 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path) vec_free (port_str); - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_LOCATION), + http_add_header (&hs->resp_headers, HTTP_HEADER_LOCATION, (const char *) redirect, vec_len (redirect)); - hs->data = redirect; /* TODO: find better way */ + vec_free (redirect); hs->data_len = 0; hs->free_data = 1; @@ -479,16 +460,15 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt, /* Set following headers only for happy 
path: * Content-Type * Cache-Control max-age + * Last-Modified */ type = content_type_from_request (target); - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_CONTENT_TYPE), + http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE, http_content_type_token (type)); - http_add_header ( - &hs->resp_headers, http_header_name_token (HTTP_HEADER_CACHE_CONTROL), - (const char *) hsm->max_age_formatted, vec_len (hsm->max_age_formatted)); - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_LAST_MODIFIED), + http_add_header (&hs->resp_headers, HTTP_HEADER_CACHE_CONTROL, + (const char *) hsm->max_age_formatted, + vec_len (hsm->max_age_formatted)); + http_add_header (&hs->resp_headers, HTTP_HEADER_LAST_MODIFIED, (const char *) last_modified, vec_len (last_modified)); done: @@ -520,6 +500,7 @@ handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path, static int hss_ts_rx_callback (session_t *ts) { + hss_main_t *hsm = &hss_main; hss_session_t *hs; u8 *target_path = 0, *target_query = 0, *data = 0; http_msg_t msg; @@ -529,8 +510,8 @@ hss_ts_rx_callback (session_t *ts) if (hs->free_data) vec_free (hs->data); hs->data = 0; - hs->resp_headers = 0; - vec_free (hs->headers_buf); + http_init_headers_ctx (&hs->resp_headers, hs->headers_buf, + vec_len (hs->headers_buf)); /* Read the http message header */ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg); @@ -539,17 +520,10 @@ hss_ts_rx_callback (session_t *ts) if (msg.type != HTTP_MSG_REQUEST || (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST)) { - http_add_header (&hs->resp_headers, - http_header_name_token (HTTP_HEADER_ALLOW), + http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW, http_token_lit ("GET, POST")); start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED); - goto done; - } - - if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM) - { - start_send_data (hs, HTTP_STATUS_BAD_REQUEST); - goto done; + goto err_done; } /* Read 
target path */ @@ -562,7 +536,7 @@ hss_ts_rx_callback (session_t *ts) if (http_validate_abs_path_syntax (target_path, 0)) { start_send_data (hs, HTTP_STATUS_BAD_REQUEST); - goto done; + goto err_done; } /* Target path must be a proper C-string in addition to a vector */ vec_add1 (target_path, 0); @@ -578,13 +552,24 @@ hss_ts_rx_callback (session_t *ts) if (http_validate_query_syntax (target_query, 0)) { start_send_data (hs, HTTP_STATUS_BAD_REQUEST); - goto done; + goto err_done; } } - /* Read body */ - if (msg.data.body_len) + /* Read request body for POST requests */ + if (msg.data.body_len && msg.method_type == HTTP_REQ_POST) { + if (msg.data.body_len > hsm->max_body_size) + { + start_send_data (hs, HTTP_STATUS_CONTENT_TOO_LARGE); + goto err_done; + } + if (svm_fifo_max_dequeue (ts->rx_fifo) - msg.data.body_offset < + msg.data.body_len) + { + start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR); + goto err_done; + } vec_validate (data, msg.data.body_len - 1); rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len, data); @@ -593,7 +578,10 @@ hss_ts_rx_callback (session_t *ts) /* Find and send data */ handle_request (hs, msg.method_type, target_path, target_query, data); + goto done; +err_done: + hss_session_disconnect_transport (hs); done: vec_free (target_path); vec_free (target_query); @@ -809,6 +797,7 @@ hss_listen (void) u8 need_crypto; transport_endpt_ext_cfg_t *ext_cfg; int rv; + transport_endpt_cfg_http_t http_cfg = { hsm->keepalive_timeout, 0 }; clib_memset (a, 0, sizeof (*a)); a->app_index = hsm->app_index; @@ -825,8 +814,8 @@ hss_listen (void) clib_memcpy (&a->sep_ext, &sep, sizeof (sep)); ext_cfg = session_endpoint_add_ext_cfg ( - &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque)); - ext_cfg->opaque = hsm->keepalive_timeout; + &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg)); + clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg)); if (need_crypto) { @@ -905,6 +894,7 @@ hss_create_command_fn (vlib_main_t 
*vm, unformat_input_t *input, hsm->fifo_size = 0; hsm->cache_size = 10 << 20; hsm->max_age = HSS_DEFAULT_MAX_AGE; + hsm->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE; hsm->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT; /* Get a line of input. */ @@ -942,6 +932,9 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input, hsm->enable_url_handlers = 1; else if (unformat (line_input, "max-age %d", &hsm->max_age)) ; + else if (unformat (line_input, "max-body-size %U", unformat_memory_size, + &hsm->max_body_size)) + ; else { error = clib_error_return (0, "unknown input `%U'", @@ -1005,7 +998,7 @@ VLIB_CLI_COMMAND (hss_create_command, static) = { "http static server www-root <path> [prealloc-fifos <nn>]\n" "[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n" "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n" - "[keepalive-timeout <nn>]\n", + "[keepalive-timeout <nn>] [max-body-size <nn>]\n", .function = hss_create_command_fn, }; diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c index 85b6447007a..916877939f0 100644 --- a/src/plugins/linux-cp/lcp_nl.c +++ b/src/plugins/linux-cp/lcp_nl.c @@ -732,7 +732,8 @@ lcp_nl_drain_messages (void) nl_main_t *nm = &nl_main; /* Read until there's an error */ - while ((err = nl_recvmsgs_default (nm->sk_route)) > -1) + while ((err = nm->sk_route ? 
nl_recvmsgs_default (nm->sk_route) : + -NLE_BAD_SOCK) > -1) ; /* If there was an error other then EAGAIN, signal process node */ diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c index 15a5263284a..10651f10e7e 100644 --- a/src/plugins/quic/quic.c +++ b/src/plugins/quic/quic.c @@ -507,8 +507,7 @@ quic_set_udp_tx_evt (session_t * udp_session) { int rv = 0; if (svm_fifo_set_event (udp_session->tx_fifo)) - rv = session_send_io_evt_to_thread (udp_session->tx_fifo, - SESSION_IO_EVT_TX); + rv = session_program_tx_io_evt (udp_session->handle, SESSION_IO_EVT_TX); if (PREDICT_FALSE (rv)) clib_warning ("Event enqueue errored %d", rv); } @@ -1154,10 +1153,10 @@ quic_update_timer (quic_ctx_t * ctx) quic_session = session_get (ctx->c_s_index, ctx->c_thread_index); if (svm_fifo_set_event (quic_session->tx_fifo)) { - rv = session_send_io_evt_to_thread_custom ( - quic_session, quic_session->thread_index, SESSION_IO_EVT_TX); - if (PREDICT_FALSE (rv)) - QUIC_ERR ("Failed to enqueue builtin_tx %d", rv); + rv = session_program_tx_io_evt (quic_session->handle, + SESSION_IO_EVT_TX); + if (PREDICT_FALSE (rv)) + QUIC_ERR ("Failed to enqueue builtin_tx %d", rv); } return; } diff --git a/src/plugins/sflow/CMakeLists.txt b/src/plugins/sflow/CMakeLists.txt new file mode 100644 index 00000000000..35433bd24df --- /dev/null +++ b/src/plugins/sflow/CMakeLists.txt @@ -0,0 +1,61 @@ + +# Copyright (c) 2024 InMon Corp. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +vpp_find_path(NETLINK_INCLUDE_DIR NAMES linux/netlink.h) +if (NOT NETLINK_INCLUDE_DIR) + message(WARNING "netlink headers not found - sflow plugin disabled") + return() +endif() + +if ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message(WARNING "sflow is not supported on FreeBSD - sflow plugin disabled") + return() +endif() + +LIST(FIND excluded_plugins linux-cp exc_index) +if(${exc_index} EQUAL "-1") + message(WARNING "sflow plugin - linux-cp plugin included: compiling VAPI calls") + add_compile_definitions(SFLOW_USE_VAPI) +else() + message(WARNING "sflow plugin - linux-cp plugin excluded: not compiling VAPI calls") +endif() + +include_directories(${CMAKE_SOURCE_DIR}/vpp-api ${CMAKE_CURRENT_BINARY_DIR}/../../vpp-api) +add_vpp_plugin(sflow + SOURCES + sflow.c + node.c + sflow_common.h + sflow.h + sflow_psample.c + sflow_psample.h + sflow_psample_fields.h + sflow_usersock.c + sflow_usersock.h + sflow_vapi.c + sflow_vapi.h + + MULTIARCH_SOURCES + node.c + + API_FILES + sflow.api + + API_TEST_SOURCES + sflow_test.c + + LINK_LIBRARIES + vppapiclient + vapiclient +) diff --git a/src/plugins/sflow/FEATURE.yaml b/src/plugins/sflow/FEATURE.yaml new file mode 100644 index 00000000000..612db61005c --- /dev/null +++ b/src/plugins/sflow/FEATURE.yaml @@ -0,0 +1,16 @@ +--- +name: sFlow +maintainer: Neil McKee <neil.mckee@inmon.com> + +description: |- + This plugin implements the random packet-sampling and interface + telemetry streaming required to support standard sFlow export + on Linux platforms. The overhead incurred by this monitoring is + minimal, so that detailed, real-time traffic analysis can be + achieved even under high load conditions, with visibility into + any fields that appear in the packet headers. If the linux-cp + plugin is running then interfaces will be mapped to their + equivalent Linux tap ports. 
+ +state: experimental +properties: [CLI, MULTITHREAD] diff --git a/src/plugins/sflow/node.c b/src/plugins/sflow/node.c new file mode 100644 index 00000000000..51826438834 --- /dev/null +++ b/src/plugins/sflow/node.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vlibmemory/api.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u8 new_src_mac[6]; + u8 new_dst_mac[6]; +} sflow_trace_t; + +#ifndef CLIB_MARCH_VARIANT +static u8 * +my_format_mac_address (u8 *s, va_list *args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], + a[4], a[5]); +} + +/* packet trace format function */ +static u8 * +format_sflow_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sflow_trace_t *t = va_arg (*args, sflow_trace_t *); + + s = format (s, "SFLOW: sw_if_index %d, next index %d\n", t->sw_if_index, + t->next_index); + s = format (s, " src %U -> dst %U", my_format_mac_address, t->new_src_mac, + my_format_mac_address, t->new_dst_mac); + return s; +} + +vlib_node_registration_t sflow_node; + +#endif /* CLIB_MARCH_VARIANT */ + +#ifndef CLIB_MARCH_VARIANT +static char *sflow_error_strings[] = { 
+#define _(sym, string) string, + foreach_sflow_error +#undef _ +}; +#endif /* CLIB_MARCH_VARIANT */ + +typedef enum +{ + SFLOW_NEXT_ETHERNET_INPUT, + SFLOW_N_NEXT, +} sflow_next_t; + +VLIB_NODE_FN (sflow_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + u32 n_left_from, *from, *to_next; + sflow_next_t next_index; + + sflow_main_t *smp = &sflow_main; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + uword thread_index = os_get_thread_index (); + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + + /* note that sfwk->skip==1 means "take the next packet", + so we never see sfwk->skip==0. */ + + u32 pkts = n_left_from; + if (PREDICT_TRUE (sfwk->skip > pkts)) + { + /* skip the whole frame-vector */ + sfwk->skip -= pkts; + sfwk->pool += pkts; + } + else + { + while (pkts >= sfwk->skip) + { + /* reach in to get the one we want. */ + vlib_buffer_t *bN = vlib_get_buffer (vm, from[sfwk->skip - 1]); + + /* Sample this packet header. */ + u32 hdr = bN->current_length; + if (hdr > smp->headerB) + hdr = smp->headerB; + + ethernet_header_t *en = vlib_buffer_get_current (bN); + u32 if_index = vnet_buffer (bN)->sw_if_index[VLIB_RX]; + vnet_hw_interface_t *hw = + vnet_get_sup_hw_interface (smp->vnet_main, if_index); + if (hw) + if_index = hw->hw_if_index; + else + { + // TODO: can we get interfaces that have no hw interface? + // If so, should we ignore the sample? + } + + sflow_sample_t sample = { + .samplingN = sfwk->smpN, + .input_if_index = if_index, + .sampled_packet_size = + bN->current_length + bN->total_length_not_including_first_buffer, + .header_bytes = hdr + }; + + // TODO: what bit in the buffer can we set right here to indicate + // that this packet was sampled (and perhaps another bit to say if it + // was dropped or sucessfully enqueued)? That way we can check it + // below if the packet is traced, and indicate that in the trace + // output. 
+ + // TODO: we end up copying the header twice here. Consider allowing + // the enqueue to be just a little more complex. Like this: + // if(!sflow_fifo_enqueue(&sfwk->fifo, &sample, en, hdr). + // With headerB==128 that would be memcpy(,,24) plus memcpy(,,128) + // instead of the memcpy(,,128) plus memcpy(,,24+256) that we do + // here. (We also know that it could be done as a multiple of 8 + // (aligned) bytes because the sflow_sample_t fields are (6xu32) and + // the headerB setting is quantized to the nearest 32 bytes, so there + // may be ways to make it even easier for the compiler.) + sfwk->smpl++; + memcpy (sample.header, en, hdr); + if (PREDICT_FALSE (!sflow_fifo_enqueue (&sfwk->fifo, &sample))) + sfwk->drop++; + + pkts -= sfwk->skip; + sfwk->pool += sfwk->skip; + sfwk->skip = sflow_next_random_skip (sfwk); + } + /* We took a sample (or several) from this frame-vector, but now we are + skipping the rest. */ + sfwk->skip -= pkts; + sfwk->pool += pkts; + } + + /* the rest of this is boilerplate code just to make sure + * that packets are passed on the same way as they would + * have been if this node were not enabled. + * TODO: If there is ever a way to do this in one step + * (i.e. pass on the whole frame-vector unchanged) then it + * might help performance. + */ + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 next0 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next1 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next2 = SFLOW_NEXT_ETHERNET_INPUT; + u32 next3 = SFLOW_NEXT_ETHERNET_INPUT; + ethernet_header_t *en0, *en1, *en2, *en3; + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0-b3 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + /* do this to always pass on to the next node on feature arc */ + vnet_feature_next (&next0, b0); + vnet_feature_next (&next1, b1); + vnet_feature_next (&next2, b2); + vnet_feature_next (&next3, b3); + + ASSERT (b0->current_data == 0); + ASSERT (b1->current_data == 0); + ASSERT (b2->current_data == 0); + ASSERT (b3->current_data == 0); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current (b1); + en2 = vlib_buffer_get_current (b2); + en3 = vlib_buffer_get_current (b3); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + clib_memcpy (t->new_src_mac, en0->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en0->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); + 
t->sw_if_index = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + t->next_index = next1; + clib_memcpy (t->new_src_mac, en1->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en1->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b2, sizeof (*t)); + t->sw_if_index = vnet_buffer (b2)->sw_if_index[VLIB_RX]; + t->next_index = next2; + clib_memcpy (t->new_src_mac, en2->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en2->dst_address, + sizeof (t->new_dst_mac)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b3, sizeof (*t)); + t->sw_if_index = vnet_buffer (b3)->sw_if_index[VLIB_RX]; + t->next_index = next3; + clib_memcpy (t->new_src_mac, en3->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en3->dst_address, + sizeof (t->new_dst_mac)); + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = SFLOW_NEXT_ETHERNET_INPUT; + ethernet_header_t *en0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* do this to always pass on to the next node on feature arc */ + vnet_feature_next (&next0, b0); + + /* + * Direct from the driver, we should be at offset 0 + * aka at &b0->data[0] + */ + ASSERT (b0->current_data == 0); + + en0 = vlib_buffer_get_current (b0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sflow_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer 
(b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + clib_memcpy (t->new_src_mac, en0->src_address, + sizeof (t->new_src_mac)); + clib_memcpy (t->new_dst_mac, en0->dst_address, + sizeof (t->new_dst_mac)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +#ifndef CLIB_MARCH_VARIANT +VLIB_REGISTER_NODE (sflow_node) = +{ + .name = "sflow", + .vector_size = sizeof (u32), + .format_trace = format_sflow_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN(sflow_error_strings), + .error_strings = sflow_error_strings, + .n_next_nodes = SFLOW_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = { + [SFLOW_NEXT_ETHERNET_INPUT] = "ethernet-input", + }, +}; +#endif /* CLIB_MARCH_VARIANT */ +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow.api b/src/plugins/sflow/sflow.api new file mode 100644 index 00000000000..e5f33001e6e --- /dev/null +++ b/src/plugins/sflow/sflow.api @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file sflow.api + * @brief VPP control-plane API messages. 
/** \brief API reply carrying the sflow sampling-rate
    @param context - sender context, to match reply w/ request
    @param sampling_N - the current 1-in-N sampling rate
*/

define sflow_sampling_rate_get_reply
{
  u32 context;
  u32 sampling_N;
  option in_progress;
};
/** \brief API reply carrying the sflow polling-interval
    @param context - sender context, to match reply w/ request
    @param polling_S - current polling interval in seconds
*/

define sflow_polling_interval_get_reply
{
  u32 context;
  u32 polling_S;
  option in_progress;
};
/** \brief API reply carrying the sflow header-bytes
    @param context - sender context, to match reply w/ request
    @param header_B - current maximum header length in bytes
*/

define sflow_header_bytes_get_reply
{
  u32 context;
  u32 header_B;
  option in_progress;
};
+ */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <sflow/sflow.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> +#include <stdbool.h> + +#include <sflow/sflow.api_enum.h> +#include <sflow/sflow.api_types.h> +#include <sflow/sflow_psample.h> + +#include <vpp-api/client/stat_client.h> +#include <vlib/stats/stats.h> + +#define REPLY_MSG_ID_BASE smp->msg_id_base +#include <vlibapi/api_helper_macros.h> + +sflow_main_t sflow_main; +vlib_log_class_t sflow_logger; + +static void +sflow_stat_segment_client_init (void) +{ + stat_client_main_t *scm = &stat_client_main; + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + uword size; + + size = sm->memory_size ? sm->memory_size : STAT_SEGMENT_DEFAULT_SIZE; + scm->memory_size = size; + scm->shared_header = sm->shared_header; + scm->directory_vector = + stat_segment_adjust (scm, (void *) scm->shared_header->directory_vector); +} + +static void +update_counter_vector_simple (stat_segment_data_t *res, + sflow_counters_t *ifCtrs, u32 hw_if_index) +{ + for (int th = 0; th < vec_len (res->simple_counter_vec); th++) + { + for (int intf = 0; intf < vec_len (res->simple_counter_vec[th]); intf++) + { + if (intf == hw_if_index) + { + u64 count = res->simple_counter_vec[th][intf]; + if (count) + { + if (strcmp (res->name, "/if/rx-error") == 0) + ifCtrs->rx.errs += count; + else if (strcmp (res->name, "/if/tx-error") == 0) + ifCtrs->tx.errs += count; + else if (strcmp (res->name, "/if/drops") == 0) + ifCtrs->tx.drps += count; + else if (strcmp (res->name, "/if/rx-miss") == 0 || + strcmp (res->name, "/if/rx-no-buf") == 0) + ifCtrs->rx.drps += count; + } + } + } + } +} + +static void +update_counter_vector_combined (stat_segment_data_t *res, + sflow_counters_t *ifCtrs, u32 hw_if_index) +{ + for (int th = 0; th < vec_len (res->simple_counter_vec); th++) + { + for (int intf = 0; intf < vec_len (res->combined_counter_vec[th]); + intf++) + { + if (intf == hw_if_index) + { + u64 
pkts = res->combined_counter_vec[th][intf].packets; + u64 byts = res->combined_counter_vec[th][intf].bytes; + if (pkts || byts) + { + if (strcmp (res->name, "/if/rx") == 0) + { + ifCtrs->rx.pkts += pkts; + ifCtrs->rx.byts += byts; + } + else if (strcmp (res->name, "/if/tx") == 0) + { + ifCtrs->tx.byts += byts; + ifCtrs->tx.pkts += pkts; + } + // TODO: do multicasts include broadcasts, or are they + // counted separately? (test with traffic) + else if (strcmp (res->name, "/if/rx-multicast") == 0) + ifCtrs->rx.m_pkts += pkts; + else if (strcmp (res->name, "/if/tx-multicast") == 0) + ifCtrs->tx.m_pkts += pkts; + else if (strcmp (res->name, "/if/rx-broadcast") == 0) + ifCtrs->rx.b_pkts += pkts; + else if (strcmp (res->name, "/if/tx-broadcast") == 0) + ifCtrs->tx.b_pkts += pkts; + } + } + } + } +} + +static int +startsWith (u8 *str, char *prefix) +{ + if (str && prefix) + { + int len1 = vec_len (str); + int len2 = strlen (prefix); + if (len1 >= len2) + { + return (memcmp (str, prefix, len2) == 0); + } + } + return false; +} + +static void +update_counters (sflow_main_t *smp, sflow_per_interface_data_t *sfif) +{ + vnet_sw_interface_t *sw = + vnet_get_sw_interface (smp->vnet_main, sfif->sw_if_index); + vnet_hw_interface_t *hw = + vnet_get_hw_interface (smp->vnet_main, sfif->hw_if_index); + // This gives us a list of stat integers + u32 *stats = stat_segment_ls (NULL); + stat_segment_data_t *res = NULL; + // read vector of stat_segment_data_t objects +retry: + res = stat_segment_dump (stats); + if (res == NULL) + { + /* Memory layout has changed */ + if (stats) + vec_free (stats); + stats = stat_segment_ls (NULL); + goto retry; + } + sflow_counters_t ifCtrs = {}; + // and accumulate the (per-thread) entries for this interface + for (int ii = 0; ii < vec_len (res); ii++) + { + switch (res[ii].type) + { + case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE: + update_counter_vector_simple (&res[ii], &ifCtrs, sfif->hw_if_index); + break; + case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED: + 
update_counter_vector_combined (&res[ii], &ifCtrs, + sfif->hw_if_index); + break; + case STAT_DIR_TYPE_SCALAR_INDEX: + case STAT_DIR_TYPE_NAME_VECTOR: + case STAT_DIR_TYPE_EMPTY: + default: + break; + } + } + stat_segment_data_free (res); + vec_free (stats); + // send the structure via netlink + SFLOWUSSpec spec = {}; + SFLOWUSSpec_setMsgType (&spec, SFLOW_VPP_MSG_IF_COUNTERS); + SFLOWUSSpec_setAttr (&spec, SFLOW_VPP_ATTR_PORTNAME, hw->name, + vec_len (hw->name)); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFINDEX, sfif->hw_if_index); + if (sfif->linux_if_index) + { + // We know the corresponding Linux ifIndex for this interface, so include + // that here. + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_OSINDEX, + sfif->linux_if_index); + } + + // Report consistent with vpp-snmp-agent + u64 ifSpeed = (hw->link_speed == ~0) ? 0 : (hw->link_speed * 1000); + if (startsWith (hw->name, "loop") || startsWith (hw->name, "tap")) + ifSpeed = 1e9; + + u32 ifType = startsWith (hw->name, "loop") ? 24 // softwareLoopback + : + 6; // ethernetCsmacd + + u32 ifDirection = (hw->flags & VNET_HW_INTERFACE_FLAG_HALF_DUPLEX) ? + 2 // half-duplex + : + 1; // full-duplex + + u32 operUp = (hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? 1 : 0; + u32 adminUp = (sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
1 : 0; + + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFSPEED, ifSpeed); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFTYPE, ifType); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFDIRECTION, ifDirection); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_OPER_UP, operUp); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_ADMIN_UP, adminUp); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_OCTETS, ifCtrs.rx.byts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_OCTETS, ifCtrs.tx.byts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_PKTS, ifCtrs.rx.pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_PKTS, ifCtrs.tx.pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_MCASTS, ifCtrs.rx.m_pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_MCASTS, ifCtrs.tx.m_pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_BCASTS, ifCtrs.rx.b_pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_BCASTS, ifCtrs.tx.b_pkts); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_ERRORS, ifCtrs.rx.errs); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_ERRORS, ifCtrs.tx.errs); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_RX_DISCARDS, ifCtrs.rx.drps); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_TX_DISCARDS, ifCtrs.tx.drps); + SFLOWUSSpec_setAttr (&spec, SFLOW_VPP_ATTR_HW_ADDRESS, hw->hw_address, + vec_len (hw->hw_address)); + smp->unixsock_seq++; + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_SEQ, smp->unixsock_seq); + if (SFLOWUSSpec_send (&smp->sflow_usersock, &spec) < 0) + smp->csample_send_drops++; + smp->csample_send++; +} + +static u32 +total_drops (sflow_main_t *smp) +{ + // sum sendmsg and worker-fifo drops + u32 all_drops = smp->psample_send_drops; + for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + { + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + all_drops += sfwk->drop; + } + return all_drops; +} + +static void +send_sampling_status_info (sflow_main_t 
*smp) +{ + SFLOWUSSpec spec = {}; + u32 all_pipeline_drops = total_drops (smp); + SFLOWUSSpec_setMsgType (&spec, SFLOW_VPP_MSG_STATUS); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_UPTIME_S, smp->now_mono_S); + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_DROPS, all_pipeline_drops); + ++smp->unixsock_seq; + SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_SEQ, smp->unixsock_seq); + SFLOWUSSpec_send (&smp->sflow_usersock, &spec); +} + +static int +counter_polling_check (sflow_main_t *smp) +{ + // see if we should poll one or more interfaces + int polled = 0; + for (int ii = 0; ii < vec_len (smp->per_interface_data); ii++) + { + sflow_per_interface_data_t *sfif = + vec_elt_at_index (smp->per_interface_data, ii); + if (sfif && sfif->sflow_enabled && + (sfif->polled == 0 // always send the first time + || (smp->now_mono_S % smp->pollingS) == + (sfif->hw_if_index % smp->pollingS))) + { + update_counters (smp, sfif); + sfif->polled++; + polled++; + } + } + return polled; +} + +static u32 +read_worker_fifos (sflow_main_t *smp) +{ + // Our maximum samples/sec is approximately: + // (SFLOW_READ_BATCH * smp->total_threads) / SFLOW_POLL_WAIT_S + // but it may also be affected by SFLOW_FIFO_DEPTH + // and whether vlib_process_wait_for_event_or_clock() really waits for + // SFLOW_POLL_WAIT_S every time. + // If there are too many samples then dropping them as early as possible + // (and as randomly as possible) is preferred, so SFLOW_FIFO_DEPTH should not + // be any bigger than it strictly needs to be. If there is a system + // bottleneck it could be in the PSAMPLE netlink channel, the hsflowd + // encoder, the UDP stack, the network path, the collector, or a faraway + // application. Any kind of "clipping" will result in systematic bias so we + // try to make this fair even when it's running hot. For example, we'll + // round-robin the thread FIFO dequeues here to make sure we give them equal + // access to the PSAMPLE channel. 
Another factor in sizing SFLOW_FIFO_DEPTH + // is to ensure that we can absorb a short-term line-rate burst without + // dropping samples. This implies a deeper FIFO. In fact it looks like this + // requirement ends up being the dominant one. A value of SFLOW_FIFO_DEPTH + // that will absorb an n-second line-rate burst may well result in the max + // sustainable samples/sec being higher than we really need. But it's not a + // serious problem because the samples are packed into UDP datagrams and the + // network or collector can drop those anywhere they need to. The protocol is + // designed to be tolerant to random packet-loss in transit. For example, 1% + // loss should just make it look like the sampling-rate setting was 1:10100 + // instead of 1:10000. + u32 batch = 0; + for (; batch < SFLOW_READ_BATCH; batch++) + { + u32 psample_send = 0, psample_send_fail = 0; + for (u32 thread_index = 0; thread_index < smp->total_threads; + thread_index++) + { + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + sflow_sample_t sample; + if (sflow_fifo_dequeue (&sfwk->fifo, &sample)) + { + if (sample.header_bytes > smp->headerB) + { + // We get here if header-bytes setting is reduced dynamically + // and a sample that was in the FIFO appears with a larger + // header. + continue; + } + SFLOWPSSpec spec = {}; + u32 ps_group = SFLOW_VPP_PSAMPLE_GROUP_INGRESS; + u32 seqNo = ++smp->psample_seq_ingress; + // TODO: is it always ethernet? 
(affects ifType counter as well) + u16 header_protocol = 1; /* ethernet */ + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_SAMPLE_GROUP, + ps_group); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_IIFINDEX, + sample.input_if_index); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_OIFINDEX, + sample.output_if_index); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_ORIGSIZE, + sample.sampled_packet_size); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_GROUP_SEQ, + seqNo); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_SAMPLE_RATE, + sample.samplingN); + SFLOWPSSpec_setAttr (&spec, SFLOWPS_PSAMPLE_ATTR_DATA, + sample.header, sample.header_bytes); + SFLOWPSSpec_setAttrInt (&spec, SFLOWPS_PSAMPLE_ATTR_PROTO, + header_protocol); + psample_send++; + if (SFLOWPSSpec_send (&smp->sflow_psample, &spec) < 0) + psample_send_fail++; + } + } + if (psample_send == 0) + { + // nothing found on FIFOs this time through, so terminate batch early + break; + } + else + { + vlib_node_increment_counter (smp->vlib_main, sflow_node.index, + SFLOW_ERROR_PSAMPLE_SEND, psample_send); + if (psample_send_fail > 0) + { + vlib_node_increment_counter (smp->vlib_main, sflow_node.index, + SFLOW_ERROR_PSAMPLE_SEND_FAIL, + psample_send_fail); + smp->psample_send_drops += psample_send_fail; + } + } + } + return batch; +} + +static void +read_node_counters (sflow_main_t *smp, sflow_err_ctrs_t *ctrs) +{ + for (u32 ec = 0; ec < SFLOW_N_ERROR; ec++) + ctrs->counters[ec] = 0; + for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + { + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + ctrs->counters[SFLOW_ERROR_PROCESSED] += sfwk->pool; + ctrs->counters[SFLOW_ERROR_SAMPLED] += sfwk->smpl; + ctrs->counters[SFLOW_ERROR_DROPPED] += sfwk->drop; + } +} + +static void +update_node_cntr (sflow_main_t *smp, sflow_err_ctrs_t *prev, + sflow_err_ctrs_t *latest, sflow_error_t ee) +{ + u32 delta = latest->counters[ee] 
- prev->counters[ee]; + vlib_node_increment_counter (smp->vlib_main, sflow_node.index, ee, delta); +} + +static void +update_node_counters (sflow_main_t *smp, sflow_err_ctrs_t *prev, + sflow_err_ctrs_t *latest) +{ + update_node_cntr (smp, prev, latest, SFLOW_ERROR_PROCESSED); + update_node_cntr (smp, prev, latest, SFLOW_ERROR_SAMPLED); + update_node_cntr (smp, prev, latest, SFLOW_ERROR_DROPPED); + *prev = *latest; // latch for next time +} + +static uword +sflow_process_samples (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + sflow_main_t *smp = &sflow_main; + clib_time_t ctm; + clib_time_init (&ctm); + + sflow_err_ctrs_t prev = {}; + read_node_counters (smp, &prev); + + while (1) + { + + // We don't have anything for the main loop to edge-trigger on, so + // we are just asking to be called back regularly. More regularly + // if sFlow is actually enabled... + f64 poll_wait_S = smp->running ? SFLOW_POLL_WAIT_S : 1.0; + vlib_process_wait_for_event_or_clock (vm, poll_wait_S); + if (!smp->running) + { + // Nothing to do. Just yield again. + continue; + } + +#ifdef SFLOW_USE_VAPI +#ifdef SFLOW_TEST_HAMMER_VAPI + sflow_vapi_check_for_linux_if_index_results (&smp->vac, + smp->per_interface_data); + sflow_vapi_read_linux_if_index_numbers (&smp->vac, + smp->per_interface_data); +#endif +#endif + + // PSAMPLE channel may need extra step (e.g. to learn family_id) + // before it is ready to send + EnumSFLOWPSState psState = SFLOWPS_state (&smp->sflow_psample); + if (psState != SFLOWPS_STATE_READY) + { + SFLOWPS_open_step (&smp->sflow_psample); + } + + // What we want is a monotonic, per-second clock. This seems to do it + // because it is based on the CPU clock. 
+ f64 tnow = clib_time_now (&ctm); + u32 tnow_S = (u32) tnow; + if (tnow_S != smp->now_mono_S) + { + // second rollover + smp->now_mono_S = tnow_S; +#ifdef SFLOW_USE_VAPI + if (!smp->vac.vapi_unavailable) + { + // look up linux if_index numbers + sflow_vapi_check_for_linux_if_index_results ( + &smp->vac, smp->per_interface_data); + if (smp->vapi_requests == 0 || + (tnow_S % SFLOW_VAPI_POLL_INTERVAL) == 0) + { + if (sflow_vapi_read_linux_if_index_numbers ( + &smp->vac, smp->per_interface_data)) + { + smp->vapi_requests++; + } + } + } +#endif + // send status info + send_sampling_status_info (smp); + // poll counters for interfaces that are due + counter_polling_check (smp); + } + // process samples from workers + read_worker_fifos (smp); + + // and sync the global counters + sflow_err_ctrs_t latest = {}; + read_node_counters (smp, &latest); + update_node_counters (smp, &prev, &latest); + } + return 0; +} + +VLIB_REGISTER_NODE (sflow_process_samples_node, static) = { + .function = sflow_process_samples, + .name = "sflow-process-samples", + .type = VLIB_NODE_TYPE_PROCESS, + .process_log2_n_stack_bytes = 17, +}; + +static void +sflow_set_worker_sampling_state (sflow_main_t *smp) +{ + /* set up (or reset) sampling context for each thread */ + vlib_thread_main_t *tm = &vlib_thread_main; + smp->total_threads = 1 + tm->n_threads; + vec_validate (smp->per_thread_data, smp->total_threads); + for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++) + { + sflow_per_thread_data_t *sfwk = + vec_elt_at_index (smp->per_thread_data, thread_index); + if (sfwk->smpN != smp->samplingN) + { + sfwk->smpN = smp->samplingN; + sfwk->seed = thread_index; + sfwk->skip = sflow_next_random_skip (sfwk); + SFLOW_DBG ( + "sflowset_worker_sampling_state: samplingN=%u thread=%u skip=%u", + smp->samplingN, thread_index, sfwk->skip); + } + } +} + +static void +sflow_sampling_start (sflow_main_t *smp) +{ + SFLOW_INFO ("sflow_sampling_start"); + + smp->running = 1; + // Reset this 
clock so that the per-second netlink status updates + // will communicate a restart to hsflowd. This helps to distinguish: + // (1) vpp restarted with sFlow off => no status updates (went quiet) + // (2) vpp restarted with default sFlow => status updates (starting again + // from 0) + smp->now_mono_S = 0; + + // reset sequence numbers to indicated discontinuity + smp->psample_seq_ingress = 0; + smp->psample_seq_egress = 0; + smp->psample_send_drops = 0; + +#ifdef SFLOW_USE_VAPI + // reset vapi request count so that we make a request the first time + smp->vapi_requests = 0; +#endif + + /* open PSAMPLE netlink channel for writing packet samples */ + SFLOWPS_open (&smp->sflow_psample); + /* open USERSOCK netlink channel for writing counters */ + SFLOWUS_open (&smp->sflow_usersock); + smp->sflow_usersock.group_id = SFLOW_NETLINK_USERSOCK_MULTICAST; + /* set up (or reset) sampling context for each thread */ + sflow_set_worker_sampling_state (smp); +} + +static void +sflow_sampling_stop (sflow_main_t *smp) +{ + SFLOW_INFO ("sflow_sampling_stop"); + smp->running = 0; + SFLOWPS_close (&smp->sflow_psample); + SFLOWUS_close (&smp->sflow_usersock); +} + +static void +sflow_sampling_start_stop (sflow_main_t *smp) +{ + int run = (smp->samplingN != 0 && smp->interfacesEnabled != 0); + if (run != smp->running) + { + if (run) + sflow_sampling_start (smp); + else + sflow_sampling_stop (smp); + } +} + +int +sflow_sampling_rate (sflow_main_t *smp, u32 samplingN) +{ + // TODO: this might be the right place to enforce the + // "2 significant" figures constraint so that per-interface + // sampling-rate settings can use HCF+sub-sampling efficiently. 
+ + if (smp->running && smp->samplingN && samplingN) + { + // dynamic change of sampling rate + smp->samplingN = samplingN; + sflow_set_worker_sampling_state (smp); + } + else + { + // potential on/off change + smp->samplingN = samplingN; + sflow_sampling_start_stop (smp); + } + return 0; +} + +int +sflow_polling_interval (sflow_main_t *smp, u32 pollingS) +{ + smp->pollingS = pollingS; + return 0; +} + +/* Set the number of header bytes captured per sample. The request is + * rounded up to a multiple of SFLOW_HEADER_BYTES_STEP and clamped to + * [SFLOW_MIN_HEADER_BYTES, SFLOW_MAX_HEADER_BYTES]; a warning is logged + * when the stored value differs from the request. Always returns 0. */ +int +sflow_header_bytes (sflow_main_t *smp, u32 headerB) +{ + u32 hdrB = headerB; + // cap oversized requests first so that the round-up below cannot wrap + // around u32 (a headerB close to ~0 would otherwise round to 0 and + // end up at the minimum instead of the maximum) + if (hdrB > SFLOW_MAX_HEADER_BYTES) + hdrB = SFLOW_MAX_HEADER_BYTES; + // then round up to nearest multiple of SFLOW_HEADER_BYTES_STEP + // (which helps to make worker thread memcpy faster) + hdrB = ((hdrB + SFLOW_HEADER_BYTES_STEP - 1) / SFLOW_HEADER_BYTES_STEP) * + SFLOW_HEADER_BYTES_STEP; + // then check max/min + if (hdrB < SFLOW_MIN_HEADER_BYTES) + hdrB = SFLOW_MIN_HEADER_BYTES; + if (hdrB > SFLOW_MAX_HEADER_BYTES) + hdrB = SFLOW_MAX_HEADER_BYTES; + if (hdrB != headerB) + SFLOW_WARN ("header_bytes rounded from %u to %u\n", headerB, hdrB); + smp->headerB = hdrB; + return 0; +} + +int +sflow_enable_disable (sflow_main_t *smp, u32 sw_if_index, int enable_disable) +{ + vnet_sw_interface_t *sw; + + /* Utterly wrong? */ + if (pool_is_free_index (smp->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + /* Not a physical port? */ + sw = vnet_get_sw_interface (smp->vnet_main, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + // note: vnet_interface_main_t has "fast lookup table" called + // hw_if_index_by_sw_if_index. 
+ SFLOW_DBG ("sw_if_index=%u, sup_sw_if_index=%u, hw_if_index=%u\n", + sw->sw_if_index, sw->sup_sw_if_index, sw->hw_if_index); + + // note: vnet_hw_interface_t has uword *bond_info + // (where 0=>none, ~0 => slave, other=>ptr to bitmap of slaves) + + vec_validate (smp->per_interface_data, sw->hw_if_index); + sflow_per_interface_data_t *sfif = + vec_elt_at_index (smp->per_interface_data, sw->hw_if_index); + if (enable_disable == sfif->sflow_enabled) + { + // redundant enable or disable + return VNET_API_ERROR_VALUE_EXIST; + } + else + { + // OK, turn it on/off + sfif->sw_if_index = sw_if_index; + sfif->hw_if_index = sw->hw_if_index; + sfif->polled = 0; + sfif->sflow_enabled = enable_disable; + vnet_feature_enable_disable ("device-input", "sflow", sw_if_index, + enable_disable, 0, 0); + smp->interfacesEnabled += (enable_disable) ? 1 : -1; + } + + sflow_sampling_start_stop (smp); + return 0; +} + +static clib_error_t * +sflow_sampling_rate_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sflow_main_t *smp = &sflow_main; + u32 sampling_N = ~0; + + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%u", &sampling_N)) + ; + else + break; + } + + if (sampling_N == ~0) + return clib_error_return (0, "Please specify a sampling rate..."); + + rv = sflow_sampling_rate (smp, sampling_N); + + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "sflow_enable_disable returned %d", rv); + } + return 0; +} + +static clib_error_t * +sflow_polling_interval_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sflow_main_t *smp = &sflow_main; + u32 polling_S = ~0; + + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%u", &polling_S)) + ; + else + break; + } + + if (polling_S == ~0) + return clib_error_return (0, "Please specify a polling interval..."); + + rv = sflow_polling_interval (smp, 
polling_S); + + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "sflow_polling_interval returned %d", rv); + } + return 0; +} + +static clib_error_t * +sflow_header_bytes_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sflow_main_t *smp = &sflow_main; + u32 header_B = ~0; + + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%u", &header_B)) + ; + else + break; + } + + if (header_B == ~0) + return clib_error_return (0, "Please specify a header bytes limit..."); + + rv = sflow_header_bytes (smp, header_B); + + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "sflow_header_bytes returned %d", rv); + } + return 0; +} + +static clib_error_t * +sflow_enable_disable_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sflow_main_t *smp = &sflow_main; + u32 sw_if_index = ~0; + int enable_disable = 1; + + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "disable")) + enable_disable = 0; + else if (unformat (input, "%U", unformat_vnet_sw_interface, + smp->vnet_main, &sw_if_index)) + ; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Please specify an interface..."); + + rv = sflow_enable_disable (smp, sw_if_index, enable_disable); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INVALID_SW_IF_INDEX: + return clib_error_return ( + 0, "Invalid interface, only works on physical ports"); + break; + + case VNET_API_ERROR_UNIMPLEMENTED: + return clib_error_return (0, + "Device driver doesn't support redirection"); + break; + + default: + return clib_error_return (0, "sflow_enable_disable returned %d", rv); + } + return 0; +} + +static clib_error_t * +show_sflow_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sflow_main_t *smp = &sflow_main; + clib_error_t *error = NULL; + 
vlib_cli_output (vm, "sflow sampling-rate %u\n", smp->samplingN); + vlib_cli_output (vm, "sflow sampling-direction ingress\n"); + vlib_cli_output (vm, "sflow polling-interval %u\n", smp->pollingS); + vlib_cli_output (vm, "sflow header-bytes %u\n", smp->headerB); + u32 itfs_enabled = 0; + for (int ii = 0; ii < vec_len (smp->per_interface_data); ii++) + { + sflow_per_interface_data_t *sfif = + vec_elt_at_index (smp->per_interface_data, ii); + if (sfif && sfif->sflow_enabled) + { + itfs_enabled++; + vnet_hw_interface_t *hw = + vnet_get_hw_interface (smp->vnet_main, sfif->hw_if_index); + vlib_cli_output (vm, "sflow enable %s\n", (char *) hw->name); + } + } + vlib_cli_output (vm, "Status\n"); + vlib_cli_output (vm, " interfaces enabled: %u\n", itfs_enabled); + vlib_cli_output (vm, " packet samples sent: %u\n", + smp->psample_seq_ingress + smp->psample_seq_egress); + vlib_cli_output (vm, " packet samples dropped: %u\n", total_drops (smp)); + vlib_cli_output (vm, " counter samples sent: %u\n", smp->csample_send); + vlib_cli_output (vm, " counter samples dropped: %u\n", + smp->csample_send_drops); + return error; +} + +VLIB_CLI_COMMAND (sflow_enable_disable_command, static) = { + .path = "sflow enable-disable", + .short_help = "sflow enable-disable <interface-name> [disable]", + .function = sflow_enable_disable_command_fn, +}; + +VLIB_CLI_COMMAND (sflow_sampling_rate_command, static) = { + .path = "sflow sampling-rate", + .short_help = "sflow sampling-rate <N>", + .function = sflow_sampling_rate_command_fn, +}; + +VLIB_CLI_COMMAND (sflow_polling_interval_command, static) = { + .path = "sflow polling-interval", + .short_help = "sflow polling-interval <S>", + .function = sflow_polling_interval_command_fn, +}; + +VLIB_CLI_COMMAND (sflow_header_bytes_command, static) = { + .path = "sflow header-bytes", + .short_help = "sflow header-bytes <B>", + .function = sflow_header_bytes_command_fn, +}; + +VLIB_CLI_COMMAND (show_sflow_command, static) = { + .path = "show sflow", + 
.short_help = "show sflow", + .function = show_sflow_command_fn, +}; + +/* API message handler */ +static void +vl_api_sflow_enable_disable_t_handler (vl_api_sflow_enable_disable_t *mp) +{ + vl_api_sflow_enable_disable_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + int rv; + + rv = sflow_enable_disable (smp, ntohl (mp->hw_if_index), + (int) (mp->enable_disable)); + + REPLY_MACRO (VL_API_SFLOW_ENABLE_DISABLE_REPLY); +} + +static void +vl_api_sflow_sampling_rate_set_t_handler (vl_api_sflow_sampling_rate_set_t *mp) +{ + vl_api_sflow_sampling_rate_set_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + int rv; + + rv = sflow_sampling_rate (smp, ntohl (mp->sampling_N)); + + REPLY_MACRO (VL_API_SFLOW_SAMPLING_RATE_SET_REPLY); +} + +static void +vl_api_sflow_sampling_rate_get_t_handler (vl_api_sflow_sampling_rate_get_t *mp) +{ + vl_api_sflow_sampling_rate_get_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + + REPLY_MACRO_DETAILS2 (VL_API_SFLOW_SAMPLING_RATE_GET_REPLY, + ({ rmp->sampling_N = ntohl (smp->samplingN); })); +} + +static void +vl_api_sflow_polling_interval_set_t_handler ( + vl_api_sflow_polling_interval_set_t *mp) +{ + vl_api_sflow_polling_interval_set_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + int rv; + + rv = sflow_polling_interval (smp, ntohl (mp->polling_S)); + + REPLY_MACRO (VL_API_SFLOW_POLLING_INTERVAL_SET_REPLY); +} + +static void +vl_api_sflow_polling_interval_get_t_handler ( + vl_api_sflow_polling_interval_get_t *mp) +{ + vl_api_sflow_polling_interval_get_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + + REPLY_MACRO_DETAILS2 (VL_API_SFLOW_POLLING_INTERVAL_GET_REPLY, + ({ rmp->polling_S = ntohl (smp->pollingS); })); +} + +static void +vl_api_sflow_header_bytes_set_t_handler (vl_api_sflow_header_bytes_set_t *mp) +{ + vl_api_sflow_header_bytes_set_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + int rv; + + rv = sflow_header_bytes (smp, ntohl (mp->header_B)); + + REPLY_MACRO (VL_API_SFLOW_HEADER_BYTES_SET_REPLY); +} + +static void 
+vl_api_sflow_header_bytes_get_t_handler (vl_api_sflow_header_bytes_get_t *mp) +{ + vl_api_sflow_header_bytes_get_reply_t *rmp; + sflow_main_t *smp = &sflow_main; + + REPLY_MACRO_DETAILS2 (VL_API_SFLOW_HEADER_BYTES_GET_REPLY, + ({ rmp->header_B = ntohl (smp->headerB); })); +} + +static void +send_sflow_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg, + u32 context, const u32 hw_if_index) +{ + vl_api_sflow_interface_details_t *mp; + sflow_main_t *smp = &sflow_main; + + mp = vl_msg_api_alloc_zero (sizeof (*mp)); + mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SFLOW_INTERFACE_DETAILS); + mp->context = context; + + mp->hw_if_index = htonl (hw_if_index); + vl_api_send_msg (reg, (u8 *) mp); +} + +static void +vl_api_sflow_interface_dump_t_handler (vl_api_sflow_interface_dump_t *mp) +{ + vpe_api_main_t *am = &vpe_api_main; + sflow_main_t *smp = &sflow_main; + vl_api_registration_t *reg; + u32 hw_if_index = ~0; + + reg = vl_api_client_index_to_registration (mp->client_index); + if (!reg) + return; + hw_if_index = ntohl (mp->hw_if_index); + + for (int ii = 0; ii < vec_len (smp->per_interface_data); ii++) + { + sflow_per_interface_data_t *sfif = + vec_elt_at_index (smp->per_interface_data, ii); + if (sfif && sfif->sflow_enabled) + { + if (hw_if_index == ~0 || hw_if_index == sfif->hw_if_index) + { + send_sflow_interface_details (am, reg, mp->context, + sfif->hw_if_index); + } + } + } +} + +/* API definitions */ +#include <sflow/sflow.api.c> + +static clib_error_t * +sflow_init (vlib_main_t *vm) +{ + sflow_logger = vlib_log_register_class ("sflow", "all"); + + sflow_main_t *smp = &sflow_main; + clib_error_t *error = 0; + + smp->vlib_main = vm; + smp->vnet_main = vnet_get_main (); + + /* set default sampling-rate and polling-interval so that "enable" is all + * that is necessary */ + smp->samplingN = SFLOW_DEFAULT_SAMPLING_N; + smp->pollingS = SFLOW_DEFAULT_POLLING_S; + smp->headerB = SFLOW_DEFAULT_HEADER_BYTES; + + /* Add our API messages to the global 
name_crc hash table */ + smp->msg_id_base = setup_message_id_table (); + + /* access to counters - TODO: should this only happen on sflow enable? */ + sflow_stat_segment_client_init (); + return error; +} + +VLIB_INIT_FUNCTION (sflow_init); + +VNET_FEATURE_INIT (sflow, static) = { + .arc_name = "device-input", + .node_name = "sflow", + .runs_before = VNET_FEATURES ("ethernet-input"), +}; + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "sFlow random packet sampling", +}; + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow.h b/src/plugins/sflow/sflow.h new file mode 100644 index 00000000000..609ff723816 --- /dev/null +++ b/src/plugins/sflow/sflow.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_sflow_h__ +#define __included_sflow_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <sflow/sflow_common.h> +#include <sflow/sflow_vapi.h> +#include <sflow/sflow_psample.h> +#include <sflow/sflow_usersock.h> + +#define SFLOW_DEFAULT_SAMPLING_N 10000 +#define SFLOW_DEFAULT_POLLING_S 20 +#define SFLOW_DEFAULT_HEADER_BYTES 128 +#define SFLOW_MAX_HEADER_BYTES 256 +#define SFLOW_MIN_HEADER_BYTES 64 +#define SFLOW_HEADER_BYTES_STEP 32 + +#define SFLOW_FIFO_DEPTH 2048 // must be power of 2 +#define SFLOW_POLL_WAIT_S 0.001 +#define SFLOW_READ_BATCH 100 + +// use PSAMPLE group number to distinguish VPP samples from others +// (so that hsflowd will know to remap the ifIndex numbers if necessary) +#define SFLOW_VPP_PSAMPLE_GROUP_INGRESS 3 +#define SFLOW_VPP_PSAMPLE_GROUP_EGRESS 4 + +#define foreach_sflow_error \ + _ (PROCESSED, "sflow packets processed") \ + _ (SAMPLED, "sflow packets sampled") \ + _ (DROPPED, "sflow packets dropped") \ + _ (PSAMPLE_SEND, "sflow PSAMPLE sent") \ + _ (PSAMPLE_SEND_FAIL, "sflow PSAMPLE send failed") + +typedef enum +{ +#define _(sym, str) SFLOW_ERROR_##sym, + foreach_sflow_error +#undef _ + SFLOW_N_ERROR, +} sflow_error_t; + +typedef struct +{ + u32 counters[SFLOW_N_ERROR]; +} sflow_err_ctrs_t; + +/* packet sample */ +typedef struct +{ + u32 samplingN; + u32 input_if_index; + u32 output_if_index; + u32 header_protocol; + u32 sampled_packet_size; + u32 header_bytes; + u8 header[SFLOW_MAX_HEADER_BYTES]; +} sflow_sample_t; + +// Define SPSC FIFO for sending samples worker-to-main. +// (I did try to use VPP svm FIFO, but couldn't +// understand why it was sometimes going wrong). 
+typedef struct +{ + volatile u32 tx; // can change under consumer's feet + volatile u32 rx; // can change under producer's feet + sflow_sample_t samples[SFLOW_FIFO_DEPTH]; +} sflow_fifo_t; + +// advance a slot index with wrap-around (SFLOW_FIFO_DEPTH is a power of 2) +#define SFLOW_FIFO_NEXT(slot) (((slot) + 1) & (SFLOW_FIFO_DEPTH - 1)) +/* Producer side: copy *sample into the FIFO. + * Returns true on success, false if the FIFO is full (one slot is always + * left unused so that full and empty can be told apart). */ +static inline int +sflow_fifo_enqueue (sflow_fifo_t *fifo, sflow_sample_t *sample) +{ + u32 curr_rx = clib_atomic_load_acq_n (&fifo->rx); + u32 curr_tx = fifo->tx; // clib_atomic_load_acq_n(&fifo->tx); + u32 next_tx = SFLOW_FIFO_NEXT (curr_tx); + if (next_tx == curr_rx) + return false; // full + // tx is the next slot to write and rx is the next slot to read, so the + // sample must go into curr_tx: that is the slot sflow_fifo_dequeue() + // reads (samples[curr_rx]) once it sees tx advance. Writing next_tx + // here made the consumer read a slot that had not been written yet. + memcpy (&fifo->samples[curr_tx], sample, sizeof (*sample)); + // release-store publishes the copied sample before the new tx + clib_atomic_store_rel_n (&fifo->tx, next_tx); + return true; +} + +/* Consumer side: copy the oldest queued sample into *sample. + * Returns true on success, false if the FIFO is empty. */ +static inline int +sflow_fifo_dequeue (sflow_fifo_t *fifo, sflow_sample_t *sample) +{ + u32 curr_rx = fifo->rx; // clib_atomic_load_acq_n(&fifo->rx); + u32 curr_tx = clib_atomic_load_acq_n (&fifo->tx); + if (curr_rx == curr_tx) + return false; // empty + memcpy (sample, &fifo->samples[curr_rx], sizeof (*sample)); + u32 next_rx = SFLOW_FIFO_NEXT (curr_rx); + clib_atomic_store_rel_n (&fifo->rx, next_rx); + return true; +} + +/* private to worker */ +typedef struct +{ + u32 smpN; + u32 skip; + u32 pool; + u32 seed; + u32 smpl; + u32 drop; + CLIB_CACHE_LINE_ALIGN_MARK (_fifo); + sflow_fifo_t fifo; +} sflow_per_thread_data_t; + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; + + /* sampling state */ + u32 samplingN; + u32 pollingS; + u32 headerB; + u32 total_threads; + sflow_per_interface_data_t *per_interface_data; + sflow_per_thread_data_t *per_thread_data; + + /* psample channel (packet samples) */ + SFLOWPS sflow_psample; + /* usersock channel (periodic counters) */ + SFLOWUS sflow_usersock; +#define SFLOW_NETLINK_USERSOCK_MULTICAST 29 + /* dropmon channel (packet drops) */ + // SFLOWDM sflow_dropmon; + + /* sample-processing */ + u32 now_mono_S; + + /* running 
control */ + int running; + u32 interfacesEnabled; + + /* main-thread counters */ + u32 psample_seq_ingress; + u32 psample_seq_egress; + u32 psample_send_drops; + u32 csample_send; + u32 csample_send_drops; + u32 unixsock_seq; +#ifdef SFLOW_USE_VAPI + /* vapi query helper thread (transient) */ + CLIB_CACHE_LINE_ALIGN_MARK (_vapi); + sflow_vapi_client_t vac; + int vapi_requests; +#endif +} sflow_main_t; + +extern sflow_main_t sflow_main; + +extern vlib_node_registration_t sflow_node; + +static inline u32 +sflow_next_random_skip (sflow_per_thread_data_t *sfwk) +{ + /* skip==1 means "take the next packet" so this + fn must never return 0 */ + if (sfwk->smpN <= 1) + return 1; + u32 lim = (2 * sfwk->smpN) - 1; + return (random_u32 (&sfwk->seed) % lim) + 1; +} + +#endif /* __included_sflow_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow.rst b/src/plugins/sflow/sflow.rst new file mode 100644 index 00000000000..f9c18488363 --- /dev/null +++ b/src/plugins/sflow/sflow.rst @@ -0,0 +1,61 @@ +.. _Sflow_agent: + +.. toctree:: + +SFlow Monitoring Agent +====================== + +Overview +________ + +This plugin implements the random packet-sampling and interface +telemetry streaming required to support standard sFlow export +on Linux platforms. The overhead incurred by this monitoring is +minimal, so that detailed, real-time traffic analysis can be +achieved even under high load conditions, with visibility into +any fields that appear in the packet headers. If the VPP linux-cp +plugin is running then interfaces will be mapped to their +equivalent Linux tap ports. + +Example Configuration +_____________________ + +:: + sflow sampling-rate 10000 + sflow polling-interval 20 + sflow header-bytes 128 + sflow enable GigabitEthernet0/8/0 + sflow enable GigabitEthernet0/9/0 + sflow enable GigabitEthernet0/a/0 + ... 
+ sflow enable GigabitEthernet0/a/0 disable + +Detailed notes +______________ + +Each VPP worker that has at least one interface will create a FIFO +and enqueue samples to it from the interfaces it is servicing that +are enabled. There is a process running in the main thread that will +dequeue the FIFOs periodically. If the FIFO is full, the worker will +drop samples, which helps ensure that (a) the main thread is not +overloaded with samples and (b) that individual workers and interfaces, +even when under high load, can't crowd out other interfaces and workers. + +You can change the sampling-rate at runtime, but keep in mind that +it is a global variable that applies to workers, not interfaces. +This means that (1) all workers will sample at the same rate, and (2) +if there are multiple interfaces assigned to a worker, they'll share +the sampling rate which will undershoot, and similarly (3) if there +are multiple RX queues assigned to more than one worker, the effective +sampling rate will overshoot. + +External Dependencies +_____________________ + +This plugin writes packet samples to the standard Linux netlink PSAMPLE +channel, so the kernel psample module must be loaded with modprobe or +insmod. As such, this plugin only works for Linux environments. + +It also shares periodic interface counter samples via netlink USERSOCK. +The host-sflow daemon, hsflowd, at https://sflow.net is one example of +a tool that will consume this feed and emit standard sFlow v5. diff --git a/src/plugins/sflow/sflow_common.h b/src/plugins/sflow/sflow_common.h new file mode 100644 index 00000000000..29784638bb9 --- /dev/null +++ b/src/plugins/sflow/sflow_common.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_sflow_common_h__ +#define __included_sflow_common_h__ + +// #define SFLOW_USE_VAPI (set by CMakeLists.txt) + +extern vlib_log_class_t sflow_logger; +#define SFLOW_DBG(...) vlib_log_debug (sflow_logger, __VA_ARGS__); +#define SFLOW_INFO(...) vlib_log_info (sflow_logger, __VA_ARGS__); +#define SFLOW_NOTICE(...) vlib_log_notice (sflow_logger, __VA_ARGS__); +#define SFLOW_WARN(...) vlib_log_warn (sflow_logger, __VA_ARGS__); +#define SFLOW_ERR(...) vlib_log_err (sflow_logger, __VA_ARGS__); + +typedef struct +{ + u32 sw_if_index; + u32 hw_if_index; + u32 linux_if_index; + u32 polled; + int sflow_enabled; +} sflow_per_interface_data_t; + +#endif /* __included_sflow_common_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow_psample.c b/src/plugins/sflow/sflow_psample.c new file mode 100644 index 00000000000..0e4fcfbe790 --- /dev/null +++ b/src/plugins/sflow/sflow_psample.c @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +#include <fcntl.h> +#include <asm/types.h> +#include <sys/socket.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/psample.h> +#include <signal.h> +#include <ctype.h> + +#include <sflow/sflow_psample.h> + + /*_________________---------------------------__________________ + _________________ fcntl utils __________________ + -----------------___________________________------------------ + */ + + static void + setNonBlocking (int fd) + { + // set the socket to non-blocking + int fdFlags = fcntl (fd, F_GETFL); + fdFlags |= O_NONBLOCK; + if (fcntl (fd, F_SETFL, fdFlags) < 0) + { + SFLOW_ERR ("fcntl(O_NONBLOCK) failed: %s\n", strerror (errno)); + } + } + + static void + setCloseOnExec (int fd) + { + // make sure it doesn't get inherited, e.g. 
when we fork a script + int fdFlags = fcntl (fd, F_GETFD); + fdFlags |= FD_CLOEXEC; + if (fcntl (fd, F_SETFD, fdFlags) < 0) + { + SFLOW_ERR ("fcntl(F_SETFD=FD_CLOEXEC) failed: %s\n", strerror (errno)); + } + } + + static int + setSendBuffer (int fd, int requested) + { + int txbuf = 0; + socklen_t txbufsiz = sizeof (txbuf); + if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txbuf, &txbufsiz) < 0) + { + SFLOW_ERR ("getsockopt(SO_SNDBUF) failed: %s", strerror (errno)); + } + if (txbuf < requested) + { + txbuf = requested; + if (setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txbuf, sizeof (txbuf)) < 0) + { + SFLOW_WARN ("setsockopt(SO_TXBUF=%d) failed: %s", requested, + strerror (errno)); + } + // see what we actually got + txbufsiz = sizeof (txbuf); + if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txbuf, &txbufsiz) < 0) + { + SFLOW_ERR ("getsockopt(SO_SNDBUF) failed: %s", strerror (errno)); + } + } + return txbuf; + } + + /*_________________---------------------------__________________ + _________________ generic_pid __________________ + -----------------___________________________------------------ + choose a 32-bit id that is likely to be unique even if more + than one module in this process wants to bind a netlink socket + */ + + static u32 + generic_pid (u32 mod_id) + { + return (mod_id << 16) | getpid (); + } + + /*_________________---------------------------__________________ + _________________ generic_open __________________ + -----------------___________________________------------------ + */ + + static int + generic_open (u32 mod_id) + { + int nl_sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (nl_sock < 0) + { + SFLOW_ERR ("nl_sock open failed: %s\n", strerror (errno)); + return -1; + } + // bind to a suitable id + struct sockaddr_nl sa = { .nl_family = AF_NETLINK, + .nl_pid = generic_pid (mod_id) }; + if (bind (nl_sock, (struct sockaddr *) &sa, sizeof (sa)) < 0) + SFLOW_ERR ("generic_open: bind failed: %s\n", strerror (errno)); + setNonBlocking (nl_sock); + 
setCloseOnExec (nl_sock); + return nl_sock; + } + + /*_________________---------------------------__________________ + _________________ generic_send __________________ + -----------------___________________________------------------ + */ + + static int + generic_send (int sockfd, u32 mod_id, int type, int cmd, int req_type, + void *req, int req_len, int req_footprint, u32 seqNo) + { + struct nlmsghdr nlh = {}; + struct genlmsghdr ge = {}; + struct nlattr attr = {}; + + attr.nla_len = sizeof (attr) + req_len; + attr.nla_type = req_type; + + ge.cmd = cmd; + ge.version = 1; + + nlh.nlmsg_len = NLMSG_LENGTH (req_footprint + sizeof (attr) + sizeof (ge)); + nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh.nlmsg_type = type; + nlh.nlmsg_seq = seqNo; + nlh.nlmsg_pid = generic_pid (mod_id); + + struct iovec iov[4] = { { .iov_base = &nlh, .iov_len = sizeof (nlh) }, + { .iov_base = &ge, .iov_len = sizeof (ge) }, + { .iov_base = &attr, .iov_len = sizeof (attr) }, + { .iov_base = req, .iov_len = req_footprint } }; + + struct sockaddr_nl sa = { .nl_family = AF_NETLINK }; + struct msghdr msg = { .msg_name = &sa, + .msg_namelen = sizeof (sa), + .msg_iov = iov, + .msg_iovlen = 4 }; + return sendmsg (sockfd, &msg, 0); + } + + /*_________________---------------------------__________________ + _________________ getFamily_PSAMPLE __________________ + -----------------___________________________------------------ + */ + + static void + getFamily_PSAMPLE (SFLOWPS *pst) + { +#define SFLOWPS_FAM_LEN sizeof (PSAMPLE_GENL_NAME) +#define SFLOWPS_FAM_FOOTPRINT NLMSG_ALIGN (SFLOWPS_FAM_LEN) + char fam_name[SFLOWPS_FAM_FOOTPRINT] = {}; + memcpy (fam_name, PSAMPLE_GENL_NAME, SFLOWPS_FAM_LEN); + generic_send (pst->nl_sock, pst->id, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, fam_name, SFLOWPS_FAM_LEN, + SFLOWPS_FAM_FOOTPRINT, ++pst->nl_seq); + pst->state = SFLOWPS_STATE_WAIT_FAMILY; + } + + /*_________________---------------------------__________________ + _________________ 
processNetlink_GENERIC __________________ + -----------------___________________________------------------ + */ + + static void + processNetlink_GENERIC (SFLOWPS *pst, struct nlmsghdr *nlh) + { + char *msg = (char *) NLMSG_DATA (nlh); + int msglen = nlh->nlmsg_len - NLMSG_HDRLEN; + struct genlmsghdr *genl = (struct genlmsghdr *) msg; + SFLOW_DBG ("generic netlink CMD = %u\n", genl->cmd); + + for (int offset = GENL_HDRLEN; offset < msglen;) + { + struct nlattr *attr = (struct nlattr *) (msg + offset); + if (attr->nla_len == 0 || (attr->nla_len + offset) > msglen) + { + SFLOW_ERR ("processNetlink_GENERIC attr parse error\n"); + break; // attr parse error + } + char *attr_datap = (char *) attr + NLA_HDRLEN; + switch (attr->nla_type) + { + case CTRL_ATTR_VERSION: + pst->genetlink_version = *(u32 *) attr_datap; + break; + case CTRL_ATTR_FAMILY_ID: + pst->family_id = *(u16 *) attr_datap; + SFLOW_DBG ("generic family id: %u\n", pst->family_id); + break; + case CTRL_ATTR_FAMILY_NAME: + SFLOW_DBG ("generic family name: %s\n", attr_datap); + break; + case CTRL_ATTR_MCAST_GROUPS: + for (int grp_offset = NLA_HDRLEN; grp_offset < attr->nla_len;) + { + struct nlattr *grp_attr = + (struct nlattr *) (msg + offset + grp_offset); + if (grp_attr->nla_len == 0 || + (grp_attr->nla_len + grp_offset) > attr->nla_len) + { + SFLOW_ERR ( + "processNetlink_GENERIC grp_attr parse error\n"); + break; + } + char *grp_name = NULL; + u32 grp_id = 0; + for (int gf_offset = NLA_HDRLEN; + gf_offset < grp_attr->nla_len;) + { + struct nlattr *gf_attr = + (struct nlattr *) (msg + offset + grp_offset + + gf_offset); + if (gf_attr->nla_len == 0 || + (gf_attr->nla_len + gf_offset) > grp_attr->nla_len) + { + SFLOW_ERR ( + "processNetlink_GENERIC gf_attr parse error\n"); + break; + } + char *grp_attr_datap = (char *) gf_attr + NLA_HDRLEN; + switch (gf_attr->nla_type) + { + case CTRL_ATTR_MCAST_GRP_NAME: + grp_name = grp_attr_datap; + SFLOW_DBG ("psample multicast group: %s\n", grp_name); + break; + case 
CTRL_ATTR_MCAST_GRP_ID: + grp_id = *(u32 *) grp_attr_datap; + SFLOW_DBG ("psample multicast group id: %u\n", grp_id); + break; + } + gf_offset += NLMSG_ALIGN (gf_attr->nla_len); + } + if (pst->group_id == 0 && grp_name && grp_id && + !strcmp (grp_name, PSAMPLE_NL_MCGRP_SAMPLE_NAME)) + { + SFLOW_DBG ("psample found group %s=%u\n", grp_name, + grp_id); + pst->group_id = grp_id; + // We don't need to join the group if we are only sending + // to it. + } + + grp_offset += NLMSG_ALIGN (grp_attr->nla_len); + } + break; + default: + SFLOW_DBG ("psample attr type: %u (nested=%u) len: %u\n", + attr->nla_type, attr->nla_type & NLA_F_NESTED, + attr->nla_len); + break; + } + offset += NLMSG_ALIGN (attr->nla_len); + } + if (pst->family_id && pst->group_id) + { + SFLOW_DBG ("psample state->READY\n"); + pst->state = SFLOWPS_STATE_READY; + } + } + + // TODO: we can take out the fns for reading PSAMPLE here + + /*_________________---------------------------__________________ + _________________ processNetlink __________________ + -----------------___________________________------------------ + */ + + static void + processNetlink (SFLOWPS *pst, struct nlmsghdr *nlh) + { + if (nlh->nlmsg_type == NETLINK_GENERIC) + { + processNetlink_GENERIC (pst, nlh); + } + else if (nlh->nlmsg_type == pst->family_id) + { + // We are write-only, don't need to read these. 
+ } + } + + /*_________________---------------------------__________________ + _________________ readNetlink_PSAMPLE __________________ + -----------------___________________________------------------ + */ + + static void + readNetlink_PSAMPLE (SFLOWPS *pst, int fd) + { + uint8_t recv_buf[SFLOWPS_PSAMPLE_READNL_RCV_BUF]; + int numbytes = recv (fd, recv_buf, sizeof (recv_buf), 0); + if (numbytes <= 0) + { + SFLOW_ERR ("readNetlink_PSAMPLE returned %d : %s\n", numbytes, + strerror (errno)); + return; + } + struct nlmsghdr *nlh = (struct nlmsghdr *) recv_buf; + while (NLMSG_OK (nlh, numbytes)) + { + if (nlh->nlmsg_type == NLMSG_DONE) + break; + if (nlh->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *err_msg = (struct nlmsgerr *) NLMSG_DATA (nlh); + if (err_msg->error == 0) + { + SFLOW_DBG ("received Netlink ACK\n"); + } + else + { + SFLOW_ERR ("error in netlink message: %d : %s\n", + err_msg->error, strerror (-err_msg->error)); + } + return; + } + processNetlink (pst, nlh); + nlh = NLMSG_NEXT (nlh, numbytes); + } + } + + /*_________________---------------------------__________________ + _________________ SFLOWPS_open __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWPS_open (SFLOWPS *pst) + { + // nl_sock == 0 is used as the "not open" marker (SFLOWPS_close resets it to 0) + if (pst->nl_sock == 0) + { + // NOTE(review): generic_open() returns -1 when socket() fails, which + // leaves nl_sock != 0, so this branch is never entered again and the + // open is not retried - confirm whether that is intended + pst->nl_sock = generic_open (pst->id); + if (pst->nl_sock > 0) + { + pst->state = SFLOWPS_STATE_OPEN; + setSendBuffer (pst->nl_sock, SFLOWPS_PSAMPLE_READNL_SND_BUF); + getFamily_PSAMPLE (pst); + } + } + return (pst->nl_sock > 0); + } + + /*_________________---------------------------__________________ + _________________ SFLOWPS_close __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWPS_close (SFLOWPS *pst) + { + if (pst->nl_sock > 0) + { + int err = close (pst->nl_sock); + if (err == 0) + { + pst->nl_sock = 0; + return true; + } + else + { + SFLOW_ERR ("SFLOWPS_close: returned %d : %s\n", err, + strerror (errno)); + } + } + return false; 
+ } + + /*_________________---------------------------__________________ + _________________ SFLOWPS_state __________________ + -----------------___________________________------------------ + */ + + EnumSFLOWPSState + SFLOWPS_state (SFLOWPS *pst) + { + return pst->state; + } + + /*_________________---------------------------__________________ + _________________ SFLOWPS_open_step __________________ + -----------------___________________________------------------ + */ + + EnumSFLOWPSState + SFLOWPS_open_step (SFLOWPS *pst) + { + switch (pst->state) + { + case SFLOWPS_STATE_INIT: + SFLOWPS_open (pst); + break; + case SFLOWPS_STATE_OPEN: + getFamily_PSAMPLE (pst); + break; + case SFLOWPS_STATE_WAIT_FAMILY: + readNetlink_PSAMPLE (pst, pst->nl_sock); + break; + case SFLOWPS_STATE_READY: + break; + } + return pst->state; + } + + /*_________________---------------------------__________________ + _________________ SFLOWPSSpec_setAttr __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWPSSpec_setAttr (SFLOWPSSpec *spec, EnumSFLOWPSAttributes field, + void *val, int len) + { + SFLOWPSAttr *psa = &spec->attr[field]; + if (psa->included) + return false; + psa->included = true; + int expected_len = SFLOWPS_Fields[field].len; + if (expected_len && expected_len != len) + { + SFLOW_ERR ("SFLOWPSSpec_setAttr(%s) length=%u != expected: %u\n", + SFLOWPS_Fields[field].descr, len, expected_len); + return false; + } + psa->attr.nla_type = field; + psa->attr.nla_len = sizeof (psa->attr) + len; + int len_w_pad = NLMSG_ALIGN (len); + psa->val.iov_len = len_w_pad; + psa->val.iov_base = val; + spec->n_attrs++; + spec->attrs_len += sizeof (psa->attr); + spec->attrs_len += len_w_pad; + return true; + } + + /*_________________---------------------------__________________ + _________________ SFLOWPSSpec_send __________________ + -----------------___________________________------------------ + */ + + int + SFLOWPSSpec_send (SFLOWPS *pst, 
SFLOWPSSpec *spec) + { + spec->nlh.nlmsg_len = NLMSG_LENGTH (sizeof (spec->ge) + spec->attrs_len); + spec->nlh.nlmsg_flags = 0; + spec->nlh.nlmsg_type = pst->family_id; + spec->nlh.nlmsg_seq = ++pst->nl_seq; + spec->nlh.nlmsg_pid = generic_pid (pst->id); + + spec->ge.cmd = PSAMPLE_CMD_SAMPLE; + spec->ge.version = PSAMPLE_GENL_VERSION; + +#define MAX_IOV_FRAGMENTS (2 * __SFLOWPS_PSAMPLE_ATTR_MAX) + 2 + + struct iovec iov[MAX_IOV_FRAGMENTS]; + u32 frag = 0; + iov[frag].iov_base = &spec->nlh; + iov[frag].iov_len = sizeof (spec->nlh); + frag++; + iov[frag].iov_base = &spec->ge; + iov[frag].iov_len = sizeof (spec->ge); + frag++; + int nn = 0; + for (u32 ii = 0; ii < __SFLOWPS_PSAMPLE_ATTR_MAX; ii++) + { + SFLOWPSAttr *psa = &spec->attr[ii]; + if (psa->included) + { + nn++; + iov[frag].iov_base = &psa->attr; + iov[frag].iov_len = sizeof (psa->attr); + frag++; + iov[frag] = psa->val; // struct copy + frag++; + } + } + ASSERT (nn == spec->n_attrs); + + struct sockaddr_nl da = { .nl_family = AF_NETLINK, + .nl_groups = (1 << (pst->group_id - 1)) }; + + struct msghdr msg = { .msg_name = &da, + .msg_namelen = sizeof (da), + .msg_iov = iov, + .msg_iovlen = frag }; + + int status = sendmsg (pst->nl_sock, &msg, 0); + if (status <= 0) + { + SFLOW_ERR ("strerror(errno) = %s; errno = %d\n", strerror (errno), + errno); + return -1; + } + return 0; + } diff --git a/src/plugins/sflow/sflow_psample.h b/src/plugins/sflow/sflow_psample.h new file mode 100644 index 00000000000..5d4944231fd --- /dev/null +++ b/src/plugins/sflow/sflow_psample.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_sflow_psample_h__ +#define __included_sflow_psample_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +#include <asm/types.h> +#include <sys/socket.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/psample.h> +#include <signal.h> +#include <ctype.h> + +// #define SFLOWPS_DEBUG + +#define SFLOWPS_PSAMPLE_READNL_RCV_BUF 8192 +#define SFLOWPS_PSAMPLE_READNL_SND_BUF 1000000 + +/* Shadow the attributes in linux/psample.h so + * we can easily compile/test fields that are not + * defined on the kernel we are compiling on. 
+ */ +typedef enum +{ +#define SFLOWPS_FIELDDATA(field, len, descr) field, +#include "sflow/sflow_psample_fields.h" +#undef SFLOWPS_FIELDDATA + __SFLOWPS_PSAMPLE_ATTR_MAX +} EnumSFLOWPSAttributes; + +typedef struct _SFLOWPS_field_t +{ + EnumSFLOWPSAttributes field; + int len; + char *descr; +} SFLOWPS_field_t; + +static const SFLOWPS_field_t SFLOWPS_Fields[] = { +#define SFLOWPS_FIELDDATA(field, len, descr) { field, len, descr }, +#include "sflow/sflow_psample_fields.h" +#undef SFLOWPS_FIELDDATA +}; + +typedef enum +{ + SFLOWPS_STATE_INIT, + SFLOWPS_STATE_OPEN, + SFLOWPS_STATE_WAIT_FAMILY, + SFLOWPS_STATE_READY +} EnumSFLOWPSState; + +typedef struct _SFLOWPS +{ + EnumSFLOWPSState state; + u32 id; + int nl_sock; + u32 nl_seq; + u32 genetlink_version; + u16 family_id; + u32 group_id; +} SFLOWPS; + +typedef struct _SFLOWPSAttr +{ + bool included : 1; + struct nlattr attr; + struct iovec val; +} SFLOWPSAttr; + +typedef struct _SFLOWPSSpec +{ + struct nlmsghdr nlh; + struct genlmsghdr ge; + SFLOWPSAttr attr[__SFLOWPS_PSAMPLE_ATTR_MAX]; + int n_attrs; + int attrs_len; +} SFLOWPSSpec; + +bool SFLOWPS_open (SFLOWPS *pst); +bool SFLOWPS_close (SFLOWPS *pst); +EnumSFLOWPSState SFLOWPS_state (SFLOWPS *pst); +EnumSFLOWPSState SFLOWPS_open_step (SFLOWPS *pst); + +bool SFLOWPSSpec_setAttr (SFLOWPSSpec *spec, EnumSFLOWPSAttributes field, + void *buf, int len); +#define SFLOWPSSpec_setAttrInt(spec, field, val) \ + SFLOWPSSpec_setAttr ((spec), (field), &(val), sizeof (val)) + +int SFLOWPSSpec_send (SFLOWPS *pst, SFLOWPSSpec *spec); + +#endif /* __included_sflow_psample_h__ */ diff --git a/src/plugins/sflow/sflow_psample_fields.h b/src/plugins/sflow/sflow_psample_fields.h new file mode 100644 index 00000000000..72d484c4850 --- /dev/null +++ b/src/plugins/sflow/sflow_psample_fields.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_IIFINDEX, 4, "input if_index") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_OIFINDEX, 4, "output if_index") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_ORIGSIZE, 4, "original packet size") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_SAMPLE_GROUP, 4, "group number") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_GROUP_SEQ, 4, "group sequence number") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_SAMPLE_RATE, 4, "sampling N") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_DATA, 0, "sampled header") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_TUNNEL, 0, "tunnel header") + +/* commands attributes */ +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_GROUP_REFCOUNT, 0, + "group reference count") + +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_PAD, 0, "pad bytes") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_OUT_TC, 2, "egress queue number") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_OUT_TC_OCC, 8, + "egress queue depth in bytes") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_LATENCY, 8, + "transit latency in nanoseconds") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_TIMESTAMP, 8, "timestamp") +SFLOWPS_FIELDDATA (SFLOWPS_PSAMPLE_ATTR_PROTO, 2, "header protocol") diff --git a/src/plugins/sflow/sflow_test.c b/src/plugins/sflow/sflow_test.c new file mode 100644 index 00000000000..554806640e3 --- /dev/null +++ b/src/plugins/sflow/sflow_test.c @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vppinfra/error.h> +#include <stdbool.h> + +#define __plugin_msg_base sflow_test_main.msg_id_base +#include <vlibapi/vat_helper_macros.h> + +uword unformat_sw_if_index (unformat_input_t *input, va_list *args); + +/* Declare message IDs */ +#include <sflow/sflow.api_enum.h> +#include <sflow/sflow.api_types.h> + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} sflow_test_main_t; + +sflow_test_main_t sflow_test_main; + +static int +api_sflow_enable_disable (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + int enable_disable = 1; + u32 hw_if_index = ~0; + vl_api_sflow_enable_disable_t *mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_sw_if_index, vam, &hw_if_index)) + ; + else if (unformat (i, "disable")) + enable_disable = 0; + else + break; + } + + if (hw_if_index == ~0) + { + errmsg ("missing interface name / explicit hw_if_index number \n"); + return -99; + } + + /* Construct the API message */ + M (SFLOW_ENABLE_DISABLE, mp); + mp->hw_if_index = ntohl (hw_if_index); + mp->enable_disable = enable_disable; + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static void +vl_api_sflow_sampling_rate_get_reply_t_handler ( + vl_api_sflow_sampling_rate_get_reply_t *mp) +{ + vat_main_t *vam = sflow_test_main.vat_main; + clib_warning ("sflow sampling_N: %d", ntohl (mp->sampling_N)); + vam->result_ready = 1; +} + +static int +api_sflow_sampling_rate_get (vat_main_t *vam) +{ + vl_api_sflow_sampling_rate_get_t *mp; + int ret; + + /* Construct the API message */ + M (SFLOW_SAMPLING_RATE_GET, mp); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_sflow_sampling_rate_set (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + u32 sampling_N = ~0; + vl_api_sflow_sampling_rate_set_t *mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "sampling_N %d", &sampling_N)) + ; + else + break; + } + + if (sampling_N == ~0) + { + errmsg ("missing sampling_N number \n"); + return -99; + } + + /* Construct the API message */ + M (SFLOW_SAMPLING_RATE_SET, mp); + mp->sampling_N = ntohl (sampling_N); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static void +vl_api_sflow_polling_interval_get_reply_t_handler ( + vl_api_sflow_polling_interval_get_reply_t *mp) +{ + vat_main_t *vam = sflow_test_main.vat_main; + clib_warning ("sflow polling-interval: %d", ntohl (mp->polling_S)); + vam->result_ready = 1; +} + +static int +api_sflow_polling_interval_get (vat_main_t *vam) +{ + vl_api_sflow_polling_interval_get_t *mp; + int ret; + + /* Construct the API message */ + M (SFLOW_POLLING_INTERVAL_GET, mp); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static int +api_sflow_polling_interval_set (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + u32 polling_S = ~0; + vl_api_sflow_polling_interval_set_t *mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "polling_S %d", &polling_S)) + ; + else + break; + } + + if (polling_S == ~0) + { + errmsg ("missing polling_S number \n"); + return -99; + } + + /* Construct the API message */ + M (SFLOW_POLLING_INTERVAL_SET, mp); + mp->polling_S = ntohl (polling_S); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static void +vl_api_sflow_header_bytes_get_reply_t_handler ( + vl_api_sflow_header_bytes_get_reply_t *mp) +{ + vat_main_t *vam = sflow_test_main.vat_main; + clib_warning ("sflow header-bytes: %d", ntohl (mp->header_B)); + vam->result_ready = 1; +} + +static int +api_sflow_header_bytes_get (vat_main_t *vam) +{ + vl_api_sflow_header_bytes_get_t *mp; + int ret; + + /* Construct the API message */ + M (SFLOW_HEADER_BYTES_GET, mp); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int +api_sflow_header_bytes_set (vat_main_t *vam) +{ + unformat_input_t *i = vam->input; + u32 header_B = ~0; + vl_api_sflow_header_bytes_set_t *mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "header_B %d", &header_B)) + ; + else + break; + } + + if (header_B == ~0) + { + errmsg ("missing header_B number \n"); + return -99; + } + + /* Construct the API message */ + M (SFLOW_HEADER_BYTES_SET, mp); + mp->header_B = ntohl (header_B); + + /* send it... */ + S (mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static void +vl_api_sflow_interface_details_t_handler (vl_api_sflow_interface_details_t *mp) +{ + vat_main_t *vam = sflow_test_main.vat_main; + clib_warning ("sflow enable: %d", ntohl (mp->hw_if_index)); + vam->result_ready = 1; +} + +static int +api_sflow_interface_dump (vat_main_t *vam) +{ + vl_api_sflow_interface_dump_t *mp; + int ret; + + /* Construct the API message */ + M (SFLOW_INTERFACE_DUMP, mp); + + /* send it... */ + S (mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +/* + * List of messages that the sflow test plugin sends, + * and that the data plane plugin processes + */ +#include <sflow/sflow.api_test.c> + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow_usersock.c b/src/plugins/sflow/sflow_usersock.c new file mode 100644 index 00000000000..0ccb947709a --- /dev/null +++ b/src/plugins/sflow/sflow_usersock.c @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +#include <fcntl.h> +#include <asm/types.h> +#include <sys/socket.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <signal.h> +#include <ctype.h> + +#include <sflow/sflow_usersock.h> + + /*_________________---------------------------__________________ + _________________ fcntl utils __________________ + -----------------___________________________------------------ + */ + + static void + setNonBlocking (int fd) + { + // set the socket to non-blocking + int fdFlags = fcntl (fd, F_GETFL); + fdFlags |= O_NONBLOCK; + if (fcntl (fd, F_SETFL, fdFlags) < 0) + { + SFLOW_ERR ("fcntl(O_NONBLOCK) failed: %s\n", strerror (errno)); + } + } + + static void + setCloseOnExec (int fd) + { + // make sure it doesn't get inherited, e.g. when we fork a script + int fdFlags = fcntl (fd, F_GETFD); + fdFlags |= FD_CLOEXEC; + if (fcntl (fd, F_SETFD, fdFlags) < 0) + { + SFLOW_ERR ("fcntl(F_SETFD=FD_CLOEXEC) failed: %s\n", strerror (errno)); + } + } + + /*_________________---------------------------__________________ + _________________ usersock_open __________________ + -----------------___________________________------------------ + */ + + static int + usersock_open (void) + { + int nl_sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK); + if (nl_sock < 0) + { + SFLOW_ERR ("nl_sock open failed: %s\n", strerror (errno)); + return -1; + } + setNonBlocking (nl_sock); + setCloseOnExec (nl_sock); + return nl_sock; + } + + /*_________________---------------------------__________________ + _________________ SFLOWUS_open __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWUS_open (SFLOWUS *ust) + { + if (ust->nl_sock == 0) + { + ust->nl_sock = usersock_open (); + } + return true; + } + + 
/*_________________---------------------------__________________ + _________________ SFLOWUS_close __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWUS_close (SFLOWUS *ust) + { + if (ust->nl_sock != 0) + { + int err = close (ust->nl_sock); + if (err == 0) + { + ust->nl_sock = 0; + return true; + } + else + { + SFLOW_WARN ("SFLOWUS_close: returned %d : %s\n", err, + strerror (errno)); + } + } + return false; + } + + /*_________________---------------------------__________________ + _________________ SFLOWUSSpec_setMsgType __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWUSSpec_setMsgType (SFLOWUSSpec *spec, EnumSFlowVppMsgType msgType) + { + spec->nlh.nlmsg_type = msgType; + return true; + } + + /*_________________---------------------------__________________ + _________________ SFLOWUSSpec_setAttr __________________ + -----------------___________________________------------------ + */ + + bool + SFLOWUSSpec_setAttr (SFLOWUSSpec *spec, EnumSFlowVppAttributes field, + void *val, int len) + { + SFLOWUSAttr *usa = &spec->attr[field]; + if (usa->included) + return false; + usa->included = true; + usa->attr.nla_type = field; + usa->attr.nla_len = sizeof (usa->attr) + len; + int len_w_pad = NLMSG_ALIGN (len); + usa->val.iov_len = len_w_pad; + usa->val.iov_base = val; + spec->n_attrs++; + spec->attrs_len += sizeof (usa->attr); + spec->attrs_len += len_w_pad; + return true; + } + + /*_________________---------------------------__________________ + _________________ SFLOWUSSpec_send __________________ + -----------------___________________________------------------ + */ + + int + SFLOWUSSpec_send (SFLOWUS *ust, SFLOWUSSpec *spec) + { + spec->nlh.nlmsg_len = NLMSG_LENGTH (spec->attrs_len); + spec->nlh.nlmsg_flags = 0; + spec->nlh.nlmsg_seq = ++ust->nl_seq; + spec->nlh.nlmsg_pid = getpid (); + +#define MAX_IOV_FRAGMENTS (2 * __SFLOW_VPP_ATTR_MAX) + 2 + + struct iovec 
iov[MAX_IOV_FRAGMENTS]; + u32 frag = 0; + iov[frag].iov_base = &spec->nlh; + iov[frag].iov_len = sizeof (spec->nlh); + frag++; + int nn = 0; + for (u32 ii = 0; ii < __SFLOW_VPP_ATTR_MAX; ii++) + { + SFLOWUSAttr *usa = &spec->attr[ii]; + if (usa->included) + { + nn++; + iov[frag].iov_base = &usa->attr; + iov[frag].iov_len = sizeof (usa->attr); + frag++; + iov[frag] = usa->val; // struct copy + frag++; + } + } + ASSERT (nn == spec->n_attrs); + + struct sockaddr_nl da = { + .nl_family = AF_NETLINK, + .nl_groups = (1 << (ust->group_id - 1)) // for multicast to the group + // .nl_pid = 1e9+6343 // for unicast to receiver bound to netlink socket + // with that "pid" + }; + + struct msghdr msg = { .msg_name = &da, + .msg_namelen = sizeof (da), + .msg_iov = iov, + .msg_iovlen = frag }; + + int status = sendmsg (ust->nl_sock, &msg, 0); + if (status <= 0) + { + // Linux replies with ECONNREFUSED when + // a multicast is sent via NETLINK_USERSOCK, but + // it's not an error so we can just ignore it here. + if (errno != ECONNREFUSED) + { + SFLOW_DBG ("USERSOCK strerror(errno) = %s\n", strerror (errno)); + return -1; + } + } + return 0; + } diff --git a/src/plugins/sflow/sflow_usersock.h b/src/plugins/sflow/sflow_usersock.h new file mode 100644 index 00000000000..d66389941a6 --- /dev/null +++ b/src/plugins/sflow/sflow_usersock.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __included_sflow_usersock_h__ +#define __included_sflow_usersock_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <sflow/sflow.h> + +#include <asm/types.h> +#include <sys/socket.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <signal.h> +#include <ctype.h> + +// ==================== shared with hsflowd mod_vpp ========================= +// See https://github.com/sflow/host-sflow + +#define SFLOW_VPP_NETLINK_USERSOCK_MULTICAST 29 + +typedef enum +{ + SFLOW_VPP_MSG_STATUS = 1, + SFLOW_VPP_MSG_IF_COUNTERS +} EnumSFlowVppMsgType; + +typedef enum +{ + SFLOW_VPP_ATTR_PORTNAME, /* string */ + SFLOW_VPP_ATTR_IFINDEX, /* u32 */ + SFLOW_VPP_ATTR_IFTYPE, /* u32 */ + SFLOW_VPP_ATTR_IFSPEED, /* u64 */ + SFLOW_VPP_ATTR_IFDIRECTION, /* u32 */ + SFLOW_VPP_ATTR_OPER_UP, /* u32 */ + SFLOW_VPP_ATTR_ADMIN_UP, /* u32 */ + SFLOW_VPP_ATTR_RX_OCTETS, /* u64 */ + SFLOW_VPP_ATTR_TX_OCTETS, /* u64 */ + SFLOW_VPP_ATTR_RX_PKTS, /* u64 */ + SFLOW_VPP_ATTR_TX_PKTS, /* u64 */ + SFLOW_VPP_ATTR_RX_BCASTS, /* u64 */ + SFLOW_VPP_ATTR_TX_BCASTS, /* u64 */ + SFLOW_VPP_ATTR_RX_MCASTS, /* u64 */ + SFLOW_VPP_ATTR_TX_MCASTS, /* u64 */ + SFLOW_VPP_ATTR_RX_DISCARDS, /* u64 */ + SFLOW_VPP_ATTR_TX_DISCARDS, /* u64 */ + SFLOW_VPP_ATTR_RX_ERRORS, /* u64 */ + SFLOW_VPP_ATTR_TX_ERRORS, /* u64 */ + SFLOW_VPP_ATTR_HW_ADDRESS, /* binary */ + SFLOW_VPP_ATTR_UPTIME_S, /* u32 */ + SFLOW_VPP_ATTR_OSINDEX, /* u32 Linux ifIndex number, where applicable */ + SFLOW_VPP_ATTR_DROPS, /* u32 all FIFO and netlink sendmsg drops */ + SFLOW_VPP_ATTR_SEQ, /* u32 send seq no */ + /* enum shared with hsflowd, so only add here */ + __SFLOW_VPP_ATTR_MAX +} EnumSFlowVppAttributes; + +#define SFLOW_VPP_PSAMPLE_GROUP_INGRESS 3 +#define SFLOW_VPP_PSAMPLE_GROUP_EGRESS 4 + +// ========================================================================= +typedef struct +{ + u64 byts; + u64 pkts; + u64 m_pkts; + u64 b_pkts; + u64 errs; + u64 
drps; +} sflow_ctrs_t; + +typedef struct +{ + sflow_ctrs_t tx; + sflow_ctrs_t rx; +} sflow_counters_t; + +typedef struct _SFLOWUS_field_t +{ + EnumSFlowVppAttributes field; + int len; +} SFLOWUS_field_t; + +typedef struct _SFLOWUS +{ + u32 id; + int nl_sock; + u32 nl_seq; + u32 group_id; +} SFLOWUS; + +typedef struct _SFLOWUSAttr +{ + bool included : 1; + struct nlattr attr; + struct iovec val; +} SFLOWUSAttr; + +typedef struct _SFLOWUSSpec +{ + struct nlmsghdr nlh; + SFLOWUSAttr attr[__SFLOW_VPP_ATTR_MAX]; + int n_attrs; + int attrs_len; +} SFLOWUSSpec; + +bool SFLOWUS_open (SFLOWUS *ust); +bool SFLOWUS_close (SFLOWUS *ust); + +bool SFLOWUSSpec_setMsgType (SFLOWUSSpec *spec, EnumSFlowVppMsgType type); +bool SFLOWUSSpec_setAttr (SFLOWUSSpec *spec, EnumSFlowVppAttributes field, + void *buf, int len); +#define SFLOWUSSpec_setAttrInt(spec, field, val) \ + SFLOWUSSpec_setAttr ((spec), (field), &(val), sizeof (val)) + +int SFLOWUSSpec_send (SFLOWUS *ust, SFLOWUSSpec *spec); + +#endif /* __included_sflow_usersock_h__ */ diff --git a/src/plugins/sflow/sflow_vapi.c b/src/plugins/sflow/sflow_vapi.c new file mode 100644 index 00000000000..cdc89a54c80 --- /dev/null +++ b/src/plugins/sflow/sflow_vapi.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <sflow/sflow_vapi.h> + +#ifdef SFLOW_USE_VAPI + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> +#include <stdbool.h> + +#include <vapi/vapi.h> +#include <vapi/memclnt.api.vapi.h> +#include <vapi/vlib.api.vapi.h> + +#ifdef included_interface_types_api_types_h +#define defined_vapi_enum_if_status_flags +#define defined_vapi_enum_mtu_proto +#define defined_vapi_enum_link_duplex +#define defined_vapi_enum_sub_if_flags +#define defined_vapi_enum_rx_mode +#define defined_vapi_enum_if_type +#define defined_vapi_enum_direction +#endif +#include <vapi/lcp.api.vapi.h> + +DEFINE_VAPI_MSG_IDS_LCP_API_JSON; + +static vapi_error_e +my_pair_get_cb (struct vapi_ctx_s *ctx, void *callback_ctx, vapi_error_e rv, + bool is_last, vapi_payload_lcp_itf_pair_get_v2_reply *reply) +{ + // this is a no-op, but it seems like it's presence is still required. For + // example, it is called if the pair lookup does not find anything. + return VAPI_OK; +} + +static vapi_error_e +my_pair_details_cb (struct vapi_ctx_s *ctx, void *callback_ctx, + vapi_error_e rv, bool is_last, + vapi_payload_lcp_itf_pair_details *details) +{ + sflow_per_interface_data_t *sfif = + (sflow_per_interface_data_t *) callback_ctx; + // Setting this here will mean it is sent to hsflowd with the interface + // counters. + sfif->linux_if_index = details->vif_index; + return VAPI_OK; +} + +static vapi_error_e +sflow_vapi_connect (sflow_vapi_client_t *vac) +{ + vapi_error_e rv = VAPI_OK; + vapi_ctx_t ctx = vac->vapi_ctx; + if (ctx == NULL) + { + // first time - open and connect. 
+ if ((rv = vapi_ctx_alloc (&ctx)) != VAPI_OK) + { + SFLOW_ERR ("vap_ctx_alloc() returned %d", rv); + } + else + { + vac->vapi_ctx = ctx; + if ((rv = vapi_connect_from_vpp ( + ctx, "api_from_sflow_plugin", SFLOW_VAPI_MAX_REQUEST_Q, + SFLOW_VAPI_MAX_RESPONSE_Q, VAPI_MODE_BLOCKING, true)) != + VAPI_OK) + { + SFLOW_ERR ("vapi_connect_from_vpp() returned %d", rv); + } + else + { + // Connected - but is there a handler for the request we want to + // send? + if (!vapi_is_msg_available (ctx, + vapi_msg_id_lcp_itf_pair_add_del_v2)) + { + SFLOW_WARN ("vapi_is_msg_available() returned false => " + "linux-cp plugin not loaded"); + rv = VAPI_EUSER; + } + } + } + } + return rv; +} + +// in forked thread +static void * +get_lcp_itf_pairs (void *magic) +{ + sflow_vapi_client_t *vac = magic; + vapi_error_e rv = VAPI_OK; + + sflow_per_interface_data_t *intfs = vac->vapi_itfs; + vlib_set_thread_name (SFLOW_VAPI_THREAD_NAME); + if ((rv = sflow_vapi_connect (vac)) != VAPI_OK) + { + vac->vapi_unavailable = true; + } + else + { + vapi_ctx_t ctx = vac->vapi_ctx; + + for (int ii = 1; ii < vec_len (intfs); ii++) + { + sflow_per_interface_data_t *sfif = vec_elt_at_index (intfs, ii); + if (sfif && sfif->sflow_enabled) + { + // TODO: if we try non-blocking we might not be able to just pour + // all the requests in here. Might be better to do them one at a + // time - e.g. when we poll for counters. 
+ vapi_msg_lcp_itf_pair_get_v2 *msg = + vapi_alloc_lcp_itf_pair_get_v2 (ctx); + if (msg) + { + msg->payload.sw_if_index = sfif->sw_if_index; + if ((rv = vapi_lcp_itf_pair_get_v2 (ctx, msg, my_pair_get_cb, + sfif, my_pair_details_cb, + sfif)) != VAPI_OK) + { + SFLOW_ERR ("vapi_lcp_itf_pair_get_v2 returned %d", rv); + // vapi.h: "message must be freed by vapi_msg_free if not + // consumed by vapi_send" + vapi_msg_free (ctx, msg); + } + } + } + } + // We no longer disconnect or free the client structures + // vapi_disconnect_from_vpp (ctx); + // vapi_ctx_free (ctx); + } + // indicate that we are done - more portable that using pthread_tryjoin_np() + vac->vapi_request_status = (int) rv; + clib_atomic_store_rel_n (&vac->vapi_request_active, false); + // TODO: how to tell if heap-allocated data is stored separately per thread? + // And if so, how to tell the allocator to GC all data for the thread when it + // exits? + return (void *) rv; +} + +int +sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac, + sflow_per_interface_data_t *itfs) +{ + +#ifdef SFLOW_VAPI_TEST_PLUGIN_SYMBOL + // don't even fork the query thread if the symbol is not there + if (!vlib_get_plugin_symbol ("linux_cp_plugin.so", "lcp_itf_pair_get")) + { + return false; + } +#endif + // previous query is done and results extracted? 
+ int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active); + if (req_active == false && vac->vapi_itfs == NULL) + { + // make a copy of the current interfaces vector for the lookup thread to + // write into + vac->vapi_itfs = vec_dup (itfs); + pthread_attr_t attr; + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); + pthread_attr_setstacksize (&attr, VLIB_THREAD_STACK_SIZE); + vac->vapi_request_active = true; + pthread_create (&vac->vapi_thread, &attr, get_lcp_itf_pairs, vac); + pthread_attr_destroy (&attr); + return true; + } + return false; +} + +int +sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac, + sflow_per_interface_data_t *itfs) +{ + // request completed? + // TODO: if we use non-blocking mode do we have to call something here to + // receive results? + int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active); + if (req_active == false && vac->vapi_itfs != NULL) + { + // yes, extract what we learned + // TODO: would not have to do this if vector were array of pointers + // to sflow_per_interface_data_t rather than an actual array, but + // it does mean we have very clear separation between the threads. 
+ for (int ii = 1; ii < vec_len (vac->vapi_itfs); ii++) + { + sflow_per_interface_data_t *sfif1 = + vec_elt_at_index (vac->vapi_itfs, ii); + sflow_per_interface_data_t *sfif2 = vec_elt_at_index (itfs, ii); + if (sfif1 && sfif2 && sfif1->sflow_enabled && sfif2->sflow_enabled) + sfif2->linux_if_index = sfif1->linux_if_index; + } + vec_free (vac->vapi_itfs); + vac->vapi_itfs = NULL; + return true; + } + return false; +} + +#endif /* SFLOW_USE_VAPI */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sflow/sflow_vapi.h b/src/plugins/sflow/sflow_vapi.h new file mode 100644 index 00000000000..640fe997684 --- /dev/null +++ b/src/plugins/sflow/sflow_vapi.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024 InMon Corp. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_sflow_vapi_h__ +#define __included_sflow_vapi_h__ + +#include <vnet/vnet.h> +#include <sflow/sflow_common.h> + +#ifdef SFLOW_USE_VAPI + +#define SFLOW_VAPI_POLL_INTERVAL 5 +#define SFLOW_VAPI_MAX_REQUEST_Q 8 +#define SFLOW_VAPI_MAX_RESPONSE_Q 16 +#define SFLOW_VAPI_THREAD_NAME "sflow_vapi" // must be <= 15 characters + +// #define SFLOW_VAPI_TEST_PLUGIN_SYMBOL + +typedef struct +{ + volatile int vapi_request_active; // to sync main <-> vapi_thread + pthread_t vapi_thread; + sflow_per_interface_data_t *vapi_itfs; + int vapi_unavailable; + int vapi_request_status; // written by vapi_thread + void *vapi_ctx; +} sflow_vapi_client_t; + +int sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac, + sflow_per_interface_data_t *itfs); +int +sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac, + sflow_per_interface_data_t *itfs); + +#endif /* SFLOW_USE_VAPI */ +#endif /* __included_sflow_vapi_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/snort/cli.c b/src/plugins/snort/cli.c index 4b6dbc742a7..d4b69adae7d 100644 --- a/src/plugins/snort/cli.c +++ b/src/plugins/snort/cli.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. 
+ * Copyright(c) 2024 Arm Limited */ #include <vlib/vlib.h> @@ -17,6 +18,70 @@ format_snort_instance (u8 *s, va_list *args) } static clib_error_t * +snort_attach_detach_instance (vlib_main_t *vm, vnet_main_t *vnm, + char *instance_name, u32 sw_if_index, + int is_enable, snort_attach_dir_t dir) +{ + clib_error_t *err = NULL; + int rv = snort_interface_enable_disable (vm, instance_name, sw_if_index, + is_enable, dir); + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_FEATURE_ALREADY_ENABLED: + /* already attached to same instance */ + break; + case VNET_API_ERROR_INVALID_INTERFACE: + err = clib_error_return (0, + "interface %U is not assigned to snort " + "instance %s!", + format_vnet_sw_if_index_name, vnm, sw_if_index, + instance_name); + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + err = clib_error_return (0, "unknown instance '%s'", instance_name); + break; + case VNET_API_ERROR_INSTANCE_IN_USE: + err = clib_error_return ( + 0, "interface %U is currently up, set state down first", + format_vnet_sw_if_index_name, vnm, sw_if_index); + break; + default: + err = clib_error_return (0, "snort_interface_enable_disable returned %d", + rv); + break; + } + return err; +} + +static clib_error_t * +snort_detach_all_instance (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index) +{ + clib_error_t *err = NULL; + int rv = snort_interface_disable_all (vm, sw_if_index); + switch (rv) + { + case 0: + break; + case VNET_API_ERROR_INSTANCE_IN_USE: + err = clib_error_return ( + 0, "interface %U is currently up, set state down first", + format_vnet_sw_if_index_name, vnm, sw_if_index); + break; + case VNET_API_ERROR_INVALID_INTERFACE: + err = clib_error_return (0, "interface %U has no attached instances", + format_vnet_sw_if_index_name, vnm, sw_if_index); + break; + default: + err = + clib_error_return (0, "snort_interface_disable_all returned %d", rv); + break; + } + return err; +} + +static clib_error_t * snort_create_instance_command_fn (vlib_main_t *vm, unformat_input_t 
*input, vlib_cli_command_t *cmd) { @@ -94,7 +159,7 @@ done: VLIB_CLI_COMMAND (snort_create_instance_command, static) = { .path = "snort create-instance", - .short_help = "snort create-instaince name <name> [queue-size <size>] " + .short_help = "snort create-instance name <name> [queue-size <size>] " "[on-disconnect drop|pass]", .function = snort_create_instance_command_fn, }; @@ -217,11 +282,15 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; vnet_main_t *vnm = vnet_get_main (); - clib_error_t *err = 0; - u8 *name = 0; + snort_main_t *sm = &snort_main; + snort_instance_t *si; + clib_error_t *err = NULL; + u8 *name = NULL; + u8 **names = NULL; u32 sw_if_index = ~0; - snort_attach_dir_t dir = SNORT_INOUT; - int rv = 0; + snort_attach_dir_t direction = SNORT_INOUT; + u8 is_all_instances = 0; + int i; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -233,13 +302,15 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, vnm, &sw_if_index)) ; else if (unformat (line_input, "instance %s", &name)) - ; + vec_add1 (names, name); + else if (unformat (line_input, "all-instances")) + is_all_instances = 1; else if (unformat (line_input, "input")) - dir = SNORT_INPUT; + direction = SNORT_INPUT; else if (unformat (line_input, "output")) - dir = SNORT_OUTPUT; + direction = SNORT_OUTPUT; else if (unformat (line_input, "inout")) - dir = SNORT_INOUT; + direction = SNORT_INOUT; else { err = clib_error_return (0, "unknown input `%U'", @@ -254,46 +325,53 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - if (!name) + if (vec_len (names) == 0 && is_all_instances == 0) { - err = clib_error_return (0, "please specify instance name"); + err = clib_error_return (0, "please specify instances"); goto done; } - rv = snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir); + if (is_all_instances) + { + if 
(vec_len (sm->instances) == 0) + { + err = clib_error_return (0, "no snort instances have been created"); + goto done; + } - switch (rv) + pool_foreach (si, sm->instances) + { + snort_attach_detach_instance (vm, vnm, (char *) si->name, + sw_if_index, 1 /* is_enable */, + direction); + } + } + else { - case 0: - break; - case VNET_API_ERROR_FEATURE_ALREADY_ENABLED: - /* already attached to same instance */ - break; - case VNET_API_ERROR_INSTANCE_IN_USE: - err = clib_error_return (0, - "interface %U already assigned to " - "an instance", - format_vnet_sw_if_index_name, vnm, sw_if_index); - break; - case VNET_API_ERROR_NO_SUCH_ENTRY: - err = clib_error_return (0, "unknown instance '%s'", name); - break; - default: - err = clib_error_return (0, "snort_interface_enable_disable returned %d", - rv); - break; + vec_foreach_index (i, names) + { + snort_attach_detach_instance (vm, vnm, (char *) names[i], + sw_if_index, 1 /* is_enable */, + direction); + } } done: - vec_free (name); + vec_foreach_index (i, names) + { + vec_free (names[i]); + } + vec_free (names); unformat_free (line_input); return err; } VLIB_CLI_COMMAND (snort_attach_command, static) = { .path = "snort attach", - .short_help = "snort attach instance <name> interface <if-name> " - "[input|ouput|inout]", + .short_help = + "snort attach all-instances|(instance <name> [instance <name> [...]]) " + "interface <if-name> " + "[input|output|inout]", .function = snort_attach_command_fn, }; @@ -303,9 +381,12 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input, { unformat_input_t _line_input, *line_input = &_line_input; vnet_main_t *vnm = vnet_get_main (); - clib_error_t *err = 0; + clib_error_t *err = NULL; + u8 *name = NULL; + u8 **names = NULL; u32 sw_if_index = ~0; - int rv = 0; + u8 is_all_instances = 0; + int i = 0; /* Get a line of input. 
*/ if (!unformat_user (input, unformat_line_input, line_input)) @@ -313,8 +394,12 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "interface %U", unformat_vnet_sw_interface, - vnm, &sw_if_index)) + if (unformat (line_input, "instance %s", &name)) + vec_add1 (names, name); + else if (unformat (line_input, "all-instances")) + is_all_instances = 1; + else if (unformat (line_input, "interface %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) ; else { @@ -330,32 +415,41 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input, goto done; } - rv = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT); + if (vec_len (names) == 0) + { + /* To maintain backwards compatibility */ + is_all_instances = 1; + } - switch (rv) + if (is_all_instances) { - case 0: - break; - case VNET_API_ERROR_INVALID_INTERFACE: - err = clib_error_return (0, - "interface %U is not assigned to snort " - "instance!", - format_vnet_sw_if_index_name, vnm, sw_if_index); - break; - default: - err = clib_error_return (0, "snort_interface_enable_disable returned %d", - rv); - break; + err = snort_detach_all_instance (vm, vnm, sw_if_index); + } + else + { + vec_foreach_index (i, names) + { + snort_attach_detach_instance (vm, vnm, (char *) names[i], + sw_if_index, 0 /* is_enable */, + SNORT_INOUT); + } } done: + vec_foreach_index (i, names) + { + vec_free (names[i]); + } + vec_free (names); unformat_free (line_input); return err; } VLIB_CLI_COMMAND (snort_detach_command, static) = { .path = "snort detach", - .short_help = "snort detach interface <if-name>", + .short_help = + "snort detach all-instances|(instance <name> [instance <name> [...]]) " + "interface <if-name> ", .function = snort_detach_command_fn, }; @@ -384,17 +478,57 @@ snort_show_interfaces_command_fn (vlib_main_t *vm, unformat_input_t *input, { snort_main_t *sm = &snort_main; vnet_main_t *vnm = 
vnet_get_main (); - snort_instance_t *si; - u32 *index; - - vlib_cli_output (vm, "interface\t\tsnort instance"); - vec_foreach (index, sm->instance_by_sw_if_index) + snort_interface_data_t *interface; + snort_instance_t *instance; + snort_attach_dir_t direction; + u32 instance_index; + u32 sw_if_index; + u8 is_input; + int i, j; + + vlib_cli_output (vm, "interface\tinstances\tdirection"); + vec_foreach_index (sw_if_index, sm->interfaces) { - if (index[0] != ~0) + interface = vec_elt_at_index (sm->interfaces, sw_if_index); + + /* Loop over input instances and prints all of them (with direction + * indicated), then continues over output instances while ignoring + * previously printed input instances */ + for (i = 0; i < vec_len (interface->input_instance_indices) + + vec_len (interface->output_instance_indices); + i++) { - si = vec_elt_at_index (sm->instances, index[0]); - vlib_cli_output (vm, "%U:\t%s", format_vnet_sw_if_index_name, vnm, - index - sm->instance_by_sw_if_index, si->name); + is_input = i < vec_len (interface->input_instance_indices); + + instance_index = + is_input ? 
interface->input_instance_indices[i] : + interface->output_instance_indices + [i - vec_len (interface->input_instance_indices)]; + + /* When printing the output instances ignore the ones present in + * input instances as we have already printed them */ + if (!is_input) + { + j = + vec_search (interface->input_instance_indices, instance_index); + if (j != ~0) + continue; + } + + instance = snort_get_instance_by_index (instance_index); + direction = snort_get_instance_direction (instance_index, interface); + if (i == 0) + { + vlib_cli_output (vm, "%U:\t%s\t\t%s", + format_vnet_sw_if_index_name, vnm, sw_if_index, + instance->name, + snort_get_direction_name_by_enum (direction)); + } + else + { + vlib_cli_output (vm, "\t\t%s\t\t%s", instance->name, + snort_get_direction_name_by_enum (direction)); + } } } return 0; diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c index ce4f34491ec..84efb4d432f 100644 --- a/src/plugins/snort/enqueue.c +++ b/src/plugins/snort/enqueue.c @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. + * Copyright(c) 2024 Arm Limited */ +#include <vnet/ip/ip4_inlines.h> +#include <vnet/ip/ip4_packet.h> #include <vlib/vlib.h> #include <vnet/feature/feature.h> #include <snort/snort.h> @@ -56,6 +59,33 @@ static char *snort_enq_error_strings[] = { #undef _ }; +static_always_inline u32 +get_snort_instance_index_ip4 (snort_main_t *sm, vlib_buffer_t *b, u32 fa_data) +{ + u32 hash; + u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + ip4_header_t *ip = NULL; + u32 *instances = (fa_data == SNORT_INPUT) ? 
+ sm->interfaces[sw_if_index].input_instance_indices : + sm->interfaces[sw_if_index].output_instance_indices; + int n_instances = vec_len (instances); + + if (n_instances == 1) + { + return instances[0]; + } + ip = vlib_buffer_get_current (b); + hash = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT); + return instances[hash % n_instances]; +} + +static_always_inline snort_instance_t * +get_snort_instance (snort_main_t *sm, vlib_buffer_t *b, u32 fa_data) +{ + u32 instance_index = get_snort_instance_index_ip4 (sm, b, fa_data); + return snort_get_instance_by_index (instance_index); +} + static_always_inline uword snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int with_trace) @@ -66,26 +96,24 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, u32 thread_index = vm->thread_index; u32 n_left = frame->n_vectors; u32 n_trace = 0; - u32 total_enq = 0, n_processed = 0; + u32 total_enq = 0, n_unprocessed = 0; u32 *from = vlib_frame_vector_args (frame); vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; u16 nexts[VLIB_FRAME_SIZE], *next = nexts; + u32 unprocessed_bufs[VLIB_FRAME_SIZE]; vlib_get_buffers (vm, from, bufs, n_left); while (n_left) { - u64 fa_data; - u32 instance_index, next_index, n; - u32 l3_offset; - - fa_data = - *(u64 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u64)); - - instance_index = (u32) (fa_data & 0xffffffff); - l3_offset = - (fa_data >> 32) ? vnet_buffer (b[0])->ip.save_rewrite_length : 0; - si = vec_elt_at_index (sm->instances, instance_index); + u32 next_index, n; + /* fa_data is either SNORT_INPUT or SNORT_OUTPUT */ + u32 fa_data = + *(u32 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u32)); + u32 l3_offset = (fa_data == SNORT_INPUT) ? 
+ 0 : + vnet_buffer (b[0])->ip.save_rewrite_length; + si = get_snort_instance (sm, b[0], fa_data); /* if client isn't connected skip enqueue and take default action */ if (PREDICT_FALSE (si->client_index == ~0)) @@ -95,7 +123,8 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, else next[0] = next_index; next++; - n_processed++; + unprocessed_bufs[n_unprocessed] = from[0]; + n_unprocessed++; } else { @@ -108,7 +137,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_chain_linearize (vm, b[0]); - /* If this pkt is traced, snapshoot the data */ + /* If this pkt is traced, snapshot the data */ if (with_trace && b[0]->flags & VLIB_BUFFER_IS_TRACED) n_trace++; @@ -125,12 +154,12 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node, b++; } - if (n_processed) + if (n_unprocessed) { vlib_node_increment_counter (vm, snort_enq_node.index, - SNORT_ENQ_ERROR_NO_INSTANCE, n_processed); - vlib_buffer_enqueue_to_next (vm, node, vlib_frame_vector_args (frame), - nexts, n_processed); + SNORT_ENQ_ERROR_NO_INSTANCE, n_unprocessed); + vlib_buffer_enqueue_to_next (vm, node, unprocessed_bufs, nexts, + n_unprocessed); } pool_foreach (si, sm->instances) diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c index 50bff027a13..9bab1185b60 100644 --- a/src/plugins/snort/main.c +++ b/src/plugins/snort/main.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. 
+ * Copyright(c) 2024 Arm Limited */ #include <vlib/vlib.h> @@ -96,6 +97,38 @@ snort_instance_disconnect (vlib_main_t *vm, u32 instance_index) return rv; } +const char * +snort_get_direction_name_by_enum (snort_attach_dir_t dir) +{ + switch (dir) + { + case SNORT_INPUT: + return "input"; + case SNORT_OUTPUT: + return "output"; + case SNORT_INOUT: + return "inout"; + default: + return "none"; + } +} + +/* Returns SNORT_INVALID if the instance is not attached */ +snort_attach_dir_t +snort_get_instance_direction (u32 instance_index, + snort_interface_data_t *interface) +{ + snort_attach_dir_t direction = SNORT_INVALID; + int i; + i = vec_search (interface->input_instance_indices, instance_index); + if (i != ~0) + direction = direction | SNORT_INPUT; + i = vec_search (interface->output_instance_indices, instance_index); + if (i != ~0) + direction = direction | SNORT_OUTPUT; + return direction; +} + snort_instance_t * snort_get_instance_by_name (char *name) { @@ -470,6 +503,30 @@ done: return rv; } +static void +snort_vnet_feature_enable_disable (snort_attach_dir_t snort_dir, + u32 sw_if_index, int is_enable) +{ + u32 fa_data; + switch (snort_dir) + { + case SNORT_INPUT: + fa_data = SNORT_INPUT; + vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, + is_enable, &fa_data, sizeof (fa_data)); + break; + case SNORT_OUTPUT: + fa_data = SNORT_OUTPUT; + vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index, + is_enable, &fa_data, sizeof (fa_data)); + break; + default: + vlib_log_err (snort_log.class, + "Invalid direction given to enable/disable snort"); + break; + } +} + int snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, u32 sw_if_index, int is_enable, @@ -477,92 +534,216 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, { snort_main_t *sm = &snort_main; vnet_main_t *vnm = vnet_get_main (); - snort_instance_t *si; - u64 fa_data; - u32 index; + vnet_sw_interface_t *software_interface = + 
vnet_get_sw_interface (vnm, sw_if_index); + snort_interface_data_t *interface_data; + snort_instance_t *instance; + u32 **instance_indices; + u32 instance_index; + const snort_attach_dir_t dirs[2] = { SNORT_INPUT, SNORT_OUTPUT }; int rv = 0; + int index, i; - if (is_enable) + /* If interface is up, do not allow modifying attached instances */ + if (software_interface->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) { - if ((si = snort_get_instance_by_name (instance_name)) == 0) - { - log_err ("unknown instance '%s'", instance_name); - return VNET_API_ERROR_NO_SUCH_ENTRY; - } + rv = VNET_API_ERROR_INSTANCE_IN_USE; + log_err ("interface '%U' is currently up", format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + + /* Check if provided instance name exists */ + instance = snort_get_instance_by_name (instance_name); + if (instance == NULL) + { + rv = VNET_API_ERROR_NO_SUCH_ENTRY; + log_err ("unknown instance '%s'", instance_name); + goto done; + } + + /* Check if interface is attached before unnecessarily increasing size of + * vector */ + if (!is_enable && vec_len (sm->interfaces) <= sw_if_index) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("interface %U is not assigned to snort instance %s!", + format_vnet_sw_if_index_name, vnm, sw_if_index, instance->name); + goto done; + } - vec_validate_init_empty (sm->instance_by_sw_if_index, sw_if_index, ~0); + /* vec_validate initialises empty space to 0s, which corresponds to null + * pointers (i.e. 
empty vectors) in the snort_interface_data_t structs which + * is precisely what we need */ + vec_validate (sm->interfaces, sw_if_index); - index = sm->instance_by_sw_if_index[sw_if_index]; - if (index != ~0) + interface_data = vec_elt_at_index (sm->interfaces, sw_if_index); + instance_index = instance->index; + + /* When detaching with direction SNORT_INOUT choose currently attached + * directions */ + if (!is_enable) + { + snort_dir = + snort_get_instance_direction (instance_index, interface_data); + /* If snort_dir is SNORT_INVALID then the instance is not attached */ + if (snort_dir == SNORT_INVALID) { - if (index == si->index) - rv = VNET_API_ERROR_FEATURE_ALREADY_ENABLED; - else - rv = VNET_API_ERROR_INSTANCE_IN_USE; - si = vec_elt_at_index (sm->instances, index); - log_err ("interface %U already assgined to instance '%s'", - format_vnet_sw_if_index_name, vnm, sw_if_index, si->name); + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("interface %U is not assigned to snort instance %s!", + format_vnet_sw_if_index_name, vnm, sw_if_index, + instance->name); goto done; } + } - index = sm->instance_by_sw_if_index[sw_if_index] = si->index; - if (snort_dir & SNORT_INPUT) - { - fa_data = (u64) index; - vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, - 1, &fa_data, sizeof (fa_data)); - } - if (snort_dir & SNORT_OUTPUT) - { - fa_data = (1LL << 32 | index); - vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index, - 1, &fa_data, sizeof (fa_data)); - } + /* Error if direction is invalid */ + if (snort_dir == SNORT_INVALID) + { + rv = VNET_API_ERROR_INVALID_ARGUMENT; + vlib_log_err (snort_log.class, + "cannot attach/detach with invalid direction "); + goto done; } - else + + /* Loop evaluates input instances and then output instances */ + for (i = 0; i < 2; i++) { - if (sw_if_index >= vec_len (sm->instance_by_sw_if_index) || - sm->instance_by_sw_if_index[sw_if_index] == ~0) + if (!(snort_dir & dirs[i])) + continue; + + instance_indices 
= (dirs[i] == SNORT_INPUT) ? + &(interface_data->input_instance_indices) : + &(interface_data->output_instance_indices); + index = vec_search (*instance_indices, instance_index); + + if (is_enable) { - rv = VNET_API_ERROR_INVALID_INTERFACE; - log_err ("interface %U is not assigned to snort instance!", - format_vnet_sw_if_index_name, vnm, sw_if_index); - goto done; + /* Error if instance is already attached when trying to attach */ + if (index != ~0) + { + rv = VNET_API_ERROR_FEATURE_ALREADY_ENABLED; + log_err ("interface %U already assgined to instance '%s' on " + "direction '%s'", + format_vnet_sw_if_index_name, vnm, sw_if_index, + instance->name, + snort_get_direction_name_by_enum (dirs[i])); + goto done; + } + } + else + { + /* Error if instance is not attached when trying to detach */ + if (index == ~0) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("interface %U is not assigned to snort instance %s on " + "direction '%s'!", + format_vnet_sw_if_index_name, vnm, sw_if_index, + instance->name, + snort_get_direction_name_by_enum (dirs[i])); + goto done; + } } - index = sm->instance_by_sw_if_index[sw_if_index]; - si = vec_elt_at_index (sm->instances, index); - sm->instance_by_sw_if_index[sw_if_index] = ~0; - if (snort_dir & SNORT_INPUT) + if (is_enable) { - fa_data = (u64) index; - vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, - 0, &fa_data, sizeof (fa_data)); + /* Enable feature if not previously enabled */ + if (vec_len (*instance_indices) == 0) + { + snort_vnet_feature_enable_disable (dirs[i], sw_if_index, + 1 /* is_enable */); + } + vec_add1 (*instance_indices, instance_index); } - if (snort_dir & SNORT_OUTPUT) + else { - fa_data = (1LL << 32 | index); - vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index, - 0, &fa_data, sizeof (fa_data)); + /* Disable feature when removing last instance */ + if (vec_len (*instance_indices) == 1) + { + snort_vnet_feature_enable_disable (dirs[i], sw_if_index, + 0 /* is_enable */); 
+ } + vec_del1 (*instance_indices, index); } } +done: + return rv; +} + +int +snort_interface_disable_all (vlib_main_t *vm, u32 sw_if_index) +{ + snort_main_t *sm = &snort_main; + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *software_interface = + vnet_get_sw_interface (vnm, sw_if_index); + snort_interface_data_t *interface_data; + int rv = 0; + + if (software_interface->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + rv = VNET_API_ERROR_INSTANCE_IN_USE; + log_err ("interface '%U' is currently up", format_vnet_sw_if_index_name, + vnm, sw_if_index); + goto done; + } + + if (vec_len (sm->interfaces) <= sw_if_index) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("no instances attached to interface %U", + format_vnet_sw_if_index_name, vnm, sw_if_index); + goto done; + } + + interface_data = vec_elt_at_index (sm->interfaces, sw_if_index); + + if (vec_len (interface_data->input_instance_indices) == 0 && + vec_len (interface_data->output_instance_indices) == 0) + { + rv = VNET_API_ERROR_INVALID_INTERFACE; + log_err ("no instances attached to interface %U", + format_vnet_sw_if_index_name, vnm, sw_if_index); + goto done; + } + + if (vec_len (interface_data->input_instance_indices) > 0) + { + snort_vnet_feature_enable_disable (SNORT_INPUT, sw_if_index, + 0 /* is_enable */); + vec_free (interface_data->input_instance_indices); + } + if (vec_len (interface_data->output_instance_indices) > 0) + { + snort_vnet_feature_enable_disable (SNORT_OUTPUT, sw_if_index, + 0 /* is_enable */); + vec_free (interface_data->output_instance_indices); + } done: return rv; } static int -snort_strip_instance_interfaces (vlib_main_t *vm, u32 instance_index) +snort_strip_instance_interfaces (vlib_main_t *vm, snort_instance_t *instance) { snort_main_t *sm = &snort_main; - u32 *index; + snort_interface_data_t *interface; + snort_attach_dir_t direction; + int i; int rv = 0; - vec_foreach (index, sm->instance_by_sw_if_index) + /* Find all interfaces containing the given snort 
instance to disable */ + vec_foreach_index (i, sm->interfaces) { - if (*index == instance_index) - rv = snort_interface_enable_disable ( - vm, NULL, index - sm->instance_by_sw_if_index, 0, 0); + /* Check if the snort_instance is attached by checking if the direction + * is SNORT_INVALID */ + interface = vec_elt_at_index (sm->interfaces, i); + direction = snort_get_instance_direction (instance->index, interface); + if (direction != SNORT_INVALID) + rv = snort_interface_enable_disable (vm, (char *) instance->name, i, + 0 /* is_enable */, direction); if (rv) break; } @@ -585,7 +766,7 @@ snort_instance_delete (vlib_main_t *vm, u32 instance_index) if (si->client_index != ~0) return VNET_API_ERROR_INSTANCE_IN_USE; - if ((rv = snort_strip_instance_interfaces (vm, si->index))) + if ((rv = snort_strip_instance_interfaces (vm, si))) return rv; hash_unset_mem (sm->instance_by_name, si->name); diff --git a/src/plugins/snort/snort.h b/src/plugins/snort/snort.h index c7e856c0127..76f0652df10 100644 --- a/src/plugins/snort/snort.h +++ b/src/plugins/snort/snort.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. 
+ * Copyright(c) 2024 Arm Limited */ #ifndef __snort_snort_h__ @@ -68,13 +69,20 @@ typedef struct void *interrupts; } snort_per_thread_data_t; +/* Holds snort plugin related information for an interface */ +typedef struct +{ + u32 *input_instance_indices; + u32 *output_instance_indices; +} snort_interface_data_t; + typedef struct { clib_socket_t *listener; snort_client_t *clients; snort_instance_t *instances; uword *instance_by_name; - u32 *instance_by_sw_if_index; + snort_interface_data_t *interfaces; u8 **buffer_pool_base_addrs; snort_per_thread_data_t *per_thread_data; u32 input_mode; @@ -96,9 +104,11 @@ typedef enum typedef enum { - SNORT_INPUT = 1, - SNORT_OUTPUT = 2, - SNORT_INOUT = 3 + SNORT_INVALID = 0x00, + SNORT_INPUT = 0x01, + SNORT_OUTPUT = 0x02, + /* SNORT_INOUT === SNORT_INPUT | SNORT_OUTPUT */ + SNORT_INOUT = 0x03 } snort_attach_dir_t; #define SNORT_ENQ_NEXT_NODES \ @@ -108,6 +118,10 @@ typedef enum /* functions */ snort_main_t *snort_get_main (); +const char *snort_get_direction_name_by_enum (snort_attach_dir_t dir); +snort_attach_dir_t +snort_get_instance_direction (u32 instance_index, + snort_interface_data_t *interface); snort_instance_t *snort_get_instance_by_index (u32 instance_index); snort_instance_t *snort_get_instance_by_name (char *name); int snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, @@ -115,6 +129,7 @@ int snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz, int snort_interface_enable_disable (vlib_main_t *vm, char *instance_name, u32 sw_if_index, int is_enable, snort_attach_dir_t dir); +int snort_interface_disable_all (vlib_main_t *vm, u32 sw_if_index); int snort_set_node_mode (vlib_main_t *vm, u32 mode); int snort_instance_delete (vlib_main_t *vm, u32 instance_index); int snort_instance_disconnect (vlib_main_t *vm, u32 instance_index); diff --git a/src/plugins/snort/snort_api.c b/src/plugins/snort/snort_api.c index adad0d8763f..00b3c3ace67 100644 --- a/src/plugins/snort/snort_api.c +++ 
b/src/plugins/snort/snort_api.c @@ -1,3 +1,6 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ #include <vlib/vlib.h> #include <vnet/plugin/plugin.h> #include <snort/snort.h> @@ -80,17 +83,25 @@ vl_api_snort_interface_attach_t_handler (vl_api_snort_interface_attach_t *mp) u8 snort_dir = mp->snort_dir; int rv = VNET_API_ERROR_NO_SUCH_ENTRY; - if (sw_if_index == INDEX_INVALID) - rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; - else + VALIDATE_SW_IF_INDEX (mp); + switch (snort_dir) { - instance = snort_get_instance_by_index (instance_index); - if (instance) - rv = snort_interface_enable_disable (vm, (char *) instance->name, - sw_if_index, 1 /* is_enable */, - snort_dir); + case SNORT_INPUT: + case SNORT_OUTPUT: + case SNORT_INOUT: + break; + default: + rv = VNET_API_ERROR_INVALID_ARGUMENT; + goto bad_sw_if_index; } - + instance = snort_get_instance_by_index (instance_index); + if (instance) + { + rv = snort_interface_enable_disable (vm, (char *) instance->name, + sw_if_index, 1 /* is_enable */, + snort_dir); + } + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SNORT_INTERFACE_ATTACH_REPLY); } @@ -185,7 +196,8 @@ vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp) snort_main_t *sm = snort_get_main (); vl_api_snort_interface_get_reply_t *rmp; u32 sw_if_index; - u32 *index; + u32 *instances; + u32 index; int rv = 0; sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); @@ -193,7 +205,7 @@ vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp) if (sw_if_index == INDEX_INVALID) { /* clang-format off */ - if (vec_len (sm->instance_by_sw_if_index) == 0) + if (vec_len (sm->interfaces) == 0) { REPLY_MACRO2 (VL_API_SNORT_INTERFACE_GET_REPLY, ({ rmp->cursor = ~0; })); return; @@ -201,17 +213,36 @@ vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp) REPLY_AND_DETAILS_VEC_MACRO( VL_API_SNORT_INTERFACE_GET_REPLY, - sm->instance_by_sw_if_index, + sm->interfaces, mp, rmp, rv, ({ - index = 
vec_elt_at_index (sm->instance_by_sw_if_index, cursor); - send_snort_interface_details (cursor, *index, rp, mp->context); + instances = vec_len(sm->interfaces[cursor].input_instance_indices) ? + sm->interfaces[cursor].input_instance_indices : sm->interfaces[cursor].output_instance_indices; + if (vec_len(instances) == 0) + { + index = ~0; + } + else { + index = instances[0]; + } + send_snort_interface_details (cursor, index, rp, mp->context); })) /* clang-format on */ } else { - index = vec_elt_at_index (sm->instance_by_sw_if_index, sw_if_index); - if (snort_get_instance_by_index (index[0])) + instances = + vec_len (sm->interfaces[sw_if_index].input_instance_indices) ? + sm->interfaces[sw_if_index].input_instance_indices : + sm->interfaces[sw_if_index].output_instance_indices; + if (vec_len (instances) == 0) + { + index = ~0; + } + else + { + index = instances[0]; + } + if (snort_get_instance_by_index (index)) { vl_api_registration_t *rp = vl_api_client_index_to_registration (mp->client_index); @@ -221,7 +252,8 @@ vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp) return; } - send_snort_interface_details (sw_if_index, *index, rp, mp->context); + send_snort_interface_details (sw_if_index, *instances, rp, + mp->context); } else { @@ -352,12 +384,12 @@ vl_api_snort_interface_detach_t_handler (vl_api_snort_interface_detach_t *mp) vlib_main_t *vm = vlib_get_main (); vl_api_snort_interface_detach_reply_t *rmp; u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index); - int rv = VNET_API_ERROR_NO_MATCHING_INTERFACE; + int rv; - if (sw_if_index != INDEX_INVALID) - rv = snort_interface_enable_disable (vm, NULL, sw_if_index, - 0 /* is_enable */, SNORT_INOUT); + VALIDATE_SW_IF_INDEX (mp); + rv = snort_interface_disable_all (vm, sw_if_index); + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SNORT_INTERFACE_DETACH_REPLY); } diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c index 6862301d2d2..5426b7aa03f 100644 --- a/src/plugins/srtp/srtp.c +++ 
b/src/plugins/srtp/srtp.c @@ -291,7 +291,7 @@ done: if (n_wrote) { if (svm_fifo_set_event (us->tx_fifo)) - session_send_io_evt_to_thread (us->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (us->handle, SESSION_IO_EVT_TX); } if (PREDICT_FALSE (ctx->app_closed && @@ -538,7 +538,7 @@ srtp_migrate_ctx (void *arg) us->opaque = ctx_handle; us->flags &= ~SESSION_F_IS_MIGRATING; if (svm_fifo_max_dequeue (us->tx_fifo)) - session_send_io_evt_to_thread (us->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (us->handle, SESSION_IO_EVT_TX); /* Migrate app session as well */ session_dgram_connect_notify (&ctx->connection, old_thread_index, diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c index c6d2b2fe9e1..cd08da5d9ea 100644 --- a/src/plugins/tlsopenssl/tls_async.c +++ b/src/plugins/tlsopenssl/tls_async.c @@ -31,13 +31,15 @@ [SSL_CLIENT_HELLO_CB] = "SSL_CLIENT_HELLO_CB", \ } -static const char *ssl_want[] = SSL_WANT_NAMES; +const char *ssl_want[] = SSL_WANT_NAMES; #define foreach_ssl_evt_status_type_ \ _ (INVALID_STATUS, "Async event invalid status") \ _ (INFLIGHT, "Async event inflight") \ _ (READY, "Async event ready") \ _ (REENTER, "Async event reenter") \ + _ (DEQ_DONE, "Async event dequeued") \ + _ (CB_EXECUTED, "Async callback executed") \ _ (MAX_STATUS, "Async event max status") typedef enum ssl_evt_status_type_ @@ -51,6 +53,8 @@ typedef struct openssl_tls_callback_arg_ { int thread_index; int event_index; + ssl_async_evt_type_t async_evt_type; + openssl_resume_handler *evt_handler; } openssl_tls_callback_arg_t; typedef struct openssl_event_ @@ -58,12 +62,13 @@ typedef struct openssl_event_ u32 ctx_index; int session_index; ssl_evt_status_type_t status; - ssl_async_evt_type_t type; - - openssl_resume_handler *handler; + transport_send_params_t *tran_sp; openssl_tls_callback_arg_t cb_args; + #define thread_idx cb_args.thread_index #define event_idx cb_args.event_index +#define async_event_type cb_args.async_evt_type 
+#define async_evt_handler cb_args.evt_handler int next; } openssl_evt_t; @@ -72,7 +77,6 @@ typedef struct openssl_async_queue_ int evt_run_head; int evt_run_tail; int depth; - int max_depth; } openssl_async_queue_t; typedef struct openssl_async_ @@ -136,12 +140,10 @@ evt_pool_init (vlib_main_t * vm) om->queue[i].evt_run_head = -1; om->queue[i].evt_run_tail = -1; om->queue[i].depth = 0; - om->queue[i].max_depth = 0; om->queue_in_init[i].evt_run_head = -1; om->queue_in_init[i].evt_run_tail = -1; om->queue_in_init[i].depth = 0; - om->queue_in_init[i].max_depth = 0; } om->polling = NULL; @@ -284,17 +286,19 @@ tls_async_openssl_callback (SSL * s, void *cb_arg) openssl_tls_callback_arg_t *args = (openssl_tls_callback_arg_t *) cb_arg; int thread_index = args->thread_index; int event_index = args->event_index; + ssl_async_evt_type_t evt_type = args->async_evt_type; + int *evt_run_tail, *evt_run_head; TLS_DBG (2, "Set event %d to run\n", event_index); event = openssl_evt_get_w_thread (event_index, thread_index); - if (event->type == SSL_ASYNC_EVT_INIT) + if (evt_type == SSL_ASYNC_EVT_INIT) queue = om->queue_in_init; else queue = om->queue; - int *evt_run_tail = &queue[thread_index].evt_run_tail; - int *evt_run_head = &queue[thread_index].evt_run_head; + evt_run_tail = &queue[thread_index].evt_run_tail; + evt_run_head = &queue[thread_index].evt_run_head; /* Happend when a recursive case, especially in SW simulation */ if (PREDICT_FALSE (event->status == SSL_ASYNC_READY)) @@ -305,16 +309,17 @@ tls_async_openssl_callback (SSL * s, void *cb_arg) event->status = SSL_ASYNC_READY; event->next = -1; - if (*evt_run_tail >= 0) + if (*evt_run_head < 0) + *evt_run_head = event_index; + else if (*evt_run_tail >= 0) { event_tail = openssl_evt_get_w_thread (*evt_run_tail, thread_index); event_tail->next = event_index; } + + queue[thread_index].depth++; + *evt_run_tail = event_index; - if (*evt_run_head < 0) - { - *evt_run_head = event_index; - } return 1; } @@ -344,42 +349,6 @@ 
openssl_async_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, return wrote; } -/* - * Perform SSL_write from TX FIFO head. - * On successful write, TLS context total_async_write bytes are updated. - */ -static_always_inline int -openssl_write_from_fifo_head_into_ssl (svm_fifo_t *f, SSL *ssl, - openssl_ctx_t *oc, u32 max_len) -{ - int wrote = 0, rv, i = 0, len; - u32 n_segs = 2; - svm_fifo_seg_t fs[n_segs]; - - max_len = clib_min (oc->total_async_write, max_len); - - len = svm_fifo_segments (f, 0, fs, &n_segs, max_len); - if (len <= 0) - return 0; - - while (wrote < len && i < n_segs) - { - rv = SSL_write (ssl, fs[i].data, fs[i].len); - wrote += (rv > 0) ? rv : 0; - if (rv < (int) fs[i].len) - break; - i++; - } - - if (wrote) - { - oc->total_async_write -= wrote; - svm_fifo_dequeue_drop (f, wrote); - } - - return wrote; -} - static int openssl_async_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl) { @@ -394,455 +363,53 @@ openssl_async_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl) return read; } -/* - * Pop the current event from queue and update tail if needed - */ -static void -tls_async_dequeue_update (openssl_evt_t *event, int *evt_run_head, - int *evt_run_tail, int *queue_depth) -{ - /* remove the event from queue head */ - *evt_run_head = event->next; - event->status = SSL_ASYNC_INVALID_STATUS; - event->next = -1; - - (*queue_depth)--; - - if (*evt_run_head < 0) - { - *evt_run_tail = -1; - if (*queue_depth) - clib_warning ("queue empty but depth:%d\n", *queue_depth); - } -} - -static int -tls_async_dequeue_event (int thread_index) -{ - openssl_evt_t *event; - openssl_async_t *om = &openssl_async_main; - openssl_async_queue_t *queue = om->queue; - int *evt_run_tail = &queue[thread_index].evt_run_tail; - int *evt_run_head = &queue[thread_index].evt_run_head; - int dequeue_cnt = clib_min (queue[thread_index].depth, MAX_VECTOR_ASYNC); - const u32 max_len = 128 << 10; - - /* dequeue all pending events, events enqueued during this routine call, - * will be 
handled next time tls_async_dequeue_event is invoked */ - while (*evt_run_head >= 0 && dequeue_cnt--) - { - session_t *app_session, *tls_session; - openssl_ctx_t *oc; - tls_ctx_t *ctx; - SSL *ssl; - - event = openssl_evt_get_w_thread (*evt_run_head, thread_index); - ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); - oc = (openssl_ctx_t *) ctx; - ssl = oc->ssl; - - if (event->type == SSL_ASYNC_EVT_RD) - { - /* read event */ - svm_fifo_t *app_rx_fifo, *tls_rx_fifo; - int read; - - app_session = session_get_from_handle (ctx->app_session_handle); - app_rx_fifo = app_session->rx_fifo; - - tls_session = session_get_from_handle (ctx->tls_session_handle); - tls_rx_fifo = tls_session->rx_fifo; - - /* continue the paused job */ - read = openssl_async_read_from_ssl_into_fifo (app_rx_fifo, ssl); - if (read < 0) - { - if (SSL_want_async (ssl)) - goto handle_later; - - tls_async_dequeue_update (event, evt_run_head, evt_run_tail, - &queue[thread_index].depth); - goto ev_rd_done; - } - - /* read finished or in error, remove the event from queue */ - tls_async_dequeue_update (event, evt_run_head, evt_run_tail, - &queue[thread_index].depth); - - /* Unrecoverable protocol error. Reset connection */ - if (PREDICT_FALSE ((read < 0) && - (SSL_get_error (ssl, read) == SSL_ERROR_SSL))) - { - tls_notify_app_io_error (ctx); - goto ev_rd_done; - } - - /* - * Managed to read some data. If handshake just completed, session - * may still be in accepting state. - */ - if (app_session->session_state >= SESSION_STATE_READY) - tls_notify_app_enqueue (ctx, app_session); - - /* managed to read, try to read more */ - while (read > 0) - { - read = - openssl_read_from_ssl_into_fifo (app_rx_fifo, ctx, max_len); - if (read < 0) - { - if (SSL_want_async (ssl)) - { - vpp_tls_async_enqueue_event (oc, SSL_ASYNC_EVT_RD, NULL, - 0); - goto ev_rd_queued; - } - } - - /* Unrecoverable protocol error. 
Reset connection */ - if (PREDICT_FALSE ((read < 0) && - (SSL_get_error (ssl, read) == SSL_ERROR_SSL))) - { - tls_notify_app_io_error (ctx); - goto ev_rd_done; - } - - /* If handshake just completed, session may still be in accepting - * state */ - if (read >= 0 && - app_session->session_state >= SESSION_STATE_READY) - tls_notify_app_enqueue (ctx, app_session); - } - - ev_rd_done: - /* read done */ - ctx->flags &= ~TLS_CONN_F_ASYNC_RD; - - if ((SSL_pending (ssl) > 0) || - svm_fifo_max_dequeue_cons (tls_rx_fifo)) - { - tls_add_vpp_q_builtin_rx_evt (tls_session); - } - - ev_rd_queued: - continue; - } - else if (event->type == SSL_ASYNC_EVT_WR) - { - /* write event */ - int wrote, wrote_sum = 0; - u32 space, enq_buf; - svm_fifo_t *app_tx_fifo, *tls_tx_fifo; - transport_send_params_t *sp = - (transport_send_params_t *) event->handler; - - app_session = session_get_from_handle (ctx->app_session_handle); - app_tx_fifo = app_session->tx_fifo; - - /* continue the paused job */ - wrote = - openssl_async_write_from_fifo_into_ssl (app_tx_fifo, ssl, oc); - if (wrote < 0) - { - if (SSL_want_async (ssl)) - /* paused job not ready, wait */ - goto handle_later; - clib_warning ("[wrote:%d want:%s ctx:%d]\n", wrote, - ssl_want[SSL_want (ssl)], oc->openssl_ctx_index); - } - wrote_sum += wrote; - - /* paused job done, remove event, update queue */ - tls_async_dequeue_update (event, evt_run_head, evt_run_tail, - &queue[thread_index].depth); - - /* Unrecoverable protocol error. Reset connection */ - if (PREDICT_FALSE (wrote < 0)) - { - tls_notify_app_io_error (ctx); - clib_warning ( - "Unrecoverable protocol error. 
Reset connection\n"); - goto ev_in_queue; - } - - tls_session = session_get_from_handle (ctx->tls_session_handle); - tls_tx_fifo = tls_session->tx_fifo; - - /* prepare for remaining write(s) */ - space = svm_fifo_max_enqueue_prod (tls_tx_fifo); - /* Leave a bit of extra space for tls ctrl data, if any needed */ - space = clib_max ((int) space - TLSO_CTRL_BYTES, 0); - - /* continue remaining openssl_ctx_write request */ - while (oc->total_async_write) - { - int rv; - u32 deq_max = svm_fifo_max_dequeue_cons (app_tx_fifo); - - deq_max = clib_min (deq_max, space); - deq_max = clib_min (deq_max, sp->max_burst_size); - if (!deq_max) - goto check_tls_fifo; - - /* Make sure tcp's tx fifo can actually buffer all bytes to - * be dequeued. If under memory pressure, tls's fifo segment - * might not be able to allocate the chunks needed. This also - * avoids errors from the underlying custom bio to the ssl - * infra which at times can get stuck. */ - if (svm_fifo_provision_chunks (tls_tx_fifo, 0, 0, - deq_max + TLSO_CTRL_BYTES)) - goto check_tls_fifo; - - rv = openssl_write_from_fifo_head_into_ssl (app_tx_fifo, ssl, oc, - deq_max); - - /* Unrecoverable protocol error. Reset connection */ - if (PREDICT_FALSE (rv < 0)) - { - tls_notify_app_io_error (ctx); - clib_warning ( - "Unrecoverable protocol error. 
Reset connection\n"); - goto ev_in_queue; - } - - if (!rv) - { - if (SSL_want_async (ssl)) - { - /* new paused job, add queue event and wait */ - vpp_tls_async_enqueue_event (oc, SSL_ASYNC_EVT_WR, sp, - 0); - goto ev_in_queue; - } - clib_warning ("[rv:%d want:%s ctx:%d]\n", rv, - ssl_want[SSL_want (ssl)], - oc->openssl_ctx_index); - break; - } - wrote_sum += rv; - } - - if (svm_fifo_needs_deq_ntf (app_tx_fifo, wrote_sum)) - session_dequeue_notify (app_session); - - check_tls_fifo: - /* we got here, async write is done or not possible */ - oc->total_async_write = 0; - - if (PREDICT_FALSE (BIO_ctrl_pending (oc->rbio) <= 0)) - tls_notify_app_io_error (ctx); - - /* Deschedule and wait for deq notification if fifo is almost full */ - enq_buf = - clib_min (svm_fifo_size (tls_tx_fifo) / 2, TLSO_MIN_ENQ_SPACE); - if (space < wrote_sum + enq_buf) - { - svm_fifo_add_want_deq_ntf (tls_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); - transport_connection_deschedule (&ctx->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - } - else - { - /* Request tx reschedule of the app session */ - app_session->flags |= SESSION_F_CUSTOM_TX; - transport_connection_reschedule (&ctx->connection); - } - - ev_in_queue: - /* job removed, openssl_ctx_write will resume */ - continue; - } - else - { - /* wrong event type */ - clib_warning ("goto remove_event [event->type:%d]\n", event->type); - tls_async_dequeue_update (event, evt_run_head, evt_run_tail, - &queue[thread_index].depth); - } - } - -handle_later: - return 1; -} - -static int -tls_async_dequeue_event_in_init (int thread_index) +int +vpp_tls_async_init_event (tls_ctx_t *ctx, openssl_resume_handler *handler, + session_t *session, ssl_async_evt_type_t evt_type, + transport_send_params_t *sp, int wr_size) { - openssl_evt_t *event; - openssl_async_t *om = &openssl_async_main; - openssl_async_queue_t *queue = om->queue_in_init; - int *evt_run_tail = &queue[thread_index].evt_run_tail; - int *evt_run_head = &queue[thread_index].evt_run_head; + u32 eidx; 
+ openssl_evt_t *event = NULL; + openssl_ctx_t *oc = (openssl_ctx_t *) ctx; + u32 thread_id = ctx->c_thread_index; - /* dequeue events if exists */ - while (*evt_run_head >= 0) + if (oc->evt_alloc_flag[evt_type]) { - openssl_ctx_t *oc; - tls_ctx_t *ctx; - int rv, err; - - event = openssl_evt_get_w_thread (*evt_run_head, thread_index); - ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); - oc = (openssl_ctx_t *) ctx; - - if (event->type != SSL_ASYNC_EVT_INIT) - { - /* wrong event type */ - clib_warning ("goto remove_event [event->type:%d]\n", event->type); - goto remove_event; - } - - if (!SSL_in_init (oc->ssl)) - { - clib_warning ("[!SSL_in_init() != ev->type:%d] th:%d ev:%d\n", - event->type, event->cb_args.thread_index, - event->cb_args.event_index); - goto remove_event; - } - - rv = SSL_do_handshake (oc->ssl); - err = SSL_get_error (oc->ssl, rv); - - /* Do not remove session from tail */ - if (err == SSL_ERROR_WANT_ASYNC) - goto handle_later; - - if (err == SSL_ERROR_SSL) + eidx = oc->evt_index[evt_type]; + if (evt_type == SSL_ASYNC_EVT_WR) { - char buf[512]; - ERR_error_string (ERR_get_error (), buf); - clib_warning ("Err: %s\n", buf); - openssl_handle_handshake_failure (ctx); - goto remove_event; - } - - if (err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) - goto handle_later; - - /* client not supported */ - if (!SSL_is_server (oc->ssl)) - { - clib_warning ("goto remove_event [!SSL_is_server]\n"); - goto remove_event; - } - - if (tls_notify_app_accept (ctx)) - { - ctx->c_s_index = SESSION_INVALID_INDEX; - tls_disconnect_transport (ctx); - } - - TLS_DBG (1, "Handshake for %u complete. 
TLS cipher is %s", - oc->openssl_ctx_index, SSL_get_cipher (oc->ssl)); - - remove_event: - *evt_run_head = event->next; - queue[thread_index].depth--; - - if (*evt_run_head < 0) - { - /* queue empty, bail out */ - *evt_run_tail = -1; - if (queue[thread_index].depth) - clib_warning ("queue empty but depth:%d\n", - queue[thread_index].depth); - break; + event = openssl_evt_get (eidx); + goto update_wr_evnt; } + return 1; } - -handle_later: - return 1; -} - -int -vpp_tls_async_enqueue_event (openssl_ctx_t *ctx, int evt_type, - transport_send_params_t *sp, int size) -{ - openssl_evt_t *event; - openssl_async_t *om = &openssl_async_main; - openssl_async_queue_t *queue; - int thread_index; - int event_index; - int *evt_run_tail; - int *evt_run_head; - - event = openssl_evt_get (ctx->evt_index[evt_type]); - - thread_index = event->thread_idx; - event_index = event->event_idx; - - /* set queue to be used */ - if (SSL_in_init (ctx->ssl)) - queue = om->queue_in_init; else - queue = om->queue; - - evt_run_tail = &queue[thread_index].evt_run_tail; - evt_run_head = &queue[thread_index].evt_run_head; - - event->type = evt_type; - event->handler = (openssl_resume_handler *) sp; - event->next = -1; - - /* first we enqueue the request */ - if (*evt_run_tail >= 0) { - openssl_evt_t *event_tail; - - /* queue not empty, append to tail event */ - event_tail = openssl_evt_get_w_thread (*evt_run_tail, thread_index); - event_tail->next = event_index; + eidx = openssl_evt_alloc (); + oc->evt_alloc_flag[evt_type] = true; } - /* set tail to use new event index */ - *evt_run_tail = event_index; - - if (*evt_run_head < 0) - /* queue is empty, update head */ - *evt_run_head = event_index; - - queue[thread_index].depth++; - if (queue[thread_index].depth > queue[thread_index].max_depth) - queue[thread_index].max_depth = queue[thread_index].depth; - - return 1; -} - -static int -vpp_tls_async_init_event (tls_ctx_t *ctx, openssl_resume_handler *handler, - session_t *session, ssl_async_evt_type_t 
evt_type) -{ - u32 eidx; - openssl_evt_t *event; - openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - u32 thread_id = ctx->c_thread_index; - - eidx = openssl_evt_alloc (); event = openssl_evt_get (eidx); event->ctx_index = oc->openssl_ctx_index; + /* async call back args */ event->event_idx = eidx; event->thread_idx = thread_id; - event->handler = handler; + event->async_event_type = evt_type; + event->async_evt_handler = handler; event->session_index = session->session_index; - event->type = evt_type; event->status = SSL_ASYNC_INVALID_STATUS; oc->evt_index[evt_type] = eidx; - event->next = -1; #ifdef HAVE_OPENSSL_ASYNC SSL_set_async_callback_arg (oc->ssl, &event->cb_args); #endif - - return 1; -} - -int -vpp_tls_async_init_events (tls_ctx_t *ctx, openssl_resume_handler *handler, - session_t *session) -{ - vpp_tls_async_init_event (ctx, handler, session, SSL_ASYNC_EVT_INIT); - vpp_tls_async_init_event (ctx, handler, session, SSL_ASYNC_EVT_RD); - vpp_tls_async_init_event (ctx, handler, session, SSL_ASYNC_EVT_WR); - +update_wr_evnt: + if (evt_type == SSL_ASYNC_EVT_WR) + { + transport_connection_deschedule (&ctx->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + oc->total_async_write = wr_size; + } + event->tran_sp = sp; return 1; } @@ -866,23 +433,6 @@ vpp_openssl_is_inflight (tls_ctx_t *ctx) return 0; } -int -vpp_tls_async_update_event (tls_ctx_t *ctx, int eagain, - ssl_async_evt_type_t type) -{ - u32 eidx; - openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - openssl_evt_t *event; - - eidx = oc->evt_index[type]; - event = openssl_evt_get (eidx); - event->status = SSL_ASYNC_INFLIGHT; - if (eagain) - return tls_async_openssl_callback (0, &event->cb_args); - - return 1; -} - void event_handler (void *tls_async) { @@ -890,17 +440,16 @@ event_handler (void *tls_async) openssl_evt_t *event; session_t *session; int thread_index; - tls_ctx_t *ctx; event = (openssl_evt_t *) tls_async; thread_index = event->thread_idx; - ctx = openssl_ctx_get_w_thread (event->ctx_index, 
thread_index); - handler = event->handler; + handler = event->async_evt_handler; session = session_get (event->session_index, thread_index); if (handler) { - (*handler) (ctx, session); + (*handler) (event, session); + event->status = SSL_ASYNC_CB_EXECUTED; } return; @@ -997,35 +546,33 @@ tls_async_do_job (int eidx, u32 thread_index) } int -tls_resume_from_crypto (int thread_index) +handle_async_cb_events (openssl_async_queue_t *queue, int thread_index) { int i; - - openssl_async_t *om = &openssl_async_main; openssl_evt_t *event; - int *evt_run_head = &om->queue[thread_index].evt_run_head; - int *evt_run_tail = &om->queue[thread_index].evt_run_tail; + + int *evt_run_head = &queue[thread_index].evt_run_head; + int *evt_run_tail = &queue[thread_index].evt_run_tail; if (*evt_run_head < 0) return 0; for (i = 0; i < MAX_VECTOR_ASYNC; i++) { - if (*evt_run_head >= 0) + if (*evt_run_head >= 0 && queue[thread_index].depth) { event = openssl_evt_get_w_thread (*evt_run_head, thread_index); - tls_async_do_job (*evt_run_head, thread_index); if (PREDICT_FALSE (event->status == SSL_ASYNC_REENTER)) - { - /* recusive event triggered */ - event->status = SSL_ASYNC_READY; - continue; - } + /* recusive event triggered */ + goto deq_event; + tls_async_do_job (*evt_run_head, thread_index); - event->status = SSL_ASYNC_INVALID_STATUS; + deq_event: *evt_run_head = event->next; + event->status = SSL_ASYNC_DEQ_DONE; + queue[thread_index].depth--; - if (event->next < 0) + if (*evt_run_head < 0) { *evt_run_tail = -1; break; @@ -1034,7 +581,32 @@ tls_resume_from_crypto (int thread_index) } return 0; +} +void +resume_handshake_events (int thread_index) +{ + openssl_async_t *om = &openssl_async_main; + + openssl_async_queue_t *queue = om->queue_in_init; + handle_async_cb_events (queue, thread_index); +} + +void +resume_read_write_events (int thread_index) +{ + openssl_async_t *om = &openssl_async_main; + + openssl_async_queue_t *queue = om->queue; + handle_async_cb_events (queue, thread_index); 
+} + +int +tls_resume_from_crypto (int thread_index) +{ + resume_read_write_events (thread_index); + resume_handshake_events (thread_index); + return 0; } static clib_error_t * @@ -1044,6 +616,221 @@ tls_async_init (vlib_main_t * vm) return 0; } +int +tls_async_handshake_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + int rv, err; + + ASSERT (thread_index == vlib_get_thread_index ()); + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); + + if (!SSL_in_init (oc->ssl)) + { + TLS_DBG (2, "[!SSL_in_init]==>CTX: %p EVT: %p EIDX: %d", ctx, event, + event->event_idx); + return 0; + } + + if (ctx->flags & TLS_CONN_F_RESUME) + { + ctx->flags &= ~TLS_CONN_F_RESUME; + } + else if (!svm_fifo_max_dequeue_cons (tls_session->rx_fifo)) + return 0; + + rv = SSL_do_handshake (oc->ssl); + err = SSL_get_error (oc->ssl, rv); + + if (err == SSL_ERROR_WANT_ASYNC) + return 0; + + if (err == SSL_ERROR_SSL) + { + char buf[512]; + ERR_error_string (ERR_get_error (), buf); + TLS_DBG (2, "[SSL_ERROR_SSL]==>CTX: %p EVT: %p EIDX: %d Buf: %s", ctx, + event, event->event_idx, buf); + openssl_handle_handshake_failure (ctx); + return 0; + } + + if (err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) + return 0; + + /* client not supported */ + if (!SSL_is_server (oc->ssl)) + return 0; + + /* Need to check transport status */ + if (ctx->flags & TLS_CONN_F_PASSIVE_CLOSE) + { + openssl_handle_handshake_failure (ctx); + return 0; + } + + if (tls_notify_app_accept (ctx)) + { + ctx->c_s_index = SESSION_INVALID_INDEX; + tls_disconnect_transport (ctx); + } + + TLS_DBG (1, + "<=====Handshake for %u complete. 
TLS cipher is %s EVT: %p =====>", + oc->openssl_ctx_index, SSL_get_cipher (oc->ssl), event); + + ctx->flags |= TLS_CONN_F_HS_DONE; + + return 1; +} + +int +tls_async_read_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + session_t *app_session, *tls_session; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + SSL *ssl; + + ASSERT (thread_index == vlib_get_thread_index ()); + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + ssl = oc->ssl; + + ctx->flags |= TLS_CONN_F_ASYNC_RD; + /* read event */ + svm_fifo_t *app_rx_fifo, *tls_rx_fifo; + int read, err; + + app_session = session_get_from_handle (ctx->app_session_handle); + app_rx_fifo = app_session->rx_fifo; + + tls_session = session_get_from_handle (ctx->tls_session_handle); + tls_rx_fifo = tls_session->rx_fifo; + + /* continue the paused job */ + read = openssl_async_read_from_ssl_into_fifo (app_rx_fifo, ssl); + err = SSL_get_error (oc->ssl, read); + + if (err == SSL_ERROR_WANT_ASYNC) + return 0; + + if (read <= 0) + { + if (SSL_want_async (ssl)) + return 0; + goto ev_rd_done; + } + + /* Unrecoverable protocol error. Reset connection */ + if (PREDICT_FALSE ((read <= 0) && (err == SSL_ERROR_SSL))) + { + tls_notify_app_io_error (ctx); + goto ev_rd_done; + } + + /* + * Managed to read some data. If handshake just completed, session + * may still be in accepting state. 
+ */ + if (app_session->session_state >= SESSION_STATE_READY) + tls_notify_app_enqueue (ctx, app_session); + +ev_rd_done: + /* read done */ + ctx->flags &= ~TLS_CONN_F_ASYNC_RD; + + if ((SSL_pending (ssl) > 0) || svm_fifo_max_dequeue_cons (tls_rx_fifo)) + tls_add_vpp_q_builtin_rx_evt (tls_session); + + return 1; +} + +int +tls_async_write_event_handler (void *async_evt, void *unused) +{ + openssl_evt_t *event = (openssl_evt_t *) async_evt; + int thread_index = event->thread_idx; + session_t *app_session, *tls_session; + openssl_ctx_t *oc; + tls_ctx_t *ctx; + SSL *ssl; + + ASSERT (thread_index == vlib_get_thread_index ()); + ctx = openssl_ctx_get_w_thread (event->ctx_index, thread_index); + oc = (openssl_ctx_t *) ctx; + ssl = oc->ssl; + + /* write event */ + int wrote = 0; + u32 space, enq_buf; + svm_fifo_t *app_tx_fifo, *tls_tx_fifo; + transport_send_params_t *sp = event->tran_sp; + + app_session = session_get_from_handle (ctx->app_session_handle); + app_tx_fifo = app_session->tx_fifo; + + /* Check if already data write is completed or not */ + if (oc->total_async_write == 0) + return 0; + + wrote = openssl_async_write_from_fifo_into_ssl (app_tx_fifo, ssl, oc); + if (PREDICT_FALSE (!wrote)) + { + if (SSL_want_async (ssl)) + return 0; + } + + /* Unrecoverable protocol error. 
Reset connection */ + if (PREDICT_FALSE (wrote < 0)) + { + tls_notify_app_io_error (ctx); + return 0; + } + + tls_session = session_get_from_handle (ctx->tls_session_handle); + tls_tx_fifo = tls_session->tx_fifo; + + /* prepare for remaining write(s) */ + space = svm_fifo_max_enqueue_prod (tls_tx_fifo); + /* Leave a bit of extra space for tls ctrl data, if any needed */ + space = clib_max ((int) space - TLSO_CTRL_BYTES, 0); + + if (svm_fifo_needs_deq_ntf (app_tx_fifo, wrote)) + session_dequeue_notify (app_session); + + /* we got here, async write is done */ + oc->total_async_write = 0; + + if (PREDICT_FALSE (ctx->flags & TLS_CONN_F_APP_CLOSED && + BIO_ctrl_pending (oc->rbio) <= 0)) + openssl_confirm_app_close (ctx); + + /* Deschedule and wait for deq notification if fifo is almost full */ + enq_buf = clib_min (svm_fifo_size (tls_tx_fifo) / 2, TLSO_MIN_ENQ_SPACE); + if (space < wrote + enq_buf) + { + svm_fifo_add_want_deq_ntf (tls_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); + transport_connection_deschedule (&ctx->connection); + sp->flags |= TRANSPORT_SND_F_DESCHED; + } + else + { + /* Request tx reschedule of the app session */ + app_session->flags |= SESSION_F_CUSTOM_TX; + transport_connection_reschedule (&ctx->connection); + } + + return 1; +} + static uword tls_async_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -1055,8 +842,7 @@ tls_async_process (vlib_main_t * vm, vlib_node_runtime_t * rt, if (pool_elts (om->evt_pool[thread_index]) > 0) { openssl_async_polling (); - tls_async_dequeue_event_in_init (thread_index); - tls_async_dequeue_event (thread_index); + tls_resume_from_crypto (thread_index); } return 0; diff --git a/src/plugins/tlsopenssl/tls_bio.c b/src/plugins/tlsopenssl/tls_bio.c index eead09a9635..422cee399f6 100644 --- a/src/plugins/tlsopenssl/tls_bio.c +++ b/src/plugins/tlsopenssl/tls_bio.c @@ -80,7 +80,7 @@ bio_tls_read (BIO * b, char *out, int outl) if (svm_fifo_needs_deq_ntf (s->rx_fifo, rv)) { svm_fifo_clear_deq_ntf 
(s->rx_fifo); - session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_RX); + session_program_transport_io_evt (s->handle, SESSION_IO_EVT_RX); } if (svm_fifo_is_empty_cons (s->rx_fifo)) diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index d7adbed3269..f0be025a207 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -166,31 +166,6 @@ openssl_lctx_get (u32 lctx_index) return pool_elt_at_index (openssl_main.lctx_pool, lctx_index); } -static int -openssl_handle_want_async (tls_ctx_t *ctx, int evt_type, - transport_send_params_t *sp, int size) -{ - int ret; - openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - - if (evt_type >= SSL_ASYNC_EVT_MAX || evt_type == 0) - { - clib_warning ("return 0 [illegal evt_type value:%d]\n", evt_type); - return 0; - } - - if (evt_type == SSL_ASYNC_EVT_WR) - { - /* de-schedule transport connection */ - transport_connection_deschedule (&ctx->connection); - sp->flags |= TRANSPORT_SND_F_DESCHED; - oc->total_async_write = size; - } - ret = vpp_tls_async_enqueue_event (oc, evt_type, sp, size); - - return ret; -} - int openssl_read_from_ssl_into_fifo (svm_fifo_t *f, tls_ctx_t *ctx, u32 max_len) { @@ -219,8 +194,10 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t *f, tls_ctx_t *ctx, u32 max_len) { if (openssl_main.async && SSL_want_async (oc->ssl)) { - ctx->flags |= TLS_CONN_F_ASYNC_RD; - openssl_handle_want_async (ctx, SSL_ASYNC_EVT_RD, NULL, 0); + session_t *tls_session = + session_get_from_handle (ctx->tls_session_handle); + vpp_tls_async_init_event (ctx, tls_async_read_event_handler, + tls_session, SSL_ASYNC_EVT_RD, NULL, 0); return 0; } ossl_check_err_is_fatal (ssl, read); @@ -264,53 +241,39 @@ openssl_write_from_fifo_into_ssl (svm_fifo_t *f, tls_ctx_t *ctx, { rv = SSL_write (ssl, fs[i].data, fs[i].len); wrote += (rv > 0) ? 
rv : 0; + if (rv < (int) fs[i].len) { - ossl_check_err_is_fatal (ssl, rv); + if (rv < 0) + { + int err = SSL_get_error (ssl, rv); + if (err == SSL_ERROR_SSL) + return -1; + + if (err == SSL_ERROR_WANT_WRITE) + break; + + if (openssl_main.async && SSL_want_async (ssl)) + { + session_t *ts = + session_get_from_handle (ctx->tls_session_handle); + vpp_tls_async_init_event (ctx, tls_async_write_event_handler, + ts, SSL_ASYNC_EVT_WR, sp, + sp->max_burst_size); + return 0; + } + } break; } i++; } - if (openssl_main.async && SSL_want_async (ssl)) - { - openssl_handle_want_async (ctx, SSL_ASYNC_EVT_WR, sp, max_len); - return 0; - } if (wrote) svm_fifo_dequeue_drop (f, wrote); return wrote; } -#ifdef HAVE_OPENSSL_ASYNC -static int -openssl_check_async_status (tls_ctx_t * ctx, openssl_resume_handler * handler, - session_t * session) -{ - openssl_ctx_t *oc = (openssl_ctx_t *) ctx; - int estatus; - - SSL_get_async_status (oc->ssl, &estatus); - if (estatus == ASYNC_STATUS_EAGAIN) - { - vpp_tls_async_update_event (ctx, 1, SSL_ASYNC_EVT_INIT); - vpp_tls_async_update_event (ctx, 1, SSL_ASYNC_EVT_RD); - vpp_tls_async_update_event (ctx, 1, SSL_ASYNC_EVT_WR); - } - else - { - vpp_tls_async_update_event (ctx, 0, SSL_ASYNC_EVT_INIT); - vpp_tls_async_update_event (ctx, 0, SSL_ASYNC_EVT_RD); - vpp_tls_async_update_event (ctx, 0, SSL_ASYNC_EVT_WR); - } - - return 1; - -} - -#endif - void openssl_handle_handshake_failure (tls_ctx_t *ctx) { @@ -338,7 +301,7 @@ openssl_handle_handshake_failure (tls_ctx_t *ctx) } int -openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session) +openssl_ctx_handshake_rx (tls_ctx_t *ctx, session_t *tls_session) { openssl_ctx_t *oc = (openssl_ctx_t *) ctx; int rv = 0, err; @@ -357,7 +320,8 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session) if (openssl_main.async && err == SSL_ERROR_WANT_ASYNC) { - openssl_handle_want_async (ctx, SSL_ASYNC_EVT_INIT, NULL, 0); + vpp_tls_async_init_event (ctx, tls_async_handshake_event_handler, + 
tls_session, SSL_ASYNC_EVT_INIT, NULL, 0); return -1; } @@ -868,7 +832,7 @@ openssl_ctx_init_client (tls_ctx_t * ctx) #ifdef HAVE_OPENSSL_ASYNC session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); - vpp_tls_async_init_events (ctx, openssl_ctx_handshake_rx, tls_session); + openssl_ctx_handshake_rx (ctx, tls_session); #endif while (1) { @@ -876,11 +840,7 @@ openssl_ctx_init_client (tls_ctx_t * ctx) err = SSL_get_error (oc->ssl, rv); #ifdef HAVE_OPENSSL_ASYNC if (err == SSL_ERROR_WANT_ASYNC) - { - openssl_check_async_status (ctx, openssl_ctx_handshake_rx, - tls_session); - break; - } + break; #endif if (err != SSL_ERROR_WANT_WRITE) break; @@ -1107,7 +1067,7 @@ openssl_ctx_init_server (tls_ctx_t * ctx) { session_t *tls_session = session_get_from_handle (ctx->tls_session_handle); - vpp_tls_async_init_events (ctx, openssl_ctx_handshake_rx, tls_session); + openssl_ctx_handshake_rx (ctx, tls_session); } while (1) @@ -1115,16 +1075,13 @@ openssl_ctx_init_server (tls_ctx_t * ctx) rv = SSL_do_handshake (oc->ssl); err = SSL_get_error (oc->ssl, rv); if (openssl_main.async && err == SSL_ERROR_WANT_ASYNC) - { - openssl_handle_want_async (ctx, SSL_ASYNC_EVT_INIT, NULL, 0); + break; - break; - } if (err != SSL_ERROR_WANT_WRITE) break; } - TLS_DBG (2, "tls state for [%u]%u is su", ctx->c_thread_index, + TLS_DBG (2, "tls state for [%u]%u is %s", ctx->c_thread_index, oc->openssl_ctx_index, SSL_state_string_long (oc->ssl)); return 0; } diff --git a/src/plugins/tlsopenssl/tls_openssl.h b/src/plugins/tlsopenssl/tls_openssl.h index 8f6c6652a52..1a566f35fa6 100644 --- a/src/plugins/tlsopenssl/tls_openssl.h +++ b/src/plugins/tlsopenssl/tls_openssl.h @@ -40,6 +40,7 @@ typedef struct tls_ctx_openssl_ SSL_CTX *client_ssl_ctx; SSL *ssl; u32 evt_index[SSL_ASYNC_EVT_MAX]; + bool evt_alloc_flag[SSL_ASYNC_EVT_MAX]; u32 total_async_write; BIO *rbio; BIO *wbio; @@ -74,15 +75,14 @@ typedef struct openssl_main_ u32 max_pipelines; } openssl_main_t; -typedef int 
openssl_resume_handler (tls_ctx_t * ctx, session_t * tls_session); +typedef int openssl_resume_handler (void *event, void *session); +typedef int (*async_handlers) (void *event, void *session); tls_ctx_t *openssl_ctx_get_w_thread (u32 ctx_index, u8 thread_index); -int vpp_tls_async_init_events (tls_ctx_t *ctx, openssl_resume_handler *handler, - session_t *session); -int vpp_tls_async_update_event (tls_ctx_t *ctx, int eagain, - ssl_async_evt_type_t type); -int vpp_tls_async_enqueue_event (openssl_ctx_t *ctx, int evt_type, - transport_send_params_t *sp, int size); +int vpp_tls_async_init_event (tls_ctx_t *ctx, openssl_resume_handler *handler, + session_t *session, + ssl_async_evt_type_t evt_type, + transport_send_params_t *sp, int wr_size); int tls_async_openssl_callback (SSL * s, void *evt); int openssl_evt_free (int event_idx, u8 thread_index); void openssl_polling_start (ENGINE * engine); @@ -96,6 +96,9 @@ int openssl_read_from_ssl_into_fifo (svm_fifo_t *f, tls_ctx_t *ctx, void openssl_handle_handshake_failure (tls_ctx_t *ctx); void openssl_confirm_app_close (tls_ctx_t *ctx); +int tls_async_write_event_handler (void *event, void *session); +int tls_async_read_event_handler (void *event, void *session); +int tls_async_handshake_event_handler (void *event, void *session); #endif /* SRC_PLUGINS_TLSOPENSSL_TLS_OPENSSL_H_ */ /* diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c index 9459cb776b5..1153d39b6fe 100644 --- a/src/plugins/tlspicotls/tls_picotls.c +++ b/src/plugins/tlspicotls/tls_picotls.c @@ -406,7 +406,8 @@ do_checks: if (svm_fifo_needs_deq_ntf (tcp_rx_fifo, read)) { svm_fifo_clear_deq_ntf (tcp_rx_fifo); - session_send_io_evt_to_thread (tcp_rx_fifo, SESSION_IO_EVT_RX); + session_program_transport_io_evt (tcp_rx_fifo->vpp_sh, + SESSION_IO_EVT_RX); } } @@ -601,7 +602,7 @@ ptls_app_to_tcp_write (picotls_ctx_t *ptls_ctx, session_t *app_session, { svm_fifo_enqueue_nocopy (tcp_tx_fifo, wrote); if (svm_fifo_set_event 
(tcp_tx_fifo)) - session_send_io_evt_to_thread (tcp_tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (tcp_tx_fifo->vpp_sh, SESSION_IO_EVT_TX); } return wrote; diff --git a/src/plugins/unittest/crypto_test.c b/src/plugins/unittest/crypto_test.c index 4bc06f71c79..0254e1a29b8 100644 --- a/src/plugins/unittest/crypto_test.c +++ b/src/plugins/unittest/crypto_test.c @@ -35,6 +35,7 @@ print_results (vlib_main_t * vm, unittest_crypto_test_registration_t ** rv, vnet_crypto_op_t * ops, vnet_crypto_op_chunk_t * chunks, u32 n_ops, crypto_test_main_t * tm) { + vnet_crypto_main_t *cm = &crypto_main; int i; unittest_crypto_test_registration_t *r; vnet_crypto_op_chunk_t *chp; @@ -48,17 +49,17 @@ print_results (vlib_main_t * vm, unittest_crypto_test_registration_t ** rv, unittest_crypto_test_data_t *exp_pt = 0, *exp_ct = 0, exp_pt_data; unittest_crypto_test_data_t *exp_digest = 0, *exp_tag = 0; unittest_crypto_test_data_t *exp_pt_chunks = 0, *exp_ct_chunks = 0; + vnet_crypto_op_data_t *od = cm->opt_data + op->op; + vnet_crypto_alg_data_t *ad = cm->algs + od->alg; switch (vnet_crypto_get_op_type (op->op)) { - case VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT: - exp_tag = &r->tag; - /* fall through */ case VNET_CRYPTO_OP_TYPE_ENCRYPT: exp_ct = &r->ciphertext; exp_ct_chunks = r->ct_chunks; + if (ad->is_aead) + exp_tag = &r->tag; break; - case VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT: case VNET_CRYPTO_OP_TYPE_DECRYPT: if (r->plaintext_incremental) { @@ -219,9 +220,10 @@ restore_engines (u32 * engs) if (engs[i] != ~0) { - ce = vec_elt_at_index (cm->engines, engs[i]); - od->active_engine_index_simple = engs[i]; - cm->ops_handlers[i] = ce->ops_handlers[i]; + vnet_crypto_handler_type_t t = VNET_CRYPTO_HANDLER_TYPE_SIMPLE; + ce = vec_elt_at_index (cm->engines, engs[i]); + od->active_engine_index[t] = engs[i]; + cm->opt_data[i].handlers[t] = ce->ops[i].handlers[t]; } } @@ -246,12 +248,13 @@ save_current_engines (u32 * engs) for (i = 1; i < VNET_CRYPTO_N_OP_IDS; i++) { vnet_crypto_op_data_t *od = 
&cm->opt_data[i]; - if (od->active_engine_index_simple != ~0) + if (od->active_engine_index[VNET_CRYPTO_HANDLER_TYPE_SIMPLE]) { /* save engine index */ - engs[i] = od->active_engine_index_simple; - od->active_engine_index_simple = ce - cm->engines; - cm->ops_handlers[i] = ce->ops_handlers[i]; + vnet_crypto_handler_type_t t = VNET_CRYPTO_HANDLER_TYPE_SIMPLE; + engs[i] = od->active_engine_index[t]; + od->active_engine_index[t] = ce - cm->engines; + cm->opt_data[i].handlers[t] = ce->ops[i].handlers[t]; } } @@ -288,7 +291,7 @@ test_crypto_incremental (vlib_main_t * vm, crypto_test_main_t * tm, { r = rv[i]; int t; - ad = vec_elt_at_index (cm->algs, r->alg); + ad = cm->algs + r->alg; for (t = 0; t < VNET_CRYPTO_OP_N_TYPES; t++) { vnet_crypto_op_id_t id = ad->op_by_type[t]; @@ -308,24 +311,16 @@ test_crypto_incremental (vlib_main_t * vm, crypto_test_main_t * tm, op->src = tm->inc_data; op->dst = encrypted_data + computed_data_total_len; computed_data_total_len += r->plaintext_incremental; - op->user_data = i; - op++; - break; - case VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT: - vnet_crypto_op_init (op, id); - op->iv = tm->inc_data; - op->key_index = vnet_crypto_key_add (vm, r->alg, - tm->inc_data, r->key.length); - vec_add1 (key_indices, op->key_index); - op->aad = tm->inc_data; - op->aad_len = r->aad.length; - op->len = r->plaintext_incremental; - op->dst = encrypted_data + computed_data_total_len; - computed_data_total_len += r->plaintext_incremental; - op->src = tm->inc_data; - op->tag = encrypted_data + computed_data_total_len; - computed_data_total_len += r->tag.length; - op->tag_len = r->tag.length; + + if (ad->is_aead) + { + op->aad = tm->inc_data; + op->aad_len = r->aad.length; + op->tag = encrypted_data + computed_data_total_len; + computed_data_total_len += r->tag.length; + op->tag_len = r->tag.length; + } + op->user_data = i; op++; break; @@ -352,7 +347,7 @@ test_crypto_incremental (vlib_main_t * vm, crypto_test_main_t * tm, { r = rv[i]; int t; - ad = vec_elt_at_index 
(cm->algs, r->alg); + ad = cm->algs + r->alg; for (t = 0; t < VNET_CRYPTO_OP_N_TYPES; t++) { vnet_crypto_op_id_t id = ad->op_by_type[t]; @@ -372,25 +367,16 @@ test_crypto_incremental (vlib_main_t * vm, crypto_test_main_t * tm, op->src = encrypted_data + computed_data_total_len; op->dst = decrypted_data + computed_data_total_len; computed_data_total_len += r->plaintext_incremental; - op->user_data = i; - op++; - break; - case VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT: - vnet_crypto_op_init (op, id); - op->iv = tm->inc_data; - op->key_index = vnet_crypto_key_add (vm, r->alg, - tm->inc_data, r->key.length); - vec_add1 (key_indices, op->key_index); - op->aad = tm->inc_data; - op->aad_len = r->aad.length; - op->len = r->plaintext_incremental; - op->dst = decrypted_data + computed_data_total_len; - op->src = encrypted_data + computed_data_total_len; - computed_data_total_len += r->plaintext_incremental; - op->tag = encrypted_data + computed_data_total_len; - computed_data_total_len += r->tag.length; - op->tag_len = r->tag.length; + if (ad->is_aead) + { + op->aad = tm->inc_data; + op->aad_len = r->aad.length; + op->tag = encrypted_data + computed_data_total_len; + computed_data_total_len += r->tag.length; + op->tag_len = r->tag.length; + } + op->user_data = i; op++; break; @@ -458,7 +444,7 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm, { r = rv[i]; int t; - ad = vec_elt_at_index (cm->algs, r->alg); + ad = cm->algs + r->alg; for (t = 0; t < VNET_CRYPTO_OP_N_TYPES; t++) { vnet_crypto_op_id_t id = ad->op_by_type[t]; @@ -483,116 +469,117 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm, { case VNET_CRYPTO_OP_TYPE_ENCRYPT: case VNET_CRYPTO_OP_TYPE_DECRYPT: - op->iv = r->iv.data; - op->key_index = vnet_crypto_key_add (vm, r->alg, - r->key.data, - r->key.length); - vec_add1 (key_indices, op->key_index); - - if (r->is_chained) - { - pt = r->pt_chunks; - ct = r->ct_chunks; - op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; - op->chunk_index = vec_len 
(chunks); - while (pt->data) - { - ch.src = t == VNET_CRYPTO_OP_TYPE_ENCRYPT ? - pt->data : ct->data; - ch.len = pt->length; - ch.dst = computed_data + computed_data_total_len; - computed_data_total_len += pt->length; - vec_add1 (chunks, ch); - op->n_chunks++; - pt++; - ct++; - } - } - else - { - op->len = r->plaintext.length; - op->src = t == VNET_CRYPTO_OP_TYPE_ENCRYPT ? - r->plaintext.data : r->ciphertext.data; - op->dst = computed_data + computed_data_total_len; - computed_data_total_len += r->ciphertext.length; - } - break; - case VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT: - case VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT: - if (r->is_chained) - { - op->iv = r->iv.data; - op->key_index = vnet_crypto_key_add (vm, r->alg, - r->key.data, - r->key.length); - vec_add1 (key_indices, op->key_index); - op->aad = r->aad.data; - op->aad_len = r->aad.length; - if (t == VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT) - { - pt = r->pt_chunks; - op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; - op->chunk_index = vec_len (chunks); - while (pt->data) - { - clib_memset (&ch, 0, sizeof (ch)); - ch.src = pt->data; - ch.len = pt->length; - ch.dst = computed_data + computed_data_total_len; - computed_data_total_len += pt->length; - vec_add1 (chunks, ch); - op->n_chunks++; - pt++; - } - op->tag = computed_data + computed_data_total_len; - computed_data_total_len += r->tag.length; - } - else - { - ct = r->ct_chunks; - op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; - op->chunk_index = vec_len (chunks); - while (ct->data) - { - clib_memset (&ch, 0, sizeof (ch)); - ch.src = ct->data; - ch.len = ct->length; - ch.dst = computed_data + computed_data_total_len; - computed_data_total_len += ct->length; - vec_add1 (chunks, ch); - op->n_chunks++; - ct++; - } - op->tag = r->tag.data; - } - op->tag_len = r->tag.length; - } - else - { - op->iv = r->iv.data; - op->key_index = vnet_crypto_key_add (vm, r->alg, - r->key.data, - r->key.length); - vec_add1 (key_indices, op->key_index); - op->aad = r->aad.data; - op->aad_len = 
r->aad.length; - op->len = r->plaintext.length; - op->dst = computed_data + computed_data_total_len; - computed_data_total_len += r->ciphertext.length; - - if (t == VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT) - { - op->src = r->plaintext.data; - op->tag = computed_data + computed_data_total_len; - computed_data_total_len += r->tag.length; - } - else - { - op->tag = r->tag.data; - op->src = r->ciphertext.data; - } - op->tag_len = r->tag.length; - } + if (!ad->is_aead) + { + op->iv = r->iv.data; + op->key_index = + vnet_crypto_key_add (vm, r->alg, r->key.data, r->key.length); + vec_add1 (key_indices, op->key_index); + + if (r->is_chained) + { + pt = r->pt_chunks; + ct = r->ct_chunks; + op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + op->chunk_index = vec_len (chunks); + while (pt->data) + { + ch.src = t == VNET_CRYPTO_OP_TYPE_ENCRYPT ? pt->data : + ct->data; + ch.len = pt->length; + ch.dst = computed_data + computed_data_total_len; + computed_data_total_len += pt->length; + vec_add1 (chunks, ch); + op->n_chunks++; + pt++; + ct++; + } + } + else + { + op->len = r->plaintext.length; + op->src = t == VNET_CRYPTO_OP_TYPE_ENCRYPT ? 
+ r->plaintext.data : + r->ciphertext.data; + op->dst = computed_data + computed_data_total_len; + computed_data_total_len += r->ciphertext.length; + } + } + else + { + if (r->is_chained) + { + op->iv = r->iv.data; + op->key_index = vnet_crypto_key_add ( + vm, r->alg, r->key.data, r->key.length); + vec_add1 (key_indices, op->key_index); + op->aad = r->aad.data; + op->aad_len = r->aad.length; + if (t == VNET_CRYPTO_OP_TYPE_ENCRYPT) + { + pt = r->pt_chunks; + op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + op->chunk_index = vec_len (chunks); + while (pt->data) + { + clib_memset (&ch, 0, sizeof (ch)); + ch.src = pt->data; + ch.len = pt->length; + ch.dst = computed_data + computed_data_total_len; + computed_data_total_len += pt->length; + vec_add1 (chunks, ch); + op->n_chunks++; + pt++; + } + op->tag = computed_data + computed_data_total_len; + computed_data_total_len += r->tag.length; + } + else + { + ct = r->ct_chunks; + op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS; + op->chunk_index = vec_len (chunks); + while (ct->data) + { + clib_memset (&ch, 0, sizeof (ch)); + ch.src = ct->data; + ch.len = ct->length; + ch.dst = computed_data + computed_data_total_len; + computed_data_total_len += ct->length; + vec_add1 (chunks, ch); + op->n_chunks++; + ct++; + } + op->tag = r->tag.data; + } + op->tag_len = r->tag.length; + } + else + { + op->iv = r->iv.data; + op->key_index = vnet_crypto_key_add ( + vm, r->alg, r->key.data, r->key.length); + vec_add1 (key_indices, op->key_index); + op->aad = r->aad.data; + op->aad_len = r->aad.length; + op->len = r->plaintext.length; + op->dst = computed_data + computed_data_total_len; + computed_data_total_len += r->ciphertext.length; + + if (t == VNET_CRYPTO_OP_TYPE_ENCRYPT) + { + op->src = r->plaintext.data; + op->tag = computed_data + computed_data_total_len; + computed_data_total_len += r->tag.length; + } + else + { + op->tag = r->tag.data; + op->src = r->ciphertext.data; + } + op->tag_len = r->tag.length; + } + } break; case 
VNET_CRYPTO_OP_TYPE_HMAC: if (r->is_chained) @@ -663,29 +650,8 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm, static u32 test_crypto_get_key_sz (vnet_crypto_alg_t alg) { - switch (alg) - { -#define _(n, s, l) \ - case VNET_CRYPTO_ALG_##n: \ - return l; - foreach_crypto_cipher_alg - foreach_crypto_aead_alg -#undef _ - case VNET_CRYPTO_ALG_HMAC_MD5: - case VNET_CRYPTO_ALG_HMAC_SHA1: - return 20; - case VNET_CRYPTO_ALG_HMAC_SHA224: - return 28; - case VNET_CRYPTO_ALG_HMAC_SHA256: - return 32; - case VNET_CRYPTO_ALG_HMAC_SHA384: - return 48; - case VNET_CRYPTO_ALG_HMAC_SHA512: - return 64; - default: - return 0; - } - return 0; + vnet_crypto_main_t *cm = &crypto_main; + return cm->algs[alg].key_length; } static clib_error_t * @@ -717,7 +683,7 @@ test_crypto (vlib_main_t * vm, crypto_test_main_t * tm) else vec_add1 (static_tests, r); - ad = vec_elt_at_index (cm->algs, r->alg); + ad = cm->algs + r->alg; for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++) { @@ -729,37 +695,8 @@ test_crypto (vlib_main_t * vm, crypto_test_main_t * tm) switch (i) { case VNET_CRYPTO_OP_TYPE_ENCRYPT: - if (r->plaintext_incremental) - { - computed_data_total_incr_len += r->plaintext_incremental; - n_ops_incr += 1; - } - /* fall though */ - case VNET_CRYPTO_OP_TYPE_DECRYPT: - case VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT: - if (r->is_chained) - { - ct = r->ct_chunks; - j = 0; - while (ct->data) - { - if (j > CRYPTO_TEST_MAX_OP_CHUNKS) - return clib_error_return (0, - "test case '%s' exceeds extra data!", - r->name); - computed_data_total_len += ct->length; - ct++; - j++; - } - n_chained_ops += 1; - } - else if (!r->plaintext_incremental) - { - computed_data_total_len += r->ciphertext.length; - n_ops_static += 1; - } - break; - case VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT: + if (ad->is_aead) + { if (r->plaintext_incremental) { computed_data_total_incr_len += r->plaintext_incremental; @@ -777,9 +714,9 @@ test_crypto (vlib_main_t * vm, crypto_test_main_t * tm) while (ct->data) { if (j > 
CRYPTO_TEST_MAX_OP_CHUNKS) - return clib_error_return (0, - "test case '%s' exceeds extra data!", - r->name); + return clib_error_return ( + 0, "test case '%s' exceeds extra data!", + r->name); computed_data_total_len += ct->length; ct++; j++; @@ -790,6 +727,36 @@ test_crypto (vlib_main_t * vm, crypto_test_main_t * tm) n_ops_static += 1; } break; + } + + if (r->plaintext_incremental) + { + computed_data_total_incr_len += r->plaintext_incremental; + n_ops_incr += 1; + } + /* fall though */ + case VNET_CRYPTO_OP_TYPE_DECRYPT: + if (r->is_chained) + { + ct = r->ct_chunks; + j = 0; + while (ct->data) + { + if (j > CRYPTO_TEST_MAX_OP_CHUNKS) + return clib_error_return ( + 0, "test case '%s' exceeds extra data!", r->name); + computed_data_total_len += ct->length; + ct++; + j++; + } + n_chained_ops += 1; + } + else if (!r->plaintext_incremental) + { + computed_data_total_len += r->ciphertext.length; + n_ops_static += 1; + } + break; case VNET_CRYPTO_OP_TYPE_HMAC: if (r->plaintext_incremental) { @@ -850,7 +817,7 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm) u32 n_buffers, n_alloc = 0, warmup_rounds, rounds; u32 *buffer_indices = 0; vnet_crypto_op_t *ops1 = 0, *ops2 = 0, *op1, *op2; - vnet_crypto_alg_data_t *ad = vec_elt_at_index (cm->algs, tm->alg); + vnet_crypto_alg_data_t *ad = cm->algs + tm->alg; vnet_crypto_key_index_t key_index = ~0; u8 key[64]; int buffer_size = vlib_buffer_get_default_data_size (vm); @@ -923,23 +890,15 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm) op1->src = op2->src = op1->dst = op2->dst = b->data; op1->key_index = op2->key_index = key_index; op1->iv = op2->iv = b->data - 64; - n_bytes += op1->len = op2->len = buffer_size; - break; - case VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT: - case VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT: - vnet_crypto_op_init (op1, - ad->op_by_type - [VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT]); - vnet_crypto_op_init (op2, - ad->op_by_type - [VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT]); - op1->src = op2->src = op1->dst = 
op2->dst = b->data; - op1->key_index = op2->key_index = key_index; - op1->tag = op2->tag = b->data - 32; - op1->iv = op2->iv = b->data - 64; - op1->aad = op2->aad = b->data - VLIB_BUFFER_PRE_DATA_SIZE; - op1->aad_len = op2->aad_len = 64; - op1->tag_len = op2->tag_len = 16; + + if (ad->is_aead) + { + op1->tag = op2->tag = b->data - 32; + op1->aad = op2->aad = b->data - VLIB_BUFFER_PRE_DATA_SIZE; + op1->aad_len = op2->aad_len = 64; + op1->tag_len = op2->tag_len = 16; + } + n_bytes += op1->len = op2->len = buffer_size; break; case VNET_CRYPTO_OP_TYPE_HMAC: diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c index fe4664bda4a..993f1be41a9 100644 --- a/src/plugins/unittest/session_test.c +++ b/src/plugins/unittest/session_test.c @@ -1864,7 +1864,7 @@ test_app_send_io_evt_rx (app_worker_t *app_wrk, session_t *s) if (svm_fifo_has_event (s->rx_fifo)) return 0; - app_session = s->rx_fifo->shr->client_session_index; + app_session = s->rx_fifo->app_session_index; mq = app_wrk->event_queue; rv = test_mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg); diff --git a/src/scripts/vnet/arp6 b/src/scripts/vnet/arp6 index 2ade2962db8..940b8c3c210 100644 --- a/src/scripts/vnet/arp6 +++ b/src/scripts/vnet/arp6 @@ -10,7 +10,7 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 loop create loop create set int state loop0 up diff --git a/src/scripts/vnet/icmp b/src/scripts/vnet/icmp index 603d9d77700..71ab0106b6c 100644 --- a/src/scripts/vnet/icmp +++ b/src/scripts/vnet/icmp @@ -10,6 +10,6 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 ip route 5.6.7.8/32 via local ip route 1.2.3.4/32 via local diff --git a/src/scripts/vnet/icmp6 b/src/scripts/vnet/icmp6 index eb41470f448..b3bc1c5182a 100644 --- a/src/scripts/vnet/icmp6 +++ b/src/scripts/vnet/icmp6 @@ -10,6 +10,6 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 ip route ::1/128 via local ip route ::2/128 
via local diff --git a/src/scripts/vnet/ige b/src/scripts/vnet/ige index f721e597678..bd073b2369f 100644 --- a/src/scripts/vnet/ige +++ b/src/scripts/vnet/ige @@ -16,4 +16,4 @@ set int state GigabitEthernet4/0/0 up set int state GigabitEthernet4/0/1 up ip route add 1.0.0.3/32 via GigabitEthernet4/0/1 IP4: 00:15:17:61:73:47 -> 00:15:17:61:73:46 -tr add ige-input 10 +trace add ige-input 10 diff --git a/src/scripts/vnet/ip6-hbh b/src/scripts/vnet/ip6-hbh index 7bb0be1a513..6498237a384 100644 --- a/src/scripts/vnet/ip6-hbh +++ b/src/scripts/vnet/ip6-hbh @@ -75,4 +75,4 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 diff --git a/src/scripts/vnet/pcap b/src/scripts/vnet/pcap index c52ddb74587..f1266ba45ea 100644 --- a/src/scripts/vnet/pcap +++ b/src/scripts/vnet/pcap @@ -12,5 +12,5 @@ packet-generator new { \ pcap /home/eliot/pcap-data/ISIS_p2p_adjacency.cap \ } -tr add pg-input 10 +trace add pg-input 10 diff --git a/src/scripts/vnet/rpf b/src/scripts/vnet/rpf index 80aa9bc3b67..236343ed967 100644 --- a/src/scripts/vnet/rpf +++ b/src/scripts/vnet/rpf @@ -10,7 +10,7 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 set interface ip source-check pg/stream-0 ip route 1.2.3.4/32 via pg/stream-0 0x01020304 ip route 5.6.7.8/32 via drop diff --git a/src/scripts/vnet/source_and_port_range_check b/src/scripts/vnet/source_and_port_range_check index abe7034c576..20f1b0dbc34 100644 --- a/src/scripts/vnet/source_and_port_range_check +++ b/src/scripts/vnet/source_and_port_range_check @@ -44,7 +44,7 @@ set ip source-and-port-range-check 1.1.1.0/24 range 2000 - 3000 vrf 99 set interface ip source-and-port-range-check pg0 udp-out-vrf 99 - show ip source-and-port-range-check vrf 99 1.1.1.1 +show ip source-and-port-range-check vrf 99 1.1.1.1 set ip source-and-port-range-check 1.1.1.0/24 range 4000 - 5000 vrf 99 @@ -57,4 +57,4 @@ set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99 del show ip 
source-and-port-range-check vrf 99 1.1.2.1 -tr add pg-input 100 +trace add pg-input 100 diff --git a/src/scripts/vnet/speed b/src/scripts/vnet/speed index eecc1d3d1e9..2c7691f1548 100644 --- a/src/scripts/vnet/speed +++ b/src/scripts/vnet/speed @@ -10,5 +10,5 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 ip route 5.6.7.8/32 via pg/stream-0 000102030405060708090a0b0c0d diff --git a/src/scripts/vnet/srp b/src/scripts/vnet/srp index d77b0bd27a8..e32af9f56d4 100644 --- a/src/scripts/vnet/srp +++ b/src/scripts/vnet/srp @@ -15,7 +15,7 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 set int state fake-srp0 up set int state fake-srp1 up diff --git a/src/scripts/vnet/urpf b/src/scripts/vnet/urpf index 48855e4b165..c2fecf7c44f 100644 --- a/src/scripts/vnet/urpf +++ b/src/scripts/vnet/urpf @@ -64,7 +64,7 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 set int ip addr pg0 10.10.11.10/24 diff --git a/src/scripts/vnet/vlan b/src/scripts/vnet/vlan index 130027b4b32..a8b98f7035a 100644 --- a/src/scripts/vnet/vlan +++ b/src/scripts/vnet/vlan @@ -17,6 +17,6 @@ packet-generator new { \ } \ } -tr add pg-input 100 +trace add pg-input 100 ip route 5.6.7.8/32 via local ip route 1.2.3.4/32 via local diff --git a/src/svm/fifo_segment.c b/src/svm/fifo_segment.c index d5f62913082..86f4ff2d0d2 100644 --- a/src/svm/fifo_segment.c +++ b/src/svm/fifo_segment.c @@ -969,6 +969,7 @@ fifo_segment_free_fifo (fifo_segment_t * fs, svm_fifo_t * f) if (CLIB_DEBUG) { sf->master_session_index = ~0; + f->vpp_session_index = ~0; f->master_thread_index = ~0; } @@ -1002,12 +1003,12 @@ fifo_segment_detach_fifo (fifo_segment_t *fs, svm_fifo_t **f) pfss_fifo_del_active_list (pfss, of); /* Collect chunks that were provided in return for those detached */ - fsh_slice_collect_chunks (fs->h, fss, of->chunks_at_attach); - of->chunks_at_attach = 0; + fsh_slice_collect_chunks (fs->h, fss, svm_fifo_chunks_at_attach (of)); + 
svm_fifo_chunks_at_attach (of) = 0; /* Collect hdr that was provided in return for the detached */ - fss_fifo_free_list_push (fs->h, fss, of->hdr_at_attach); - of->hdr_at_attach = 0; + fss_fifo_free_list_push (fs->h, fss, svm_fifo_hdr_at_attach (of)); + svm_fifo_hdr_at_attach (of) = 0; clib_mem_bulk_free (pfss->fifos, *f); *f = 0; @@ -1034,10 +1035,11 @@ fifo_segment_attach_fifo (fifo_segment_t *fs, svm_fifo_t **f, u32 slice_index) /* Allocate shared hdr and chunks to be collected at detach in return * for those that are being attached now */ of = *f; - of->hdr_at_attach = fsh_try_alloc_fifo_hdr (fs->h, fss); + svm_fifo_hdr_at_attach (of) = fsh_try_alloc_fifo_hdr (fs->h, fss); c = fs_chunk_ptr (fs->h, nf->shr->start_chunk); - of->chunks_at_attach = pc = fsh_try_alloc_chunk (fs->h, fss, c->length); + svm_fifo_chunks_at_attach (of) = pc = + fsh_try_alloc_chunk (fs->h, fss, c->length); while ((c = fs_chunk_ptr (fs->h, c->next))) { diff --git a/src/svm/fifo_types.h b/src/svm/fifo_types.h index 742351b1764..e4b72eaf1ab 100644 --- a/src/svm/fifo_types.h +++ b/src/svm/fifo_types.h @@ -101,23 +101,39 @@ typedef struct _svm_fifo u32 ooos_newest; /**< Last segment to have been updated */ u8 flags; /**< fifo flags */ - u8 master_thread_index; /**< session layer thread index */ - u8 client_thread_index; /**< app worker index */ i8 refcnt; /**< reference count */ - u32 segment_manager; /**< session layer segment manager index */ - u32 segment_index; /**< segment index in segment manager */ + u8 client_thread_index; /**< app worker index */ + u32 app_session_index; /**< app session index */ + union + { + struct + { + u32 vpp_session_index; /**< session layer session index */ + u32 master_thread_index; /**< session layer thread index */ + }; + u64 vpp_sh; + }; + u32 segment_manager; /**< session layer segment manager index */ + u32 segment_index; /**< segment index in segment manager */ struct _svm_fifo *next; /**< prev in active chain */ struct _svm_fifo *prev; /**< prev in 
active chain */ - svm_fifo_chunk_t *chunks_at_attach; /**< chunks to be accounted at detach */ - svm_fifo_shared_t *hdr_at_attach; /**< hdr to be freed at detach */ - #if SVM_FIFO_TRACE svm_fifo_trace_elem_t *trace; #endif } svm_fifo_t; +/* To minimize size of svm_fifo_t reuse ooo pointers for tracking chunks and + * hdr at attach/detach. Fifo being migrated should not receive new data */ +#define svm_fifo_chunks_at_attach(f) f->ooo_deq +#define svm_fifo_hdr_at_attach(f) \ + ((union { \ + svm_fifo_shared_t *hdr; \ + svm_fifo_chunk_t *ooo_enq; \ + } *) &f->ooo_enq) \ + ->hdr + typedef struct fifo_segment_slice_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline); diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index 49b3d1728f3..d5c1ae657d9 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -309,27 +309,21 @@ check_tail: static int ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued, u32 * tail) { - u32 s_index, bytes = 0; + u32 s_index, s_end, bytes = 0; ooo_segment_t *s; - i32 diff; s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head); - diff = *tail - s->start; - - ASSERT (diff != n_bytes_enqueued); - - if (diff > n_bytes_enqueued) - return 0; /* If last tail update overlaps one/multiple ooo segments, remove them */ - while (0 <= diff && diff < n_bytes_enqueued) + while (f_pos_leq (s->start, *tail)) { s_index = s - f->ooo_segments; + s_end = ooo_segment_end_pos (s); /* Segment end is beyond the tail. 
Advance tail and remove segment */ - if (s->length > diff) + if (f_pos_leq (*tail, s_end)) { - bytes = s->length - diff; + bytes = s_end - *tail; *tail = *tail + bytes; ooo_segment_free (f, s_index); break; @@ -339,7 +333,6 @@ ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued, u32 * tail) if (s->next != OOO_SEGMENT_INVALID_INDEX) { s = pool_elt_at_index (f->ooo_segments, s->next); - diff = *tail - s->start; ooo_segment_free (f, s_index); } /* End of search */ @@ -1650,8 +1643,8 @@ format_svm_fifo (u8 * s, va_list * args) if (verbose > 1) s = format (s, "%Uvpp session %d thread %d app session %d thread %d\n", - format_white_space, indent, f->shr->master_session_index, - f->master_thread_index, f->shr->client_session_index, + format_white_space, indent, f->vpp_session_index, + f->master_thread_index, f->app_session_index, f->client_thread_index); if (verbose) diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c index e32e96ceaa5..f38df8fbf47 100644 --- a/src/vcl/vcl_locked.c +++ b/src/vcl/vcl_locked.c @@ -140,6 +140,7 @@ typedef struct vls_local_ pthread_mutex_t vls_mt_mq_mlock; /**< vcl mq lock */ pthread_mutex_t vls_mt_spool_mlock; /**< vcl select or pool lock */ volatile u8 select_mp_check; /**< flag set if select checks done */ + struct sigaction old_sa; /**< old sigaction to restore */ } vls_process_local_t; static vls_process_local_t vls_local; @@ -1798,8 +1799,6 @@ vls_handle_pending_wrk_cleanup (void) vec_reset_length (vls_wrk->pending_vcl_wrk_cleanup); } -static struct sigaction old_sa; - static void vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) { @@ -1809,7 +1808,7 @@ vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) if (vcl_get_worker_index () == ~0) return; - if (sigaction (SIGCHLD, &old_sa, 0)) + if (sigaction (SIGCHLD, &vlsl->old_sa, 0)) { VERR ("couldn't restore sigchld"); exit (-1); @@ -1823,6 +1822,9 @@ vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) if (!child_wrk) goto 
done; + /* TODO we need to support multiple children */ + wrk->forked_child = ~0; + if (si && si->si_pid != child_wrk->current_pid) { VDBG (0, "unexpected child pid %u", si->si_pid); @@ -1838,14 +1840,15 @@ vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) vec_add1 (vls_wrk->pending_vcl_wrk_cleanup, child_wrk->wrk_index); done: - if (old_sa.sa_flags & SA_SIGINFO) + if (vlsl->old_sa.sa_flags & SA_SIGINFO) { - void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction; - fn (signum, si, uc); + void (*fn) (int, siginfo_t *, void *) = vlsl->old_sa.sa_sigaction; + if (fn) + fn (signum, si, uc); } else { - void (*fn) (int) = old_sa.sa_handler; + void (*fn) (int) = vlsl->old_sa.sa_handler; if (fn) fn (signum); } @@ -1855,7 +1858,7 @@ static void vls_incercept_sigchld () { struct sigaction sa; - if (old_sa.sa_sigaction) + if (vlsl->old_sa.sa_sigaction) { VDBG (0, "have intercepted sigchld"); return; @@ -1863,11 +1866,17 @@ vls_incercept_sigchld () clib_memset (&sa, 0, sizeof (sa)); sa.sa_sigaction = vls_intercept_sigchld_handler; sa.sa_flags = SA_SIGINFO; - if (sigaction (SIGCHLD, &sa, &old_sa)) + if (sigaction (SIGCHLD, &sa, &vlsl->old_sa)) { VERR ("couldn't intercept sigchld"); exit (-1); } + + /* Not entirely clear how, but some processes can clear old_sa after fork + * and subsequently fork and register vls_intercept_sigchld_handler as + * old_sa handler leading to recursion */ + if (vlsl->old_sa.sa_sigaction == vls_intercept_sigchld_handler) + vlsl->old_sa.sa_sigaction = 0; } static void diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index d3ad2331827..d9814394f0d 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -600,8 +600,12 @@ vcl_segment_attach_session (uword segment_handle, uword rxf_offset, if (!is_ct) { + rxf->vpp_session_index = rxf->shr->master_session_index; + txf->vpp_session_index = txf->shr->master_session_index; rxf->shr->client_session_index = s->session_index; txf->shr->client_session_index = 
s->session_index; + rxf->app_session_index = s->session_index; + txf->app_session_index = s->session_index; rxf->client_thread_index = vcl_get_worker_index (); txf->client_thread_index = vcl_get_worker_index (); s->rx_fifo = rxf; diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 0b2141614ee..19d58c349b7 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -413,7 +413,7 @@ vcl_session_connected_handler (vcl_worker_t * wrk, }; vcl_segment_attach_session ( mp->segment_handle, mp->server_rx_fifo, mp->server_tx_fifo, - mp->vpp_event_queue_address, mp->mq_index, 0, session); + mp->vpp_event_queue_address, mp->mq_index, 0, &tmp_session); if (tmp_session.vpp_evt_q) vcl_send_session_disconnect (wrk, &tmp_session); return VCL_INVALID_SESSION_INDEX; @@ -659,8 +659,7 @@ vcl_session_migrated_handler (vcl_worker_t * wrk, void *data) /* Generate new tx event if we have outstanding data */ if (svm_fifo_has_event (s->tx_fifo)) - app_send_io_evt_to_vpp (s->vpp_evt_q, - s->tx_fifo->shr->master_session_index, + app_send_io_evt_to_vpp (s->vpp_evt_q, s->tx_fifo->vpp_session_index, SESSION_IO_EVT_TX, SVM_Q_WAIT); VDBG (0, "Migrated 0x%lx to thread %u 0x%lx", mp->handle, @@ -2201,8 +2200,7 @@ read_again: if (PREDICT_FALSE (svm_fifo_needs_deq_ntf (rx_fifo, n_read))) { svm_fifo_clear_deq_ntf (rx_fifo); - app_send_io_evt_to_vpp (s->vpp_evt_q, - s->rx_fifo->shr->master_session_index, + app_send_io_evt_to_vpp (s->vpp_evt_q, s->rx_fifo->vpp_session_index, SESSION_IO_EVT_RX, SVM_Q_WAIT); } @@ -2407,8 +2405,8 @@ vppcom_session_write_inline (vcl_worker_t *wrk, vcl_session_t *s, void *buf, } if (svm_fifo_set_event (s->tx_fifo)) - app_send_io_evt_to_vpp ( - s->vpp_evt_q, s->tx_fifo->shr->master_session_index, et, SVM_Q_WAIT); + app_send_io_evt_to_vpp (s->vpp_evt_q, s->tx_fifo->vpp_session_index, et, + SVM_Q_WAIT); /* The underlying fifo segment can run out of memory */ if (PREDICT_FALSE (n_write < 0)) diff --git a/src/vlib/threads.c b/src/vlib/threads.c index ef2c5616f21..fa8d949d549 100644 
--- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -282,6 +282,11 @@ vlib_thread_init (vlib_main_t * vm) tr->first_index = first_index; first_index += tr->count; n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0; + if (n_vlib_mains >= FRAME_QUEUE_MAX_NELTS) + return clib_error_return (0, + "configured amount of workers %u is" + " greater than VPP_MAX_WORKERS (%u)", + n_vlib_mains, FRAME_QUEUE_MAX_NELTS); /* construct coremask */ if (tr->use_pthreads || !tr->count) @@ -1070,6 +1075,13 @@ vlib_worker_thread_node_refork (void) VLIB_NODE_RUNTIME_DATA_SIZE); } + for (j = vec_len (old_rt); + j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); j++) + { + rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j]; + nm_clone->input_node_counts_by_state[rt->state] += 1; + } + vec_free (old_rt); /* re-clone pre-input nodes */ diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 46b1a870e9e..b6227d45a2a 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -524,6 +524,7 @@ list(APPEND VNET_SOURCES crypto/cli.c crypto/crypto.c crypto/format.c + crypto/main.c crypto/node.c crypto/crypto_api.c ) @@ -552,6 +553,7 @@ list(APPEND VNET_SOURCES ipsec/ipsec_spd_policy.c ipsec/ipsec_tun.c ipsec/ipsec_tun_in.c + ipsec/main.c ipsec/esp_format.c ipsec/esp_encrypt.c ipsec/esp_decrypt.c @@ -625,6 +627,7 @@ list(APPEND VNET_HEADERS tcp/tcp_debug.h tcp/tcp_inlines.h tcp/tcp_sack.h + tcp/tcp_sdl.h tcp/tcp_types.h tcp/tcp.h tcp/tcp_error.def diff --git a/src/vnet/arp/arp_proxy.c b/src/vnet/arp/arp_proxy.c index 39f624d5a1d..16ed09bee14 100644 --- a/src/vnet/arp/arp_proxy.c +++ b/src/vnet/arp/arp_proxy.c @@ -251,7 +251,8 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { VLIB_CLI_COMMAND (set_arp_proxy_command, static) = { .path = "set arp proxy", - .short_help = "set arp proxy [del] table-ID <table-ID> start <start-address> end <end-addres>", + .short_help = "set arp proxy [del] table-id <table-id> start " + "<start-address> 
end <end-addres>", .function = set_arp_proxy, }; diff --git a/src/vnet/crypto/cli.c b/src/vnet/crypto/cli.c index 2ca66f228c3..4cfa1bb1abc 100644 --- a/src/vnet/crypto/cli.c +++ b/src/vnet/crypto/cli.c @@ -1,19 +1,7 @@ -/* - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. */ -#include <stdbool.h> #include <vlib/vlib.h> #include <vnet/crypto/crypto.h> @@ -38,7 +26,8 @@ show_crypto_engines_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%-20s%-8s%s", "Name", "Prio", "Description"); vec_foreach (p, cm->engines) { - vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc); + if (p->name) + vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc); } return 0; } @@ -50,100 +39,53 @@ VLIB_CLI_COMMAND (show_crypto_engines_command, static) = .function = show_crypto_engines_command_fn, }; -static u8 * -format_vnet_crypto_engine_candidates (u8 * s, va_list * args) -{ - vnet_crypto_engine_t *e; - vnet_crypto_main_t *cm = &crypto_main; - u32 id = va_arg (*args, u32); - u32 ei = va_arg (*args, u32); - int is_chained = va_arg (*args, int); - int is_async = va_arg (*args, int); - - if (is_async) - { - vec_foreach (e, cm->engines) - { - if (e->enqueue_handlers[id] && e->dequeue_handler) - { - s = format (s, "%U", format_vnet_crypto_engine, e - cm->engines); - if (ei == e - cm->engines) - s = format (s, "%c ", '*'); - else - 
s = format (s, " "); - } - } - - return s; - } - else - { - vec_foreach (e, cm->engines) - { - void * h = is_chained ? (void *) e->chained_ops_handlers[id] - : (void *) e->ops_handlers[id]; - - if (h) - { - s = format (s, "%U", format_vnet_crypto_engine, e - cm->engines); - if (ei == e - cm->engines) - s = format (s, "%c ", '*'); - else - s = format (s, " "); - } - } - return s; - } -} - -static u8 * -format_vnet_crypto_handlers (u8 * s, va_list * args) -{ - vnet_crypto_alg_t alg = va_arg (*args, vnet_crypto_alg_t); - vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_alg_data_t *d = vec_elt_at_index (cm->algs, alg); - u32 indent = format_get_indent (s); - int i, first = 1; - - for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++) - { - vnet_crypto_op_data_t *od; - vnet_crypto_op_id_t id = d->op_by_type[i]; - - if (id == 0) - continue; - - od = cm->opt_data + id; - if (first == 0) - s = format (s, "\n%U", format_white_space, indent); - s = format (s, "%-16U", format_vnet_crypto_op_type, od->type); - - s = format (s, "%-28U", format_vnet_crypto_engine_candidates, id, - od->active_engine_index_simple, 0, 0); - s = format (s, "%U", format_vnet_crypto_engine_candidates, id, - od->active_engine_index_chained, 1, 0); - first = 0; - } - return s; -} - - static clib_error_t * show_crypto_handlers_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + vnet_crypto_main_t *cm = &crypto_main; unformat_input_t _line_input, *line_input = &_line_input; - int i; + u8 *s = 0; + char *handler_type_str[] = { +#define _(n, s) [VNET_CRYPTO_HANDLER_TYPE_##n] = s, + foreach_crypto_handler_type + }; if (unformat_user (input, unformat_line_input, line_input)) unformat_free (line_input); - vlib_cli_output (vm, "%-16s%-16s%-28s%s", "Algo", "Type", "Simple", - "Chained"); + FOREACH_ARRAY_ELT (a, cm->algs) + { + if (a == cm->algs) + continue; - for (i = 0; i < VNET_CRYPTO_N_ALGS; i++) - vlib_cli_output (vm, "%-20U%U", format_vnet_crypto_alg, i, - 
format_vnet_crypto_handlers, i); + vlib_cli_output (vm, "\n%s:", a->name); + for (u32 i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++) + if (a->op_by_type[i] != VNET_CRYPTO_OP_NONE) + { + vlib_cli_output (vm, " %U:", format_vnet_crypto_op_type, i); + vnet_crypto_op_id_t id = a->op_by_type[i]; + vnet_crypto_op_data_t *od = cm->opt_data + id; + vnet_crypto_engine_t *e; + + for (u32 i = 0; i < VNET_CRYPTO_HANDLER_N_TYPES; i++) + { + vec_foreach (e, cm->engines) + { + if (e->ops[id].handlers[i]) + { + s = format (s, " %s", e->name); + if (e->ops[id].handlers[i] == od->handlers[i]) + s = format (s, "*"); + } + } + + vlib_cli_output (vm, " %s:%v", handler_type_str[i], s); + vec_reset_length (s); + } + } + } + vec_free (s); return 0; } @@ -163,10 +105,10 @@ set_crypto_handler_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; vnet_crypto_main_t *cm = &crypto_main; int rc = 0; - char **args = 0, *s, **arg, *engine = 0; + char **args = 0, *s, **arg; int all = 0; clib_error_t *error = 0; - crypto_op_class_type_t oct = CRYPTO_OP_BOTH; + vnet_crypto_set_handlers_args_t ha = {}; if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -176,11 +118,13 @@ set_crypto_handler_command_fn (vlib_main_t * vm, if (unformat (line_input, "all")) all = 1; else if (unformat (line_input, "simple")) - oct = CRYPTO_OP_SIMPLE; + ha.set_simple = 1; else if (unformat (line_input, "chained")) - oct = CRYPTO_OP_CHAINED; + ha.set_chained = 1; else if (unformat (line_input, "both")) - oct = CRYPTO_OP_BOTH; + ha.set_simple = ha.set_chained = 1; + else if (unformat (line_input, "async")) + ha.set_async = 1; else if (unformat (line_input, "%s", &s)) vec_add1 (args, s); else @@ -196,7 +140,7 @@ set_crypto_handler_command_fn (vlib_main_t * vm, goto done; } - engine = vec_elt_at_index (args, vec_len (args) - 1)[0]; + ha.engine = vec_elt_at_index (args, vec_len (args) - 1)[0]; vec_del1 (args, vec_len (args) - 1); if (all) @@ -207,7 +151,8 @@ 
set_crypto_handler_command_fn (vlib_main_t * vm, hash_foreach_mem (key, value, cm->alg_index_by_name, ({ (void) value; - rc += vnet_crypto_set_handler2 (key, engine, oct); + ha.handler_name = key; + rc += vnet_crypto_set_handlers (&ha); })); if (rc) @@ -217,88 +162,29 @@ set_crypto_handler_command_fn (vlib_main_t * vm, { vec_foreach (arg, args) { - rc = vnet_crypto_set_handler2 (arg[0], engine, oct); - if (rc) - { - vlib_cli_output (vm, "failed to set engine %s for %s!", - engine, arg[0]); - } + ha.handler_name = arg[0]; + rc = vnet_crypto_set_handlers (&ha); + if (rc) + vlib_cli_output (vm, "failed to set engine %s for %s!", ha.engine, + arg[0]); } } done: - vec_free (engine); + vec_free (ha.engine); vec_foreach (arg, args) vec_free (arg[0]); vec_free (args); unformat_free (line_input); return error; } -VLIB_CLI_COMMAND (set_crypto_handler_command, static) = -{ +VLIB_CLI_COMMAND (set_crypto_handler_command, static) = { .path = "set crypto handler", .short_help = "set crypto handler cipher [cipher2 cipher3 ...] 
engine" - " [simple|chained]", + " [simple|chained|async]", .function = set_crypto_handler_command_fn, }; -static u8 * -format_vnet_crypto_async_handlers (u8 * s, va_list * args) -{ - vnet_crypto_async_alg_t alg = va_arg (*args, vnet_crypto_async_alg_t); - vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_async_alg_data_t *d = vec_elt_at_index (cm->async_algs, alg); - u32 indent = format_get_indent (s); - int i, first = 1; - - for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_TYPES; i++) - { - vnet_crypto_async_op_data_t *od; - vnet_crypto_async_op_id_t id = d->op_by_type[i]; - - if (id == 0) - continue; - - od = cm->async_opt_data + id; - if (first == 0) - s = format (s, "\n%U", format_white_space, indent); - s = format (s, "%-16U", format_vnet_crypto_async_op_type, od->type); - - s = format (s, "%U", format_vnet_crypto_engine_candidates, id, - od->active_engine_index_async, 0, 1); - first = 0; - } - return s; -} - -static clib_error_t * -show_crypto_async_handlers_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - int i; - - if (unformat_user (input, unformat_line_input, line_input)) - unformat_free (line_input); - - vlib_cli_output (vm, "%-28s%-16s%s", "Algo", "Type", "Handler"); - - for (i = 0; i < VNET_CRYPTO_N_ASYNC_ALGS; i++) - vlib_cli_output (vm, "%-28U%U", format_vnet_crypto_async_alg, i, - format_vnet_crypto_async_handlers, i); - - return 0; -} - -VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) = -{ - .path = "show crypto async handlers", - .short_help = "show crypto async handlers", - .function = show_crypto_async_handlers_command_fn, -}; - - static clib_error_t * show_crypto_async_status_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -334,85 +220,6 @@ VLIB_CLI_COMMAND (show_crypto_async_status_command, static) = }; static clib_error_t * -set_crypto_async_handler_command_fn (vlib_main_t * vm, - unformat_input_t * input, - 
vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - vnet_crypto_main_t *cm = &crypto_main; - int rc = 0; - char **args = 0, *s, **arg, *engine = 0; - int all = 0; - clib_error_t *error = 0; - - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (line_input, "all")) - all = 1; - else if (unformat (line_input, "%s", &s)) - vec_add1 (args, s); - else - { - error = clib_error_return (0, "invalid params"); - goto done; - } - } - - if ((vec_len (args) < 2 && !all) || (vec_len (args) == 0 && all)) - { - error = clib_error_return (0, "missing cipher or engine!"); - goto done; - } - - engine = vec_elt_at_index (args, vec_len (args) - 1)[0]; - vec_del1 (args, vec_len (args) - 1); - - if (all) - { - char *key; - u8 *value; - - hash_foreach_mem (key, value, cm->async_alg_index_by_name, - ({ - (void) value; - rc += vnet_crypto_set_async_handler2 (key, engine); - })); - - if (rc) - vlib_cli_output (vm, "failed to set crypto engine!"); - } - else - { - vec_foreach (arg, args) - { - rc = vnet_crypto_set_async_handler2 (arg[0], engine); - if (rc) - { - vlib_cli_output (vm, "failed to set engine %s for %s!", - engine, arg[0]); - } - } - } - -done: - vec_free (engine); - vec_foreach (arg, args) vec_free (arg[0]); - vec_free (args); - unformat_free (line_input); - return error; -} - -VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) = -{ - .path = "set crypto async handler", - .short_help = "set crypto async handler type [type2 type3 ...] 
engine", - .function = set_crypto_async_handler_command_fn, -}; - -static clib_error_t * set_crypto_async_dispatch_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) { @@ -450,11 +257,3 @@ VLIB_CLI_COMMAND (set_crypto_async_dispatch_mode_command, static) = { .short_help = "set crypto async dispatch mode <polling|interrupt|adaptive>", .function = set_crypto_async_dispatch_command_fn, }; - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c index f46e307af89..35e7768375d 100644 --- a/src/vnet/crypto/crypto.c +++ b/src/vnet/crypto/crypto.c @@ -1,16 +1,5 @@ -/* - * Copyright (c) 2018 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
*/ #include <stdbool.h> @@ -22,8 +11,6 @@ #include <dlfcn.h> #include <dirent.h> -vnet_crypto_main_t crypto_main; - VLIB_REGISTER_LOG_CLASS (crypto_main_log, static) = { .class_name = "crypto", .subclass_name = "main", @@ -52,26 +39,31 @@ vnet_crypto_process_ops_call_handler (vlib_main_t * vm, vnet_crypto_op_chunk_t * chunks, u32 n_ops) { + vnet_crypto_op_data_t *od = cm->opt_data + opt; u32 rv = 0; if (n_ops == 0) return 0; if (chunks) { + vnet_crypto_chained_op_fn_t *fn = + od->handlers[VNET_CRYPTO_HANDLER_TYPE_CHAINED]; - if (cm->chained_ops_handlers[opt] == 0) + if (fn == 0) crypto_set_op_status (ops, n_ops, VNET_CRYPTO_OP_STATUS_FAIL_NO_HANDLER); else - rv = (cm->chained_ops_handlers[opt]) (vm, ops, chunks, n_ops); + rv = fn (vm, ops, chunks, n_ops); } else { - if (cm->ops_handlers[opt] == 0) + vnet_crypto_simple_op_fn_t *fn = + od->handlers[VNET_CRYPTO_HANDLER_TYPE_SIMPLE]; + if (fn == 0) crypto_set_op_status (ops, n_ops, VNET_CRYPTO_OP_STATUS_FAIL_NO_HANDLER); else - rv = (cm->ops_handlers[opt]) (vm, ops, n_ops); + rv = fn (vm, ops, n_ops); } return rv; } @@ -141,48 +133,34 @@ vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio, } static_always_inline void -crypto_set_active_engine (vnet_crypto_op_data_t * od, - vnet_crypto_op_id_t id, u32 ei, - crypto_op_class_type_t oct) +crypto_set_active_engine (vnet_crypto_op_data_t *od, vnet_crypto_op_id_t id, + u32 ei, vnet_crypto_handler_type_t t) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *ce = vec_elt_at_index (cm->engines, ei); - if (oct == CRYPTO_OP_BOTH || oct == CRYPTO_OP_CHAINED) - { - if (ce->chained_ops_handlers[id]) - { - od->active_engine_index_chained = ei; - cm->chained_ops_handlers[id] = ce->chained_ops_handlers[id]; - } - } - - if (oct == CRYPTO_OP_BOTH || oct == CRYPTO_OP_SIMPLE) + if (ce->ops[id].handlers[t]) { - if (ce->ops_handlers[id]) - { - od->active_engine_index_simple = ei; - cm->ops_handlers[id] = ce->ops_handlers[id]; - } + od->active_engine_index[t] = ei; 
+ cm->opt_data[id].handlers[t] = ce->ops[id].handlers[t]; } } int -vnet_crypto_set_handler2 (char *alg_name, char *engine, - crypto_op_class_type_t oct) +vnet_crypto_set_handlers (vnet_crypto_set_handlers_args_t *a) { uword *p; vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_alg_data_t *ad; int i; - p = hash_get_mem (cm->alg_index_by_name, alg_name); + p = hash_get_mem (cm->alg_index_by_name, a->handler_name); if (!p) return -1; - ad = vec_elt_at_index (cm->algs, p[0]); + ad = cm->algs + p[0]; - p = hash_get_mem (cm->engine_index_by_name, engine); + p = hash_get_mem (cm->engine_index_by_name, a->engine); if (!p) return -1; @@ -194,7 +172,15 @@ vnet_crypto_set_handler2 (char *alg_name, char *engine, continue; od = cm->opt_data + id; - crypto_set_active_engine (od, id, p[0], oct); + if (a->set_async) + crypto_set_active_engine (od, id, p[0], + VNET_CRYPTO_HANDLER_TYPE_ASYNC); + if (a->set_simple) + crypto_set_active_engine (od, id, p[0], + VNET_CRYPTO_HANDLER_TYPE_SIMPLE); + if (a->set_chained) + crypto_set_active_engine (od, id, p[0], + VNET_CRYPTO_HANDLER_TYPE_CHAINED); } return 0; @@ -207,117 +193,109 @@ vnet_crypto_is_set_handler (vnet_crypto_alg_t alg) vnet_crypto_op_id_t opt = 0; int i; - if (alg >= vec_len (cm->algs)) + if (alg >= ARRAY_LEN (cm->algs)) return 0; for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++) if ((opt = cm->algs[alg].op_by_type[i]) != 0) break; - if (opt >= vec_len (cm->ops_handlers)) - return 0; - - return NULL != cm->ops_handlers[opt]; + return NULL != cm->opt_data[opt].handlers[VNET_CRYPTO_HANDLER_TYPE_SIMPLE]; } void -vnet_crypto_register_ops_handler_inline (vlib_main_t * vm, u32 engine_index, +vnet_crypto_register_ops_handler_inline (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_ops_handler_t * fn, - vnet_crypto_chained_ops_handler_t * - cfn) + vnet_crypto_simple_op_fn_t *fn, + vnet_crypto_chained_op_fn_t *cfn) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *ae, *e = vec_elt_at_index 
(cm->engines, engine_index); vnet_crypto_op_data_t *otd = cm->opt_data + opt; - vec_validate_aligned (cm->ops_handlers, VNET_CRYPTO_N_OP_IDS - 1, - CLIB_CACHE_LINE_BYTES); - vec_validate_aligned (cm->chained_ops_handlers, VNET_CRYPTO_N_OP_IDS - 1, - CLIB_CACHE_LINE_BYTES); if (fn) { - e->ops_handlers[opt] = fn; - if (otd->active_engine_index_simple == ~0) + vnet_crypto_handler_type_t t = VNET_CRYPTO_HANDLER_TYPE_SIMPLE; + e->ops[opt].handlers[t] = fn; + if (!otd->active_engine_index[t]) { - otd->active_engine_index_simple = engine_index; - cm->ops_handlers[opt] = fn; + otd->active_engine_index[t] = engine_index; + cm->opt_data[opt].handlers[t] = fn; } - ae = vec_elt_at_index (cm->engines, otd->active_engine_index_simple); + ae = vec_elt_at_index (cm->engines, otd->active_engine_index[t]); if (ae->priority < e->priority) - crypto_set_active_engine (otd, opt, engine_index, CRYPTO_OP_SIMPLE); + crypto_set_active_engine (otd, opt, engine_index, t); } if (cfn) { - e->chained_ops_handlers[opt] = cfn; - if (otd->active_engine_index_chained == ~0) + vnet_crypto_handler_type_t t = VNET_CRYPTO_HANDLER_TYPE_CHAINED; + e->ops[opt].handlers[t] = cfn; + if (otd->active_engine_index[t]) { - otd->active_engine_index_chained = engine_index; - cm->chained_ops_handlers[opt] = cfn; + otd->active_engine_index[t] = engine_index; + cm->opt_data[opt].handlers[t] = cfn; } - ae = vec_elt_at_index (cm->engines, otd->active_engine_index_chained); + ae = vec_elt_at_index (cm->engines, otd->active_engine_index[t]); if (ae->priority < e->priority) - crypto_set_active_engine (otd, opt, engine_index, CRYPTO_OP_CHAINED); + crypto_set_active_engine (otd, opt, engine_index, t); } return; } void -vnet_crypto_register_ops_handler (vlib_main_t * vm, u32 engine_index, +vnet_crypto_register_ops_handler (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_ops_handler_t * fn) + vnet_crypto_simple_op_fn_t *fn) { vnet_crypto_register_ops_handler_inline (vm, engine_index, opt, fn, 0); } 
void -vnet_crypto_register_chained_ops_handler (vlib_main_t * vm, u32 engine_index, +vnet_crypto_register_chained_ops_handler (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_chained_ops_handler_t * - fn) + vnet_crypto_chained_op_fn_t *fn) { vnet_crypto_register_ops_handler_inline (vm, engine_index, opt, 0, fn); } void -vnet_crypto_register_ops_handlers (vlib_main_t * vm, u32 engine_index, +vnet_crypto_register_ops_handlers (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_ops_handler_t * fn, - vnet_crypto_chained_ops_handler_t * cfn) + vnet_crypto_simple_op_fn_t *fn, + vnet_crypto_chained_op_fn_t *cfn) { vnet_crypto_register_ops_handler_inline (vm, engine_index, opt, fn, cfn); } void vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index, - vnet_crypto_async_op_id_t opt, - vnet_crypto_frame_enqueue_t *enqueue_hdl) + vnet_crypto_op_id_t opt, + vnet_crypto_frame_enq_fn_t *enqueue_hdl) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *ae, *e = vec_elt_at_index (cm->engines, engine_index); - vnet_crypto_async_op_data_t *otd = cm->async_opt_data + opt; - vec_validate_aligned (cm->enqueue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS, - CLIB_CACHE_LINE_BYTES); + vnet_crypto_op_data_t *otd = cm->opt_data + opt; + vnet_crypto_handler_type_t t = VNET_CRYPTO_HANDLER_TYPE_ASYNC; if (!enqueue_hdl) return; - e->enqueue_handlers[opt] = enqueue_hdl; - if (otd->active_engine_index_async == ~0) + e->ops[opt].handlers[t] = enqueue_hdl; + if (!otd->active_engine_index[t]) { - otd->active_engine_index_async = engine_index; - cm->enqueue_handlers[opt] = enqueue_hdl; + otd->active_engine_index[t] = engine_index; + otd->handlers[t] = enqueue_hdl; } - ae = vec_elt_at_index (cm->engines, otd->active_engine_index_async); + ae = vec_elt_at_index (cm->engines, otd->active_engine_index[t]); if (ae->priority <= e->priority) { - otd->active_engine_index_async = engine_index; - cm->enqueue_handlers[opt] = enqueue_hdl; + 
otd->active_engine_index[t] = engine_index; + otd->handlers[t] = enqueue_hdl; } return; @@ -340,21 +318,23 @@ static void vnet_crypto_update_cm_dequeue_handlers (void) { vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_async_op_data_t *otd; + vnet_crypto_op_data_t *otd; vnet_crypto_engine_t *e; u32 *active_engines = 0, *ei, last_ei = ~0, i; vec_reset_length (cm->dequeue_handlers); - for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_IDS; i++) + for (i = 0; i < VNET_CRYPTO_N_OP_IDS; i++) { - otd = cm->async_opt_data + i; - if (otd->active_engine_index_async == ~0) + otd = cm->opt_data + i; + if (!otd->active_engine_index[VNET_CRYPTO_HANDLER_TYPE_ASYNC]) continue; - e = cm->engines + otd->active_engine_index_async; + e = + cm->engines + otd->active_engine_index[VNET_CRYPTO_HANDLER_TYPE_ASYNC]; if (!e->dequeue_handler) continue; - vec_add1 (active_engines, otd->active_engine_index_async); + vec_add1 (active_engines, + otd->active_engine_index[VNET_CRYPTO_HANDLER_TYPE_ASYNC]); } vec_sort_with_function (active_engines, engine_index_cmp); @@ -392,8 +372,8 @@ vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index, } void -vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index, - vnet_crypto_key_handler_t * key_handler) +vnet_crypto_register_key_handler (vlib_main_t *vm, u32 engine_index, + vnet_crypto_key_fn_t *key_handler) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *e = vec_elt_at_index (cm->engines, engine_index); @@ -401,40 +381,6 @@ vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index, return; } -static int -vnet_crypto_key_len_check (vnet_crypto_alg_t alg, u16 length) -{ - switch (alg) - { - case VNET_CRYPTO_N_ALGS: - return 0; - case VNET_CRYPTO_ALG_NONE: - return 1; - -#define _(n, s, l) \ - case VNET_CRYPTO_ALG_##n: \ - if ((l) == length) \ - return 1; \ - break; - foreach_crypto_cipher_alg foreach_crypto_aead_alg -#undef _ - /* HMAC allows any key length */ -#define _(n, s) \ - case 
VNET_CRYPTO_ALG_HMAC_##n: \ - return 1; - foreach_crypto_hmac_alg -#undef _ - -#define _(n, s) \ - case VNET_CRYPTO_ALG_HASH_##n: \ - return 1; - foreach_crypto_hash_alg -#undef _ - } - - return 0; -} - u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, u16 length) @@ -442,27 +388,43 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data, u32 index; vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *engine; - vnet_crypto_key_t *key; - + vnet_crypto_key_t *key, **kp; + vnet_crypto_alg_data_t *ad = cm->algs + alg; + u32 alloc_sz = sizeof (vnet_crypto_key_t) + round_pow2 (length, 16); u8 need_barrier_sync = 0; - if (!vnet_crypto_key_len_check (alg, length)) + ASSERT (alg != 0); + + if (length == 0) return ~0; + if (ad->variable_key_length == 0) + { + if (ad->key_length == 0) + return ~0; + + if (ad->key_length != length) + return ~0; + } + need_barrier_sync = pool_get_will_expand (cm->keys); /* If the cm->keys will expand, stop the parade. 
*/ if (need_barrier_sync) vlib_worker_thread_barrier_sync (vm); - pool_get_zero (cm->keys, key); + pool_get (cm->keys, kp); if (need_barrier_sync) vlib_worker_thread_barrier_release (vm); - index = key - cm->keys; - key->type = VNET_CRYPTO_KEY_TYPE_DATA; - key->alg = alg; - vec_validate_aligned (key->data, length - 1, CLIB_CACHE_LINE_BYTES); + key = clib_mem_alloc_aligned (alloc_sz, _Alignof (vnet_crypto_key_t)); + kp[0] = key; + index = kp - cm->keys; + *key = (vnet_crypto_key_t){ + .index = index, + .alg = alg, + .length = length, + }; clib_memcpy (key->data, data, length); vec_foreach (engine, cm->engines) if (engine->key_op_handler) @@ -475,23 +437,16 @@ vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t *engine; - vnet_crypto_key_t *key = pool_elt_at_index (cm->keys, index); + vnet_crypto_key_t *key = cm->keys[index]; + u32 sz = sizeof (vnet_crypto_key_t) + round_pow2 (key->length, 16); vec_foreach (engine, cm->engines) if (engine->key_op_handler) engine->key_op_handler (VNET_CRYPTO_KEY_OP_DEL, index); - if (key->type == VNET_CRYPTO_KEY_TYPE_DATA) - { - clib_memset (key->data, 0xfe, vec_len (key->data)); - vec_free (key->data); - } - else if (key->type == VNET_CRYPTO_KEY_TYPE_LINK) - { - key->index_crypto = key->index_integ = ~0; - } - - pool_put (cm->keys, key); + clib_memset (key, 0xfe, sz); + clib_mem_free (key); + pool_put_index (cm->keys, index); } void @@ -505,7 +460,7 @@ vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index) engine->key_op_handler (VNET_CRYPTO_KEY_OP_MODIFY, index); } -vnet_crypto_async_alg_t +vnet_crypto_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg, vnet_crypto_alg_t integ_alg) { @@ -523,25 +478,40 @@ vnet_crypto_key_add_linked (vlib_main_t * vm, vnet_crypto_key_index_t index_crypto, vnet_crypto_key_index_t index_integ) { - u32 index; + u32 index, need_barrier_sync; vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_engine_t 
*engine; - vnet_crypto_key_t *key_crypto, *key_integ, *key; - vnet_crypto_async_alg_t linked_alg; + vnet_crypto_key_t *key_crypto, *key_integ, *key, **kp; + vnet_crypto_alg_t linked_alg; - key_crypto = pool_elt_at_index (cm->keys, index_crypto); - key_integ = pool_elt_at_index (cm->keys, index_integ); + key_crypto = cm->keys[index_crypto]; + key_integ = cm->keys[index_integ]; linked_alg = vnet_crypto_link_algs (key_crypto->alg, key_integ->alg); if (linked_alg == ~0) return ~0; - pool_get_zero (cm->keys, key); - index = key - cm->keys; - key->type = VNET_CRYPTO_KEY_TYPE_LINK; - key->index_crypto = index_crypto; - key->index_integ = index_integ; - key->async_alg = linked_alg; + need_barrier_sync = pool_get_will_expand (cm->keys); + /* If the cm->keys will expand, stop the parade. */ + if (need_barrier_sync) + vlib_worker_thread_barrier_sync (vm); + + pool_get (cm->keys, kp); + + if (need_barrier_sync) + vlib_worker_thread_barrier_release (vm); + + key = clib_mem_alloc_aligned (sizeof (vnet_crypto_key_t), + _Alignof (vnet_crypto_key_t)); + kp[0] = key; + index = kp - cm->keys; + *key = (vnet_crypto_key_t){ + .index = index, + .is_link = 1, + .index_crypto = index_crypto, + .index_integ = index_integ, + .alg = linked_alg, + }; vec_foreach (engine, cm->engines) if (engine->key_op_handler) @@ -550,54 +520,6 @@ vnet_crypto_key_add_linked (vlib_main_t * vm, return index; } -static_always_inline void -crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od, - vnet_crypto_async_op_id_t id, u32 ei) -{ - vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_engine_t *ce = vec_elt_at_index (cm->engines, ei); - - if (ce->enqueue_handlers[id] && ce->dequeue_handler) - { - od->active_engine_index_async = ei; - cm->enqueue_handlers[id] = ce->enqueue_handlers[id]; - } -} - -int -vnet_crypto_set_async_handler2 (char *alg_name, char *engine) -{ - uword *p; - vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_async_alg_data_t *ad; - int i; - - p = hash_get_mem 
(cm->async_alg_index_by_name, alg_name); - if (!p) - return -1; - - ad = vec_elt_at_index (cm->async_algs, p[0]); - - p = hash_get_mem (cm->engine_index_by_name, engine); - if (!p) - return -1; - - for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_TYPES; i++) - { - vnet_crypto_async_op_data_t *od; - vnet_crypto_async_op_id_t id = ad->op_by_type[i]; - if (id == 0) - continue; - - od = cm->async_opt_data + id; - crypto_set_active_async_engine (od, id, p[0]); - } - - vnet_crypto_update_cm_dequeue_handlers (); - - return 0; -} - u32 vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name) { @@ -643,94 +565,6 @@ vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive) } } -int -vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t op) -{ - vnet_crypto_main_t *cm = &crypto_main; - - return (op < vec_len (cm->enqueue_handlers) && - NULL != cm->enqueue_handlers[op]); -} - -static void -vnet_crypto_init_cipher_data (vnet_crypto_alg_t alg, vnet_crypto_op_id_t eid, - vnet_crypto_op_id_t did, char *name, u8 is_aead) -{ - vnet_crypto_op_type_t eopt, dopt; - vnet_crypto_main_t *cm = &crypto_main; - - cm->algs[alg].name = name; - cm->opt_data[eid].alg = cm->opt_data[did].alg = alg; - cm->opt_data[eid].active_engine_index_simple = ~0; - cm->opt_data[did].active_engine_index_simple = ~0; - cm->opt_data[eid].active_engine_index_chained = ~0; - cm->opt_data[did].active_engine_index_chained = ~0; - if (is_aead) - { - eopt = VNET_CRYPTO_OP_TYPE_AEAD_ENCRYPT; - dopt = VNET_CRYPTO_OP_TYPE_AEAD_DECRYPT; - } - else - { - eopt = VNET_CRYPTO_OP_TYPE_ENCRYPT; - dopt = VNET_CRYPTO_OP_TYPE_DECRYPT; - } - cm->opt_data[eid].type = eopt; - cm->opt_data[did].type = dopt; - cm->algs[alg].op_by_type[eopt] = eid; - cm->algs[alg].op_by_type[dopt] = did; - hash_set_mem (cm->alg_index_by_name, name, alg); -} - -static void -vnet_crypto_init_hash_data (vnet_crypto_alg_t alg, vnet_crypto_op_id_t id, - char *name) -{ - vnet_crypto_main_t *cm = &crypto_main; - cm->algs[alg].name = name; - 
cm->algs[alg].op_by_type[VNET_CRYPTO_OP_TYPE_HASH] = id; - cm->opt_data[id].alg = alg; - cm->opt_data[id].active_engine_index_simple = ~0; - cm->opt_data[id].active_engine_index_chained = ~0; - cm->opt_data[id].type = VNET_CRYPTO_OP_TYPE_HASH; - hash_set_mem (cm->alg_index_by_name, name, alg); -} - -static void -vnet_crypto_init_hmac_data (vnet_crypto_alg_t alg, - vnet_crypto_op_id_t id, char *name) -{ - vnet_crypto_main_t *cm = &crypto_main; - cm->algs[alg].name = name; - cm->algs[alg].op_by_type[VNET_CRYPTO_OP_TYPE_HMAC] = id; - cm->opt_data[id].alg = alg; - cm->opt_data[id].active_engine_index_simple = ~0; - cm->opt_data[id].active_engine_index_chained = ~0; - cm->opt_data[id].type = VNET_CRYPTO_OP_TYPE_HMAC; - hash_set_mem (cm->alg_index_by_name, name, alg); -} - -static void -vnet_crypto_init_async_data (vnet_crypto_async_alg_t alg, - vnet_crypto_async_op_id_t eid, - vnet_crypto_async_op_id_t did, char *name) -{ - vnet_crypto_main_t *cm = &crypto_main; - - cm->async_algs[alg].name = name; - cm->async_algs[alg].op_by_type[VNET_CRYPTO_ASYNC_OP_TYPE_ENCRYPT] = eid; - cm->async_algs[alg].op_by_type[VNET_CRYPTO_ASYNC_OP_TYPE_DECRYPT] = did; - cm->async_opt_data[eid].type = VNET_CRYPTO_ASYNC_OP_TYPE_ENCRYPT; - cm->async_opt_data[eid].alg = alg; - cm->async_opt_data[eid].active_engine_index_async = ~0; - cm->async_opt_data[eid].active_engine_index_async = ~0; - cm->async_opt_data[did].type = VNET_CRYPTO_ASYNC_OP_TYPE_DECRYPT; - cm->async_opt_data[did].alg = alg; - cm->async_opt_data[did].active_engine_index_async = ~0; - cm->async_opt_data[did].active_engine_index_async = ~0; - hash_set_mem (cm->async_alg_index_by_name, name, alg); -} - static void vnet_crypto_load_engines (vlib_main_t *vm) { @@ -848,54 +682,21 @@ vnet_crypto_init (vlib_main_t * vm) vnet_crypto_main_t *cm = &crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_crypto_thread_t *ct = 0; + vnet_crypto_engine_t *p; + vec_add2 (cm->engines, p, 1); cm->engine_index_by_name = 
hash_create_string ( /* size */ 0, sizeof (uword)); cm->alg_index_by_name = hash_create_string (0, sizeof (uword)); - cm->async_alg_index_by_name = hash_create_string (0, sizeof (uword)); vec_validate_aligned (cm->threads, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES); vec_foreach (ct, cm->threads) pool_init_fixed (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE); - vec_validate (cm->algs, VNET_CRYPTO_N_ALGS); - vec_validate (cm->async_algs, VNET_CRYPTO_N_ASYNC_ALGS); - -#define _(n, s, l) \ - vnet_crypto_init_cipher_data (VNET_CRYPTO_ALG_##n, \ - VNET_CRYPTO_OP_##n##_ENC, \ - VNET_CRYPTO_OP_##n##_DEC, s, 0); - foreach_crypto_cipher_alg; -#undef _ -#define _(n, s, l) \ - vnet_crypto_init_cipher_data (VNET_CRYPTO_ALG_##n, \ - VNET_CRYPTO_OP_##n##_ENC, \ - VNET_CRYPTO_OP_##n##_DEC, s, 1); - foreach_crypto_aead_alg; -#undef _ -#define _(n, s) \ - vnet_crypto_init_hmac_data (VNET_CRYPTO_ALG_HMAC_##n, \ - VNET_CRYPTO_OP_##n##_HMAC, "hmac-" s); - foreach_crypto_hmac_alg; -#undef _ -#define _(n, s) \ - vnet_crypto_init_hash_data (VNET_CRYPTO_ALG_HASH_##n, \ - VNET_CRYPTO_OP_##n##_HASH, s); - foreach_crypto_hash_alg; -#undef _ -#define _(n, s, k, t, a) \ - vnet_crypto_init_async_data (VNET_CRYPTO_ALG_##n##_TAG##t##_AAD##a, \ - VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ - VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \ - s); - foreach_crypto_aead_async_alg -#undef _ -#define _(c, h, s, k ,d) \ - vnet_crypto_init_async_data (VNET_CRYPTO_ALG_##c##_##h##_TAG##d, \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \ - s); - foreach_crypto_link_async_alg -#undef _ - cm->crypto_node_index = + + FOREACH_ARRAY_ELT (e, cm->algs) + if (e->name) + hash_set_mem (cm->alg_index_by_name, e->name, e - cm->algs); + + cm->crypto_node_index = vlib_get_node_by_name (vm, (u8 *) "crypto-dispatch")->index; vnet_crypto_load_engines (vm); @@ -904,11 +705,3 @@ vnet_crypto_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (vnet_crypto_init); - -/* - * fd.io 
coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h index 13d08756109..a4b6ab97620 100644 --- a/src/vnet/crypto/crypto.h +++ b/src/vnet/crypto/crypto.h @@ -21,47 +21,38 @@ #define VNET_CRYPTO_FRAME_SIZE 64 #define VNET_CRYPTO_FRAME_POOL_SIZE 1024 -/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */ -#define foreach_crypto_cipher_alg \ - _(DES_CBC, "des-cbc", 7) \ - _(3DES_CBC, "3des-cbc", 24) \ - _(AES_128_CBC, "aes-128-cbc", 16) \ - _(AES_192_CBC, "aes-192-cbc", 24) \ - _(AES_256_CBC, "aes-256-cbc", 32) \ - _(AES_128_CTR, "aes-128-ctr", 16) \ - _(AES_192_CTR, "aes-192-ctr", 24) \ - _(AES_256_CTR, "aes-256-ctr", 32) - -/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */ +/* CRYPTO_ID, PRETTY_NAME, ARGS*/ +#define foreach_crypto_cipher_alg \ + _ (DES_CBC, "des-cbc", .key_length = 7) \ + _ (3DES_CBC, "3des-cbc", .key_length = 24) \ + _ (AES_128_CBC, "aes-128-cbc", .key_length = 16) \ + _ (AES_192_CBC, "aes-192-cbc", .key_length = 24) \ + _ (AES_256_CBC, "aes-256-cbc", .key_length = 32) \ + _ (AES_128_CTR, "aes-128-ctr", .key_length = 16) \ + _ (AES_192_CTR, "aes-192-ctr", .key_length = 24) \ + _ (AES_256_CTR, "aes-256-ctr", .key_length = 32) + +/* CRYPTO_ID, PRETTY_NAME, ARGS */ #define foreach_crypto_aead_alg \ - _ (AES_128_GCM, "aes-128-gcm", 16) \ - _ (AES_192_GCM, "aes-192-gcm", 24) \ - _ (AES_256_GCM, "aes-256-gcm", 32) \ - _ (AES_128_NULL_GMAC, "aes-128-null-gmac", 16) \ - _ (AES_192_NULL_GMAC, "aes-192-null-gmac", 24) \ - _ (AES_256_NULL_GMAC, "aes-256-null-gmac", 32) \ - _ (CHACHA20_POLY1305, "chacha20-poly1305", 32) + _ (AES_128_GCM, "aes-128-gcm", .is_aead = 1, .key_length = 16) \ + _ (AES_192_GCM, "aes-192-gcm", .is_aead = 1, .key_length = 24) \ + _ (AES_256_GCM, "aes-256-gcm", .is_aead = 1, .key_length = 32) \ + _ (AES_128_NULL_GMAC, "aes-128-null-gmac", .is_aead = 1, .key_length = 16) \ + _ (AES_192_NULL_GMAC, "aes-192-null-gmac", .is_aead = 
1, .key_length = 24) \ + _ (AES_256_NULL_GMAC, "aes-256-null-gmac", .is_aead = 1, .key_length = 32) \ + _ (CHACHA20_POLY1305, "chacha20-poly1305", .is_aead = 1, .key_length = 32) #define foreach_crypto_hash_alg \ + _ (MD5, "md5") \ _ (SHA1, "sha-1") \ _ (SHA224, "sha-224") \ _ (SHA256, "sha-256") \ _ (SHA384, "sha-384") \ _ (SHA512, "sha-512") -#define foreach_crypto_hmac_alg \ - _(MD5, "md5") \ - _(SHA1, "sha-1") \ - _(SHA224, "sha-224") \ - _(SHA256, "sha-256") \ - _(SHA384, "sha-384") \ - _(SHA512, "sha-512") - #define foreach_crypto_op_type \ _ (ENCRYPT, "encrypt") \ _ (DECRYPT, "decrypt") \ - _ (AEAD_ENCRYPT, "aead-encrypt") \ - _ (AEAD_DECRYPT, "aead-decrypt") \ _ (HMAC, "hmac") \ _ (HASH, "hash") @@ -100,7 +91,7 @@ typedef enum _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad12", 32, 16, 12) \ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \ - _ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0) + _ (CHACHA20_POLY1305, "chacha20-poly1305-aad0", 32, 16, 0) /* CRYPTO_ID, INTEG_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, DIGEST_LEN */ #define foreach_crypto_link_async_alg \ @@ -130,11 +121,16 @@ typedef enum _ (AES_256_CBC, SHA512, "aes-256-cbc-hmac-sha-512", 32, 32) \ _ (AES_128_CTR, SHA1, "aes-128-ctr-hmac-sha-1", 16, 12) \ _ (AES_192_CTR, SHA1, "aes-192-ctr-hmac-sha-1", 24, 12) \ - _ (AES_256_CTR, SHA1, "aes-256-ctr-hmac-sha-1", 32, 12) - -#define foreach_crypto_async_op_type \ - _(ENCRYPT, "async-encrypt") \ - _(DECRYPT, "async-decrypt") + _ (AES_256_CTR, SHA1, "aes-256-ctr-hmac-sha-1", 32, 12) \ + _ (AES_128_CTR, SHA256, "aes-128-ctr-hmac-sha-256", 16, 16) \ + _ (AES_192_CTR, SHA256, "aes-192-ctr-hmac-sha-256", 24, 16) \ + _ (AES_256_CTR, SHA256, "aes-256-ctr-hmac-sha-256", 32, 16) \ + _ (AES_128_CTR, SHA384, "aes-128-ctr-hmac-sha-384", 16, 24) \ + _ (AES_192_CTR, SHA384, "aes-192-ctr-hmac-sha-384", 24, 24) \ + _ (AES_256_CTR, SHA384, "aes-256-ctr-hmac-sha-384", 32, 24) \ + _ (AES_128_CTR, 
SHA512, "aes-128-ctr-hmac-sha-512", 16, 32) \ + _ (AES_192_CTR, SHA512, "aes-192-ctr-hmac-sha-512", 24, 32) \ + _ (AES_256_CTR, SHA512, "aes-256-ctr-hmac-sha-512", 32, 32) typedef enum { @@ -154,102 +150,68 @@ typedef enum typedef enum { VNET_CRYPTO_ALG_NONE = 0, -#define _(n, s, l) VNET_CRYPTO_ALG_##n, +#define _(n, s, ...) VNET_CRYPTO_ALG_##n, foreach_crypto_cipher_alg foreach_crypto_aead_alg #undef _ -#define _(n, s) VNET_CRYPTO_ALG_HMAC_##n, - foreach_crypto_hmac_alg -#undef _ -#define _(n, s) VNET_CRYPTO_ALG_HASH_##n, - foreach_crypto_hash_alg -#undef _ - VNET_CRYPTO_N_ALGS, -} vnet_crypto_alg_t; - -typedef enum -{ -#define _(n, s) VNET_CRYPTO_ASYNC_OP_TYPE_##n, - foreach_crypto_async_op_type +#define _(n, s) VNET_CRYPTO_ALG_HASH_##n, VNET_CRYPTO_ALG_HMAC_##n, + foreach_crypto_hash_alg #undef _ - VNET_CRYPTO_ASYNC_OP_N_TYPES, -} vnet_crypto_async_op_type_t; - -typedef enum -{ - VNET_CRYPTO_ASYNC_ALG_NONE = 0, #define _(n, s, k, t, a) \ VNET_CRYPTO_ALG_##n##_TAG##t##_AAD##a, - foreach_crypto_aead_async_alg + foreach_crypto_aead_async_alg #undef _ #define _(c, h, s, k ,d) \ VNET_CRYPTO_ALG_##c##_##h##_TAG##d, - foreach_crypto_link_async_alg -#undef _ - VNET_CRYPTO_N_ASYNC_ALGS, -} vnet_crypto_async_alg_t; - -typedef enum -{ - VNET_CRYPTO_ASYNC_OP_NONE = 0, -#define _(n, s, k, t, a) \ - VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ - VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, - foreach_crypto_aead_async_alg -#undef _ -#define _(c, h, s, k ,d) \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ - VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, - foreach_crypto_link_async_alg + foreach_crypto_link_async_alg #undef _ - VNET_CRYPTO_ASYNC_OP_N_IDS, -} vnet_crypto_async_op_id_t; + VNET_CRYPTO_N_ALGS, +} vnet_crypto_alg_t; typedef struct { + u32 index; + u16 length; + u8 is_link : 1; + vnet_crypto_alg_t alg : 8; union { struct { - u8 *data; - vnet_crypto_alg_t alg:8; - }; - struct - { u32 index_crypto; u32 index_integ; - vnet_crypto_async_alg_t async_alg:8; }; }; -#define 
VNET_CRYPTO_KEY_TYPE_DATA 0 -#define VNET_CRYPTO_KEY_TYPE_LINK 1 - u8 type; + u8 data[]; } vnet_crypto_key_t; typedef enum { VNET_CRYPTO_OP_NONE = 0, -#define _(n, s, l) VNET_CRYPTO_OP_##n##_ENC, VNET_CRYPTO_OP_##n##_DEC, +#define _(n, s, ...) VNET_CRYPTO_OP_##n##_ENC, VNET_CRYPTO_OP_##n##_DEC, foreach_crypto_cipher_alg foreach_crypto_aead_alg #undef _ -#define _(n, s) VNET_CRYPTO_OP_##n##_HMAC, - foreach_crypto_hmac_alg +#define _(n, s) VNET_CRYPTO_OP_##n##_HASH, VNET_CRYPTO_OP_##n##_HMAC, + foreach_crypto_hash_alg #undef _ -#define _(n, s) VNET_CRYPTO_OP_##n##_HASH, - foreach_crypto_hash_alg +#define _(n, s, k, t, a) \ + VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ + VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, + foreach_crypto_aead_async_alg #undef _ - VNET_CRYPTO_N_OP_IDS, -} vnet_crypto_op_id_t; - -typedef enum -{ - CRYPTO_OP_SIMPLE, - CRYPTO_OP_CHAINED, - CRYPTO_OP_BOTH, -} crypto_op_class_type_t; +#define _(c, h, s, k, d) \ + VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ + VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, + foreach_crypto_link_async_alg +#undef _ + VNET_CRYPTO_N_OP_IDS, +} __clib_packed vnet_crypto_op_id_t; typedef struct { char *name; + u16 key_length; + u8 is_aead : 1; + u8 variable_key_length : 1; vnet_crypto_op_id_t op_by_type[VNET_CRYPTO_OP_N_TYPES]; } vnet_crypto_alg_data_t; @@ -264,7 +226,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); uword user_data; - vnet_crypto_op_id_t op:16; + vnet_crypto_op_id_t op; vnet_crypto_op_status_t status:8; u8 flags; #define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 0) @@ -309,26 +271,19 @@ typedef struct STATIC_ASSERT_SIZEOF (vnet_crypto_op_t, CLIB_CACHE_LINE_BYTES); -typedef struct -{ - vnet_crypto_op_type_t type; - vnet_crypto_alg_t alg; - u32 active_engine_index_simple; - u32 active_engine_index_chained; -} vnet_crypto_op_data_t; +#define foreach_crypto_handler_type \ + _ (SIMPLE, "simple") \ + _ (CHAINED, "chained") \ + _ (ASYNC, "async") -typedef struct +typedef enum { - vnet_crypto_async_op_type_t 
type; - vnet_crypto_async_alg_t alg; - u32 active_engine_index_async; -} vnet_crypto_async_op_data_t; +#define _(n, s) VNET_CRYPTO_HANDLER_TYPE_##n, + foreach_crypto_handler_type +#undef _ + VNET_CRYPTO_HANDLER_N_TYPES -typedef struct -{ - char *name; - vnet_crypto_async_op_id_t op_by_type[VNET_CRYPTO_ASYNC_OP_N_TYPES]; -} vnet_crypto_async_alg_data_t; +} vnet_crypto_handler_type_t; typedef struct { @@ -366,7 +321,7 @@ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); vnet_crypto_async_frame_state_t state; - vnet_crypto_async_op_id_t op:8; + vnet_crypto_op_id_t op : 8; u16 n_elts; vnet_crypto_async_frame_elt_t elts[VNET_CRYPTO_FRAME_SIZE]; u32 buffer_indices[VNET_CRYPTO_FRAME_SIZE]; @@ -384,21 +339,20 @@ typedef struct typedef u32 vnet_crypto_key_index_t; -typedef u32 (vnet_crypto_chained_ops_handler_t) (vlib_main_t * vm, - vnet_crypto_op_t * ops[], - vnet_crypto_op_chunk_t * - chunks, u32 n_ops); +typedef u32 (vnet_crypto_chained_op_fn_t) (vlib_main_t *vm, + vnet_crypto_op_t *ops[], + vnet_crypto_op_chunk_t *chunks, + u32 n_ops); -typedef u32 (vnet_crypto_ops_handler_t) (vlib_main_t * vm, - vnet_crypto_op_t * ops[], u32 n_ops); +typedef u32 (vnet_crypto_simple_op_fn_t) (vlib_main_t *vm, + vnet_crypto_op_t *ops[], u32 n_ops); -typedef void (vnet_crypto_key_handler_t) (vnet_crypto_key_op_t kop, - vnet_crypto_key_index_t idx); +typedef void (vnet_crypto_key_fn_t) (vnet_crypto_key_op_t kop, + vnet_crypto_key_index_t idx); /** async crypto function handlers **/ -typedef int - (vnet_crypto_frame_enqueue_t) (vlib_main_t * vm, - vnet_crypto_async_frame_t * frame); +typedef int (vnet_crypto_frame_enq_fn_t) (vlib_main_t *vm, + vnet_crypto_async_frame_t *frame); typedef vnet_crypto_async_frame_t * (vnet_crypto_frame_dequeue_t) (vlib_main_t * vm, u32 * nb_elts_processed, u32 * enqueue_thread_idx); @@ -407,32 +361,29 @@ u32 vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio, char *desc); -void vnet_crypto_register_ops_handler (vlib_main_t * vm, u32 
engine_index, +void vnet_crypto_register_ops_handler (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_ops_handler_t * oph); + vnet_crypto_simple_op_fn_t *oph); -void vnet_crypto_register_chained_ops_handler (vlib_main_t * vm, - u32 engine_index, - vnet_crypto_op_id_t opt, - vnet_crypto_chained_ops_handler_t - * oph); +void +vnet_crypto_register_chained_ops_handler (vlib_main_t *vm, u32 engine_index, + vnet_crypto_op_id_t opt, + vnet_crypto_chained_op_fn_t *oph); -void vnet_crypto_register_ops_handlers (vlib_main_t * vm, u32 engine_index, +void vnet_crypto_register_ops_handlers (vlib_main_t *vm, u32 engine_index, vnet_crypto_op_id_t opt, - vnet_crypto_ops_handler_t * fn, - vnet_crypto_chained_ops_handler_t * - cfn); + vnet_crypto_simple_op_fn_t *fn, + vnet_crypto_chained_op_fn_t *cfn); -void vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index, - vnet_crypto_key_handler_t * keyh); +void vnet_crypto_register_key_handler (vlib_main_t *vm, u32 engine_index, + vnet_crypto_key_fn_t *keyh); /** async crypto register functions */ u32 vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name); -void -vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index, - vnet_crypto_async_op_id_t opt, - vnet_crypto_frame_enqueue_t *enq_fn); +void vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index, + vnet_crypto_op_id_t opt, + vnet_crypto_frame_enq_fn_t *enq_fn); void vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index, @@ -440,14 +391,16 @@ vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index, typedef struct { + void *handlers[VNET_CRYPTO_HANDLER_N_TYPES]; +} vnet_crypto_engine_op_t; + +typedef struct +{ char *name; char *desc; int priority; - vnet_crypto_key_handler_t *key_op_handler; - vnet_crypto_ops_handler_t *ops_handlers[VNET_CRYPTO_N_OP_IDS]; - vnet_crypto_chained_ops_handler_t - * chained_ops_handlers[VNET_CRYPTO_N_OP_IDS]; - 
vnet_crypto_frame_enqueue_t *enqueue_handlers[VNET_CRYPTO_ASYNC_OP_N_IDS]; + vnet_crypto_engine_op_t ops[VNET_CRYPTO_N_OP_IDS]; + vnet_crypto_key_fn_t *key_op_handler; vnet_crypto_frame_dequeue_t *dequeue_handler; } vnet_crypto_engine_t; @@ -459,20 +412,22 @@ typedef struct typedef struct { - vnet_crypto_alg_data_t *algs; + vnet_crypto_op_type_t type; + vnet_crypto_alg_t alg; + u8 active_engine_index[VNET_CRYPTO_HANDLER_N_TYPES]; + void *handlers[VNET_CRYPTO_HANDLER_N_TYPES]; +} vnet_crypto_op_data_t; + +typedef struct +{ + vnet_crypto_alg_data_t algs[VNET_CRYPTO_N_ALGS]; vnet_crypto_thread_t *threads; - vnet_crypto_ops_handler_t **ops_handlers; - vnet_crypto_chained_ops_handler_t **chained_ops_handlers; - vnet_crypto_frame_enqueue_t **enqueue_handlers; vnet_crypto_frame_dequeue_t **dequeue_handlers; vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS]; - vnet_crypto_async_op_data_t async_opt_data[VNET_CRYPTO_ASYNC_OP_N_IDS]; vnet_crypto_engine_t *engines; - vnet_crypto_key_t *keys; + vnet_crypto_key_t **keys; uword *engine_index_by_name; uword *alg_index_by_name; - uword *async_alg_index_by_name; - vnet_crypto_async_alg_data_t *async_algs; vnet_crypto_async_next_node_t *next_nodes; u32 crypto_node_index; } vnet_crypto_main_t; @@ -486,8 +441,17 @@ u32 vnet_crypto_process_ops (vlib_main_t * vm, vnet_crypto_op_t ops[], u32 n_ops); void vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive); -int vnet_crypto_set_handler2 (char *ops_handler_name, char *engine, - crypto_op_class_type_t oct); + +typedef struct +{ + char *handler_name; + char *engine; + u8 set_simple : 1; + u8 set_chained : 1; + u8 set_async : 1; +} vnet_crypto_set_handlers_args_t; + +int vnet_crypto_set_handlers (vnet_crypto_set_handlers_args_t *); int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg); u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, @@ -503,12 +467,8 @@ u32 vnet_crypto_key_add_linked (vlib_main_t * vm, vnet_crypto_key_index_t index_crypto, vnet_crypto_key_index_t 
index_integ); -int vnet_crypto_set_async_handler2 (char *alg_name, char *engine); - -int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt); - -vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg, - vnet_crypto_alg_t integ_alg); +vnet_crypto_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg, + vnet_crypto_alg_t integ_alg); format_function_t format_vnet_crypto_alg; format_function_t format_vnet_crypto_engine; @@ -517,10 +477,6 @@ format_function_t format_vnet_crypto_op_type; format_function_t format_vnet_crypto_op_status; unformat_function_t unformat_vnet_crypto_alg; -format_function_t format_vnet_crypto_async_op; -format_function_t format_vnet_crypto_async_alg; -format_function_t format_vnet_crypto_async_op_type; - static_always_inline void vnet_crypto_op_init (vnet_crypto_op_t * op, vnet_crypto_op_id_t type) { @@ -545,19 +501,13 @@ static_always_inline vnet_crypto_key_t * vnet_crypto_get_key (vnet_crypto_key_index_t index) { vnet_crypto_main_t *cm = &crypto_main; - return vec_elt_at_index (cm->keys, index); -} - -static_always_inline int -vnet_crypto_set_handler (char *alg_name, char *engine) -{ - return vnet_crypto_set_handler2 (alg_name, engine, CRYPTO_OP_BOTH); + return cm->keys[index]; } /** async crypto inline functions **/ static_always_inline vnet_crypto_async_frame_t * -vnet_crypto_async_get_frame (vlib_main_t * vm, vnet_crypto_async_op_id_t opt) +vnet_crypto_async_get_frame (vlib_main_t *vm, vnet_crypto_op_id_t opt) { vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_thread_t *ct = cm->threads + vm->thread_index; @@ -592,19 +542,22 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm, { vnet_crypto_main_t *cm = &crypto_main; vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_crypto_op_id_t op = frame->op; + vnet_crypto_frame_enq_fn_t *fn = + cm->opt_data[op].handlers[VNET_CRYPTO_HANDLER_TYPE_ASYNC]; u32 i; vlib_node_t *n; frame->state = VNET_CRYPTO_FRAME_STATE_PENDING; frame->enqueue_thread_index = 
vm->thread_index; - if (PREDICT_FALSE (cm->enqueue_handlers == NULL)) + if (PREDICT_FALSE (fn == 0)) { frame->state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR; return -1; } - int ret = (cm->enqueue_handlers[frame->op]) (vm, frame); + int ret = fn (vm, frame); if (PREDICT_TRUE (ret == 0)) { @@ -656,7 +609,7 @@ vnet_crypto_async_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f, static_always_inline void vnet_crypto_async_reset_frame (vnet_crypto_async_frame_t * f) { - vnet_crypto_async_op_id_t opt; + vnet_crypto_op_id_t opt; ASSERT (f != 0); ASSERT ((f->state == VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED || f->state == VNET_CRYPTO_FRAME_STATE_ELT_ERROR)); diff --git a/src/vnet/crypto/crypto_api.c b/src/vnet/crypto/crypto_api.c index e701864a5ba..e7322cdd553 100644 --- a/src/vnet/crypto/crypto_api.c +++ b/src/vnet/crypto/crypto_api.c @@ -68,18 +68,23 @@ vl_api_crypto_set_handler_t_handler (vl_api_crypto_set_handler_t * mp) { vl_api_crypto_set_handler_reply_t *rmp; int rv = 0; - char *engine; - char *alg_name; - crypto_op_class_type_t oct; - - engine = (char *) mp->engine; - alg_name = (char *) mp->alg_name; - oct = (crypto_op_class_type_t) mp->oct; - - if (mp->is_async) - rv = vnet_crypto_set_async_handler2 (alg_name, engine); - else - rv = vnet_crypto_set_handler2 (alg_name, engine, oct); + + enum + { + CRYPTO_OP_SIMPLE, + CRYPTO_OP_CHAINED, + CRYPTO_OP_BOTH, + } oct = (typeof (oct)) mp->oct; + + vnet_crypto_set_handlers_args_t args = { + .engine = (char *) mp->engine, + .handler_name = (char *) mp->alg_name, + .set_async = mp->is_async != 0, + .set_simple = oct == CRYPTO_OP_SIMPLE || oct == CRYPTO_OP_BOTH, + .set_chained = oct == CRYPTO_OP_CHAINED || oct == CRYPTO_OP_BOTH, + }; + + rv = vnet_crypto_set_handlers (&args); REPLY_MACRO (VL_API_CRYPTO_SET_HANDLER_REPLY); } diff --git a/src/vnet/crypto/engine.h b/src/vnet/crypto/engine.h index 993befb393a..517b6ec3457 100644 --- a/src/vnet/crypto/engine.h +++ b/src/vnet/crypto/engine.h @@ -12,8 +12,8 @@ typedef unsigned 
int u32; typedef struct { vnet_crypto_op_id_t opt; - vnet_crypto_ops_handler_t *fn; - vnet_crypto_chained_ops_handler_t *cfn; + vnet_crypto_simple_op_fn_t *fn; + vnet_crypto_chained_op_fn_t *cfn; } vnet_crypto_engine_op_handlers_t; struct vnet_crypto_engine_registration; @@ -31,7 +31,7 @@ typedef struct vnet_crypto_engine_registration u32 num_threads; void *per_thread_data; vnet_crypto_engine_init_fn_t *init_fn; - vnet_crypto_key_handler_t *key_handler; + vnet_crypto_key_fn_t *key_handler; vnet_crypto_engine_op_handlers_t *op_handlers; } vnet_crypto_engine_registration_t; diff --git a/src/vnet/crypto/format.c b/src/vnet/crypto/format.c index c503ac81663..cfcee2f4572 100644 --- a/src/vnet/crypto/format.c +++ b/src/vnet/crypto/format.c @@ -22,7 +22,7 @@ format_vnet_crypto_alg (u8 * s, va_list * args) { vnet_crypto_alg_t alg = va_arg (*args, vnet_crypto_alg_t); vnet_crypto_main_t *cm = &crypto_main; - vnet_crypto_alg_data_t *d = vec_elt_at_index (cm->algs, alg); + vnet_crypto_alg_data_t *d = cm->algs + alg; return format (s, "%s", d->name); } @@ -105,6 +105,7 @@ format_vnet_crypto_engine (u8 * s, va_list * args) return format (s, "%s", e->name); } +#if 0 u8 * format_vnet_crypto_async_op_type (u8 * s, va_list * args) { @@ -125,7 +126,7 @@ format_vnet_crypto_async_op_type (u8 * s, va_list * args) u8 * format_vnet_crypto_async_alg (u8 * s, va_list * args) { - vnet_crypto_async_alg_t alg = va_arg (*args, vnet_crypto_async_alg_t); + vnet_crypto_alg_t alg = va_arg (*args, vnet_crypto_alg_t); vnet_crypto_main_t *cm = &crypto_main; vnet_crypto_async_alg_data_t *d = vec_elt_at_index (cm->async_algs, alg); return format (s, "%s", d->name); @@ -141,6 +142,7 @@ format_vnet_crypto_async_op (u8 * s, va_list * args) return format (s, "%U-%U", format_vnet_crypto_async_op_type, otd->type, format_vnet_crypto_async_alg, otd->alg); } +#endif /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/crypto/main.c b/src/vnet/crypto/main.c new file mode 100644 index 
00000000000..4f00e9b5c62 --- /dev/null +++ b/src/vnet/crypto/main.c @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vnet/crypto/crypto.h> +#include <vnet/crypto/engine.h> + +vnet_crypto_main_t crypto_main = +{ + .algs = { +#define _(n, s, ...) \ + [VNET_CRYPTO_ALG_##n] = { \ + .name = (s), \ + .op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT] = VNET_CRYPTO_OP_##n##_ENC, \ + .op_by_type[VNET_CRYPTO_OP_TYPE_DECRYPT] = VNET_CRYPTO_OP_##n##_DEC, \ + __VA_ARGS__, \ + }, + foreach_crypto_cipher_alg foreach_crypto_aead_alg +#undef _ + +#define _(n, s) \ + [VNET_CRYPTO_ALG_HASH_##n] = { \ + .name = (s), \ + .op_by_type[VNET_CRYPTO_OP_TYPE_HASH] = VNET_CRYPTO_OP_##n##_HASH, \ + }, \ + [VNET_CRYPTO_ALG_HMAC_##n] = { \ + .name = ("hmac-" s), \ + .op_by_type[VNET_CRYPTO_OP_TYPE_HMAC] = VNET_CRYPTO_OP_##n##_HMAC, \ + .variable_key_length = 1, \ + }, + foreach_crypto_hash_alg +#undef _ + +#define _(n, s, k, t, a) \ + [VNET_CRYPTO_ALG_##n##_TAG##t##_AAD##a] = { \ + .name = (s), \ + .op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT] = \ + VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \ + .op_by_type[VNET_CRYPTO_OP_TYPE_DECRYPT] = \ + VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \ + }, + foreach_crypto_aead_async_alg +#undef _ + +#define _(c, h, s, k, d) \ + [VNET_CRYPTO_ALG_##c##_##h##_TAG##d] = { \ + .name = (s), \ + .op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT] = \ + VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \ + .op_by_type[VNET_CRYPTO_OP_TYPE_DECRYPT] = \ + VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \ + }, + foreach_crypto_link_async_alg +#undef _ + + }, + .opt_data = { +#define _(n, s, ...) 
\ + [VNET_CRYPTO_OP_##n##_ENC] = { \ + .alg = VNET_CRYPTO_ALG_##n, \ + .type = VNET_CRYPTO_OP_TYPE_ENCRYPT, \ + }, \ + [VNET_CRYPTO_OP_##n##_DEC] = { \ + .alg = VNET_CRYPTO_ALG_##n, \ + .type = VNET_CRYPTO_OP_TYPE_DECRYPT, \ + }, + foreach_crypto_cipher_alg foreach_crypto_aead_alg +#undef _ + +#define _(n, s) \ + [VNET_CRYPTO_OP_##n##_HASH] = { \ + .alg = VNET_CRYPTO_ALG_HASH_##n, \ + .type = VNET_CRYPTO_OP_TYPE_HASH, \ + }, \ + [VNET_CRYPTO_OP_##n##_HMAC] = { \ + .alg = VNET_CRYPTO_ALG_HMAC_##n, \ + .type = VNET_CRYPTO_OP_TYPE_HMAC, \ + }, + foreach_crypto_hash_alg +#undef _ + +#define _(n, s, k, t, a) \ + [VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC] = { \ + .alg = VNET_CRYPTO_ALG_##n##_TAG##t##_AAD##a, \ + .type = VNET_CRYPTO_OP_TYPE_ENCRYPT, \ + }, \ + [VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC] = { \ + .alg = VNET_CRYPTO_ALG_##n##_TAG##t##_AAD##a, \ + .type = VNET_CRYPTO_OP_TYPE_DECRYPT, \ + }, + foreach_crypto_aead_async_alg +#undef _ + +#define _(c, h, s, k, d) \ + [VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC] = { \ + .alg = VNET_CRYPTO_ALG_##c##_##h##_TAG##d, \ + .type = VNET_CRYPTO_OP_TYPE_ENCRYPT, \ + } , \ + [VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC] = { \ + .alg = VNET_CRYPTO_ALG_##c##_##h##_TAG##d, \ + .type = VNET_CRYPTO_OP_TYPE_DECRYPT, \ + }, + foreach_crypto_link_async_alg +#undef _ + + }, +}; diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c index ee7f344ce68..7d023f3ff9d 100644 --- a/src/vnet/crypto/node.c +++ b/src/vnet/crypto/node.c @@ -45,7 +45,7 @@ typedef enum typedef struct { vnet_crypto_op_status_t op_status; - vnet_crypto_async_op_id_t op; + vnet_crypto_op_id_t op; } crypto_dispatch_trace_t; static u8 * @@ -55,15 +55,14 @@ format_crypto_dispatch_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); crypto_dispatch_trace_t *t = va_arg (*args, crypto_dispatch_trace_t *); - s = format (s, "%U: %U", format_vnet_crypto_async_op, t->op, + s = format (s, "%U: %U", format_vnet_crypto_op, t->op, 
format_vnet_crypto_op_status, t->op_status); return s; } static void -vnet_crypto_async_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_buffer_t * b, - vnet_crypto_async_op_id_t op_id, +vnet_crypto_async_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_buffer_t *b, vnet_crypto_op_id_t op_id, vnet_crypto_op_status_t status) { crypto_dispatch_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr)); diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c index fccedebdcf4..e538b89a630 100644 --- a/src/vnet/dev/port.c +++ b/src/vnet/dev/port.c @@ -564,6 +564,7 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) vnet_dev_port_if_create_args_t *a = ptr; vnet_dev_port_interfaces_t *ifs = port->interfaces; vnet_dev_instance_t *di; + vnet_dev_tx_queue_t *txq, **qp; vnet_dev_rv_t rv; u16 ti = 0; @@ -614,16 +615,19 @@ vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port, void *ptr) if ((rv = vnet_dev_tx_queue_alloc (vm, port, ifs->txq_sz)) != VNET_DEV_OK) goto error; - foreach_vnet_dev_port_tx_queue (q, port) + for (ti = 0; ti < n_threads; ti++) { /* if consistent_qp is enabled, we start by assigning queues to workers * and we end with main */ u16 real_ti = (ti + a->consistent_qp) % n_threads; - q->assigned_threads = clib_bitmap_set (q->assigned_threads, real_ti, 1); + qp = pool_elt_at_index (port->tx_queues, ti % ifs->num_tx_queues); + txq = qp[0]; + txq->assigned_threads = + clib_bitmap_set (txq->assigned_threads, real_ti, 1); log_debug (dev, "port %u tx queue %u assigned to thread %u", - port->port_id, q->queue_id, real_ti); - if (++ti >= n_threads) - break; + port->port_id, txq->queue_id, real_ti); + if (clib_bitmap_count_set_bits (txq->assigned_threads) > 1) + txq->lock_needed = 1; } pool_get (dm->dev_instances, di); diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index a378dc5268a..81d6cd1a0bd 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1550,6 +1550,9 @@ 
ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0, lb0 = load_balance_get (lbi0); dpo0 = load_balance_get_bucket_i (lb0, 0); + /* Do not cache result for packets with errors, e.g., invalid csum */ + last_check->first = *error0 == IP4_ERROR_UNKNOWN_PROTOCOL ? 0 : 1; + /* * Must have a route to source otherwise we drop the packet. * ip4 broadcasts are accepted, e.g. to make dhcp client work @@ -1572,7 +1575,6 @@ ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0, last_check->src.as_u32 = ip0->src_address.as_u32; last_check->lbi = lbi0; last_check->error = *error0; - last_check->first = 0; last_check->fib_index = vnet_buffer (b)->ip.fib_index; } else @@ -1580,7 +1582,8 @@ ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0, vnet_buffer (b)->ip.adj_index[VLIB_RX] = vnet_buffer (b)->ip.adj_index[VLIB_TX]; vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi; - *error0 = last_check->error; + *error0 = + (*error0 == IP4_ERROR_UNKNOWN_PROTOCOL) ? last_check->error : *error0; } } @@ -1652,6 +1655,9 @@ ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip, dpo[0] = load_balance_get_bucket_i (lb[0], 0); dpo[1] = load_balance_get_bucket_i (lb[1], 0); + /* Do not cache result for packets with errors, e.g., invalid csum */ + last_check->first = error[1] == IP4_ERROR_UNKNOWN_PROTOCOL ? 0 : 1; + error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL && dpo[0]->dpoi_type == DPO_RECEIVE) ? 
IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]); @@ -1671,7 +1677,6 @@ ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip, last_check->src.as_u32 = ip[1]->src_address.as_u32; last_check->lbi = lbi[1]; last_check->error = error[1]; - last_check->first = 0; last_check->fib_index = vnet_buffer (b[1])->ip.fib_index; } else @@ -1684,8 +1689,10 @@ ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip, vnet_buffer (b[1])->ip.adj_index[VLIB_TX]; vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi; - error[0] = last_check->error; - error[1] = last_check->error; + error[0] = (error[0] == IP4_ERROR_UNKNOWN_PROTOCOL) ? last_check->error : + error[0]; + error[1] = (error[1] == IP4_ERROR_UNKNOWN_PROTOCOL) ? last_check->error : + error[1]; } } diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c index 01b2d2971b0..6384bb927a8 100644 --- a/src/vnet/ipsec/esp_decrypt.c +++ b/src/vnet/ipsec/esp_decrypt.c @@ -1104,8 +1104,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, ipsec_sa_t *sa0 = 0; bool anti_replay_result; int is_async = im->async_mode; - vnet_crypto_async_op_id_t async_op = ~0; - vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS]; + vnet_crypto_op_id_t async_op = ~0; + vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS]; esp_decrypt_error_t err; vlib_get_buffers (vm, from, b, n_left); diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c index f6d1ecaed24..4338cb01e5d 100644 --- a/src/vnet/ipsec/esp_encrypt.c +++ b/src/vnet/ipsec/esp_encrypt.c @@ -625,9 +625,9 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *lb; vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops; vnet_crypto_op_t **integ_ops = &ptd->integ_ops; - vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS]; + vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS]; int is_async = im->async_mode; - vnet_crypto_async_op_id_t async_op = ~0; + vnet_crypto_op_id_t 
async_op = ~0; u16 drop_next = (lt == VNET_LINK_IP6 ? ESP_ENCRYPT_NEXT_DROP6 : (lt == VNET_LINK_IP4 ? ESP_ENCRYPT_NEXT_DROP4 : diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c index 8b43dd23cc8..a1d4d56768c 100644 --- a/src/vnet/ipsec/ipsec.c +++ b/src/vnet/ipsec/ipsec.c @@ -36,8 +36,6 @@ */ #define IPSEC4_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22) -ipsec_main_t ipsec_main; - esp_async_post_next_t esp_encrypt_async_next; esp_async_post_next_t esp_decrypt_async_next; @@ -424,7 +422,6 @@ ipsec_init (vlib_main_t * vm) { clib_error_t *error; ipsec_main_t *im = &ipsec_main; - ipsec_main_crypto_alg_t *a; /* Backend registration requires the feature arcs to be set up */ if ((error = vlib_call_init_function (vm, vnet_feature_init))) @@ -471,154 +468,6 @@ ipsec_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, ipsec_cli_init))) return error; - vec_validate (im->crypto_algs, IPSEC_CRYPTO_N_ALG - 1); - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_NONE; - a->enc_op_id = VNET_CRYPTO_OP_NONE; - a->dec_op_id = VNET_CRYPTO_OP_NONE; - a->alg = VNET_CRYPTO_ALG_NONE; - a->iv_size = 0; - a->block_align = 1; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_DES_CBC; - a->enc_op_id = VNET_CRYPTO_OP_DES_CBC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_DES_CBC_DEC; - a->alg = VNET_CRYPTO_ALG_DES_CBC; - a->iv_size = a->block_align = 8; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_3DES_CBC; - a->enc_op_id = VNET_CRYPTO_OP_3DES_CBC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_3DES_CBC_DEC; - a->alg = VNET_CRYPTO_ALG_3DES_CBC; - a->iv_size = a->block_align = 8; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CBC_128; - a->enc_op_id = VNET_CRYPTO_OP_AES_128_CBC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_128_CBC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_128_CBC; - a->iv_size = a->block_align = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CBC_192; - a->enc_op_id = VNET_CRYPTO_OP_AES_192_CBC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_192_CBC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_192_CBC; - a->iv_size = 
a->block_align = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CBC_256; - a->enc_op_id = VNET_CRYPTO_OP_AES_256_CBC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_256_CBC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_256_CBC; - a->iv_size = a->block_align = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CTR_128; - a->enc_op_id = VNET_CRYPTO_OP_AES_128_CTR_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_128_CTR_DEC; - a->alg = VNET_CRYPTO_ALG_AES_128_CTR; - a->iv_size = 8; - a->block_align = 1; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CTR_192; - a->enc_op_id = VNET_CRYPTO_OP_AES_192_CTR_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_192_CTR_DEC; - a->alg = VNET_CRYPTO_ALG_AES_192_CTR; - a->iv_size = 8; - a->block_align = 1; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_CTR_256; - a->enc_op_id = VNET_CRYPTO_OP_AES_256_CTR_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_256_CTR_DEC; - a->alg = VNET_CRYPTO_ALG_AES_256_CTR; - a->iv_size = 8; - a->block_align = 1; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_GCM_128; - a->enc_op_id = VNET_CRYPTO_OP_AES_128_GCM_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_128_GCM_DEC; - a->alg = VNET_CRYPTO_ALG_AES_128_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_GCM_192; - a->enc_op_id = VNET_CRYPTO_OP_AES_192_GCM_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_192_GCM_DEC; - a->alg = VNET_CRYPTO_ALG_AES_192_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_GCM_256; - a->enc_op_id = VNET_CRYPTO_OP_AES_256_GCM_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_256_GCM_DEC; - a->alg = VNET_CRYPTO_ALG_AES_256_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_CHACHA20_POLY1305; - a->enc_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC; - a->dec_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC; - a->alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305; - a->iv_size = 8; - a->icv_size = 16; - - a = im->crypto_algs + 
IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128; - a->enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_128_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192; - a->enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_192_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256; - a->enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC; - a->dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC; - a->alg = VNET_CRYPTO_ALG_AES_256_GCM; - a->iv_size = 8; - a->block_align = 1; - a->icv_size = 16; - - vec_validate (im->integ_algs, IPSEC_INTEG_N_ALG - 1); - ipsec_main_integ_alg_t *i; - - i = &im->integ_algs[IPSEC_INTEG_ALG_MD5_96]; - i->op_id = VNET_CRYPTO_OP_MD5_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_MD5; - i->icv_size = 12; - - i = &im->integ_algs[IPSEC_INTEG_ALG_SHA1_96]; - i->op_id = VNET_CRYPTO_OP_SHA1_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_SHA1; - i->icv_size = 12; - - i = &im->integ_algs[IPSEC_INTEG_ALG_SHA_256_96]; - i->op_id = VNET_CRYPTO_OP_SHA1_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_SHA256; - i->icv_size = 12; - - i = &im->integ_algs[IPSEC_INTEG_ALG_SHA_256_128]; - i->op_id = VNET_CRYPTO_OP_SHA256_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_SHA256; - i->icv_size = 16; - - i = &im->integ_algs[IPSEC_INTEG_ALG_SHA_384_192]; - i->op_id = VNET_CRYPTO_OP_SHA384_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_SHA384; - i->icv_size = 24; - - i = &im->integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; - i->op_id = VNET_CRYPTO_OP_SHA512_HMAC; - i->alg = VNET_CRYPTO_ALG_HMAC_SHA512; - i->icv_size = 32; - vec_validate_aligned (im->ptd, vlib_num_workers (), CLIB_CACHE_LINE_BYTES); im->async_mode = 0; diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h index 9ab054cf2a9..3409d0e4fb9 100644 --- 
a/src/vnet/ipsec/ipsec.h +++ b/src/vnet/ipsec/ipsec.h @@ -112,19 +112,19 @@ typedef struct typedef struct { - vnet_crypto_op_id_t enc_op_id; - vnet_crypto_op_id_t dec_op_id; - vnet_crypto_alg_t alg; - u8 iv_size; - u8 block_align; - u8 icv_size; + const vnet_crypto_op_id_t enc_op_id; + const vnet_crypto_op_id_t dec_op_id; + const vnet_crypto_alg_t alg; + const u8 iv_size; + const u8 block_align; + const u8 icv_size; } ipsec_main_crypto_alg_t; typedef struct { - vnet_crypto_op_id_t op_id; - vnet_crypto_alg_t alg; - u8 icv_size; + const vnet_crypto_op_id_t op_id; + const vnet_crypto_alg_t alg; + const u8 icv_size; } ipsec_main_integ_alg_t; typedef struct @@ -224,10 +224,10 @@ typedef struct u32 esp_default_backend; /* crypto alg data */ - ipsec_main_crypto_alg_t *crypto_algs; + ipsec_main_crypto_alg_t crypto_algs[IPSEC_CRYPTO_N_ALG]; /* crypto integ data */ - ipsec_main_integ_alg_t *integ_algs; + ipsec_main_integ_alg_t integ_algs[IPSEC_INTEG_N_ALG]; /* per-thread data */ ipsec_per_thread_data_t *ptd; diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c index 1d5195ec793..d37d89d5e3e 100644 --- a/src/vnet/ipsec/ipsec_sa.c +++ b/src/vnet/ipsec/ipsec_sa.c @@ -161,7 +161,7 @@ ipsec_sa_set_async_op_ids (ipsec_sa_t * sa) { if (ipsec_sa_is_set_USE_ESN (sa)) { -#define _(n, s, k) \ +#define _(n, s, ...) \ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \ @@ -171,7 +171,7 @@ ipsec_sa_set_async_op_ids (ipsec_sa_t * sa) } else { -#define _(n, s, k) \ +#define _(n, s, ...) 
\ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \ @@ -383,12 +383,15 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto, clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key)); - sa->crypto_sync_key_index = vnet_crypto_key_add ( - vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len); - if (~0 == sa->crypto_sync_key_index) + if (crypto_alg != IPSEC_CRYPTO_ALG_NONE) { - pool_put (ipsec_sa_pool, sa); - return VNET_API_ERROR_KEY_LENGTH; + sa->crypto_sync_key_index = vnet_crypto_key_add ( + vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len); + if (~0 == sa->crypto_sync_key_index) + { + pool_put (ipsec_sa_pool, sa); + return VNET_API_ERROR_KEY_LENGTH; + } } if (integ_alg != IPSEC_INTEG_ALG_NONE) @@ -536,7 +539,8 @@ ipsec_sa_del (ipsec_sa_t * sa) if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa)) dpo_reset (&sa->dpo); - vnet_crypto_key_del (vm, sa->crypto_sync_key_index); + if (sa->crypto_alg != IPSEC_CRYPTO_ALG_NONE) + vnet_crypto_key_del (vm, sa->crypto_sync_key_index); if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) vnet_crypto_key_del (vm, sa->integ_sync_key_index); if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa)) diff --git a/src/vnet/ipsec/main.c b/src/vnet/ipsec/main.c new file mode 100644 index 00000000000..e17d1dc5cfe --- /dev/null +++ b/src/vnet/ipsec/main.c @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2025 Cisco Systems, Inc. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ipsec/ipsec.h> + +ipsec_main_t ipsec_main = { + .crypto_algs = { + [IPSEC_CRYPTO_ALG_NONE] = { + .enc_op_id = VNET_CRYPTO_OP_NONE, + .dec_op_id = VNET_CRYPTO_OP_NONE, + .alg = VNET_CRYPTO_ALG_NONE, + .iv_size = 0, + .block_align = 1, + }, + + [IPSEC_CRYPTO_ALG_DES_CBC] = { + .enc_op_id = VNET_CRYPTO_OP_DES_CBC_ENC, + .dec_op_id = VNET_CRYPTO_OP_DES_CBC_DEC, + .alg = VNET_CRYPTO_ALG_DES_CBC, + .iv_size = 8, + .block_align = 8, + }, + + [IPSEC_CRYPTO_ALG_3DES_CBC] = { + .enc_op_id = VNET_CRYPTO_OP_3DES_CBC_ENC, + .dec_op_id = VNET_CRYPTO_OP_3DES_CBC_DEC, + .alg = VNET_CRYPTO_ALG_3DES_CBC, + .iv_size = 8, + .block_align = 8, + }, + + [IPSEC_CRYPTO_ALG_AES_CBC_128] = { + .enc_op_id = VNET_CRYPTO_OP_AES_128_CBC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_128_CBC_DEC, + .alg = VNET_CRYPTO_ALG_AES_128_CBC, + .iv_size = 16, + .block_align = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_CBC_192] = { + .enc_op_id = VNET_CRYPTO_OP_AES_192_CBC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_192_CBC_DEC, + .alg = VNET_CRYPTO_ALG_AES_192_CBC, + .iv_size = 16, + .block_align = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_CBC_256] = { + .enc_op_id = VNET_CRYPTO_OP_AES_256_CBC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_256_CBC_DEC, + .alg = VNET_CRYPTO_ALG_AES_256_CBC, + .iv_size = 16, + .block_align = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_CTR_128] = { + .enc_op_id = VNET_CRYPTO_OP_AES_128_CTR_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_128_CTR_DEC, + .alg = VNET_CRYPTO_ALG_AES_128_CTR, + .iv_size = 8, + .block_align = 1, + }, + + [IPSEC_CRYPTO_ALG_AES_CTR_192] = { + .enc_op_id = VNET_CRYPTO_OP_AES_192_CTR_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_192_CTR_DEC, + .alg = VNET_CRYPTO_ALG_AES_192_CTR, + .iv_size = 8, + .block_align = 1, + }, + + [IPSEC_CRYPTO_ALG_AES_CTR_256] = { + .enc_op_id = VNET_CRYPTO_OP_AES_256_CTR_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_256_CTR_DEC, + .alg = VNET_CRYPTO_ALG_AES_256_CTR, + .iv_size = 8, + .block_align = 1, + }, + + [IPSEC_CRYPTO_ALG_AES_GCM_128] = { 
+ .enc_op_id = VNET_CRYPTO_OP_AES_128_GCM_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_128_GCM_DEC, + .alg = VNET_CRYPTO_ALG_AES_128_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_GCM_192] = { + .enc_op_id = VNET_CRYPTO_OP_AES_192_GCM_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_192_GCM_DEC, + .alg = VNET_CRYPTO_ALG_AES_192_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_GCM_256] = { + .enc_op_id = VNET_CRYPTO_OP_AES_256_GCM_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_256_GCM_DEC, + .alg = VNET_CRYPTO_ALG_AES_256_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_CHACHA20_POLY1305] = { + .enc_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, + .dec_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, + .alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305, + .iv_size = 8, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128] = { + .enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC, + .alg = VNET_CRYPTO_ALG_AES_128_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192] = { + .enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC, + .alg = VNET_CRYPTO_ALG_AES_192_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + + [IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256] = { + .enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC, + .dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC, + .alg = VNET_CRYPTO_ALG_AES_256_GCM, + .iv_size = 8, + .block_align = 1, + .icv_size = 16, + }, + }, + .integ_algs = { + [IPSEC_INTEG_ALG_MD5_96] = { + .op_id = VNET_CRYPTO_OP_MD5_HMAC, + .alg = VNET_CRYPTO_ALG_HMAC_MD5, + .icv_size = 12, + }, + + [IPSEC_INTEG_ALG_SHA1_96] = { + .op_id = VNET_CRYPTO_OP_SHA1_HMAC, + .alg = VNET_CRYPTO_ALG_HMAC_SHA1, + .icv_size = 12, + }, + + [IPSEC_INTEG_ALG_SHA_256_96] = { + .op_id = VNET_CRYPTO_OP_SHA1_HMAC, + .alg = 
VNET_CRYPTO_ALG_HMAC_SHA256, + .icv_size = 12, + }, + + [IPSEC_INTEG_ALG_SHA_256_128] = { + .op_id = VNET_CRYPTO_OP_SHA256_HMAC, + .alg = VNET_CRYPTO_ALG_HMAC_SHA256, + .icv_size = 16, + }, + + [IPSEC_INTEG_ALG_SHA_384_192] = { + .op_id = VNET_CRYPTO_OP_SHA384_HMAC, + .alg = VNET_CRYPTO_ALG_HMAC_SHA384, + .icv_size = 24, + }, + + [IPSEC_INTEG_ALG_SHA_512_256] = { + .op_id = VNET_CRYPTO_OP_SHA512_HMAC, + .alg = VNET_CRYPTO_ALG_HMAC_SHA512, + .icv_size = 32, + }, + }, +}; diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index d5656ff8341..21ed97998f2 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -688,8 +688,8 @@ app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at, if (do_evt) { if (svm_fifo_set_event (f)) - app_send_io_evt_to_vpp (vpp_evt_q, f->shr->master_session_index, - evt_type, noblock); + app_send_io_evt_to_vpp (vpp_evt_q, f->vpp_session_index, evt_type, + noblock); } return len; } @@ -712,8 +712,8 @@ app_send_stream_raw (svm_fifo_t * f, svm_msg_q_t * vpp_evt_q, u8 * data, if (do_evt) { if (rv > 0 && svm_fifo_set_event (f)) - app_send_io_evt_to_vpp (vpp_evt_q, f->shr->master_session_index, - evt_type, noblock); + app_send_io_evt_to_vpp (vpp_evt_q, f->vpp_session_index, evt_type, + noblock); } return rv; } diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c index 0800ce2b041..18ea77dc8a8 100644 --- a/src/vnet/session/application_local.c +++ b/src/vnet/session/application_local.c @@ -647,8 +647,8 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct, ls->rx_fifo->shr->master_session_index = ls->session_index; ls->tx_fifo->shr->master_session_index = ls->session_index; - ls->rx_fifo->master_thread_index = ls->thread_index; - ls->tx_fifo->master_thread_index = ls->thread_index; + ls->rx_fifo->vpp_sh = ls->handle; + ls->tx_fifo->vpp_sh = ls->handle; seg_handle = 
segment_manager_segment_handle (sm, fs); segment_manager_segment_reader_unlock (sm); diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c index cae340cd64e..ad0b18e8d75 100644 --- a/src/vnet/session/application_worker.c +++ b/src/vnet/session/application_worker.c @@ -175,10 +175,10 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s) return rv; rx_fifo->shr->master_session_index = s->session_index; - rx_fifo->master_thread_index = s->thread_index; + rx_fifo->vpp_sh = s->handle; tx_fifo->shr->master_session_index = s->session_index; - tx_fifo->master_thread_index = s->thread_index; + tx_fifo->vpp_sh = s->handle; s->rx_fifo = rx_fifo; s->tx_fifo = tx_fifo; @@ -210,10 +210,10 @@ app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls) &tx_fifo); rx_fifo->shr->master_session_index = s->session_index; - rx_fifo->master_thread_index = s->thread_index; + rx_fifo->vpp_sh = s->handle; tx_fifo->shr->master_session_index = s->session_index; - tx_fifo->master_thread_index = s->thread_index; + tx_fifo->vpp_sh = s->handle; s->rx_fifo = rx_fifo; s->tx_fifo = tx_fifo; diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index 2b44d92e0b5..8c8b904c33d 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -625,7 +625,7 @@ segment_manager_del_sessions (segment_manager_t * sm) */ while (f) { - session = session_get_if_valid (f->shr->master_session_index, + session = session_get_if_valid (f->vpp_session_index, f->master_thread_index); if (session) vec_add1 (handles, session_handle (session)); @@ -672,7 +672,7 @@ segment_manager_del_sessions_filter (segment_manager_t *sm, f = fifo_segment_get_slice_fifo_list (fs, slice_index); while (f) { - session = session_get_if_valid (f->shr->master_session_index, + session = session_get_if_valid (f->vpp_session_index, f->master_thread_index); if (session) { @@ -920,7 +920,7 @@ segment_manager_attach_fifo 
(segment_manager_t *sm, svm_fifo_t **f, segment_manager_segment_reader_unlock (sm); (*f)->shr->master_session_index = s->session_index; - (*f)->master_thread_index = s->thread_index; + (*f)->vpp_sh = s->handle; } u32 @@ -1195,7 +1195,7 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose) u32 session_index, thread_index; session_t *session; - session_index = f->shr->master_session_index; + session_index = f->vpp_session_index; thread_index = f->master_thread_index; session = session_get (session_index, thread_index); diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index cc0e89fd1e2..2a6ac283fb9 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -83,13 +83,15 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, return 0; } +/* Deprecated, use session_program_* functions */ int session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type) { - return session_send_evt_to_thread (&f->shr->master_session_index, 0, + return session_send_evt_to_thread (&f->vpp_session_index, 0, f->master_thread_index, evt_type); } +/* Deprecated, use session_program_* functions */ int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, session_evt_type_t evt_type) @@ -121,6 +123,14 @@ session_program_rx_io_evt (session_handle_tu_t sh) } int +session_program_transport_io_evt (session_handle_tu_t sh, + session_evt_type_t evt_type) +{ + return session_send_evt_to_thread ((void *) &sh.session_index, 0, + (u32) sh.thread_index, evt_type); +} + +int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { /* only events supported are disconnect, shutdown and reset */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 823bdcb02af..daa3bf97f56 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -484,12 +484,16 @@ void session_transport_cleanup (session_t * s); int session_enqueue_notify (session_t *s); int 
session_dequeue_notify (session_t * s); int session_enqueue_notify_cl (session_t *s); +/* Deprecated, use session_program_* functions */ int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type); +/* Deprecated, use session_program_* functions */ int session_send_io_evt_to_thread_custom (void *data, u32 thread_index, session_evt_type_t evt_type); int session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type); int session_program_rx_io_evt (session_handle_tu_t sh); +int session_program_transport_io_evt (session_handle_tu_t sh, + session_evt_type_t evt_type); void session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args); void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp, @@ -659,7 +663,7 @@ transport_add_tx_event (transport_connection_t * tc) session_t *s = session_get (tc->s_index, tc->thread_index); if (svm_fifo_has_event (s->tx_fifo)) return; - session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); } always_inline u32 diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index c6df47b412b..5ac21c4eb85 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -426,9 +426,12 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context, } /* Setup client session index in advance, in case data arrives - * before the app processes message and updates it */ + * before the app processes message and updates it + * Maybe this needs to be done via a reply message from app */ s->rx_fifo->shr->client_session_index = api_context; s->tx_fifo->shr->client_session_index = api_context; + s->rx_fifo->app_session_index = api_context; + s->tx_fifo->app_session_index = api_context; snd_msg: @@ -637,7 +640,7 @@ mq_send_io_rx_event (session_t *s) mq_evt = svm_msg_q_msg_data (mq, &mq_msg); mq_evt->event_type = SESSION_IO_EVT_RX; - mq_evt->session_index = 
s->rx_fifo->shr->client_session_index; + mq_evt->session_index = s->rx_fifo->app_session_index; (void) svm_fifo_set_event (s->rx_fifo); @@ -658,7 +661,7 @@ mq_send_io_tx_event (session_t *s) mq_evt = svm_msg_q_msg_data (mq, &mq_msg); mq_evt->event_type = SESSION_IO_EVT_TX; - mq_evt->session_index = s->tx_fifo->shr->client_session_index; + mq_evt->session_index = s->tx_fifo->app_session_index; svm_msg_q_add_raw (mq, &mq_msg); diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c index 2a50adac5dd..158751c4eed 100644 --- a/src/vnet/session/session_debug.c +++ b/src/vnet/session/session_debug.c @@ -278,7 +278,7 @@ session_node_cmp_event (session_event_t * e, svm_fifo_t * f) case SESSION_IO_EVT_BUILTIN_RX: case SESSION_IO_EVT_TX_MAIN: case SESSION_IO_EVT_TX_FLUSH: - if (e->session_index == f->shr->master_session_index) + if (e->session_index == f->vpp_session_index) return 1; break; case SESSION_CTRL_EVT_CLOSE: diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 4d86d409e98..c0ff1de39bc 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -474,6 +474,10 @@ session_mq_accepted_reply_handler (session_worker_t *wrk, return; } + /* TODO(fcoras) This needs to be part of the reply message */ + s->rx_fifo->app_session_index = s->rx_fifo->shr->client_session_index; + s->tx_fifo->app_session_index = s->tx_fifo->shr->client_session_index; + /* Special handling for cut-through sessions */ if (!session_has_transport (s)) { @@ -640,6 +644,8 @@ session_mq_worker_update_handler (void *data) } owner_app_wrk_map = app_wrk->wrk_map_index; app_wrk = application_get_worker (app, mp->wrk_index); + if (!app_wrk) + return; /* This needs to come from the new owner */ if (mp->req_wrk_index == owner_app_wrk_map) @@ -684,7 +690,7 @@ session_mq_worker_update_handler (void *data) * Retransmit messages that may have been lost */ if (s->tx_fifo && !svm_fifo_is_empty (s->tx_fifo)) - 
session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo)) app_worker_rx_notify (app_wrk, s); diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c index 2172fa10ef1..47082e9c96a 100644 --- a/src/vnet/srv6/sr_localsid.c +++ b/src/vnet/srv6/sr_localsid.c @@ -2028,6 +2028,7 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; ip6_header_t *ip0 = 0; ip6_ext_header_t *prev0; + prev0 = 0; ip6_sr_header_t *sr0; u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; ip6_sr_localsid_t *ls0; diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c index b9ff30ba6a6..08809f70070 100644 --- a/src/vnet/tls/tls.c +++ b/src/vnet/tls/tls.c @@ -66,7 +66,7 @@ int tls_add_vpp_q_rx_evt (session_t * s) { if (svm_fifo_set_event (s->rx_fifo)) - session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_RX); + session_enqueue_notify (s); return 0; } @@ -81,7 +81,7 @@ int tls_add_vpp_q_tx_evt (session_t * s) { if (svm_fifo_set_event (s->tx_fifo)) - session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX); return 0; } @@ -569,7 +569,7 @@ dtls_migrate_ctx (void *arg) } if (svm_fifo_max_dequeue (us->tx_fifo)) - session_send_io_evt_to_thread (us->tx_fifo, SESSION_IO_EVT_TX); + session_program_tx_io_evt (us->handle, SESSION_IO_EVT_TX); } static void diff --git a/test/asf/asfframework.py b/test/asf/asfframework.py index 841a923dc79..7670a0753d1 100644 --- a/test/asf/asfframework.py +++ b/test/asf/asfframework.py @@ -155,17 +155,6 @@ def _is_platform_aarch64(): is_platform_aarch64 = _is_platform_aarch64() -def _is_distro_ubuntu2404(): - with open("/etc/os-release") as f: - for line in f.readlines(): - if "noble" in line: - return True - return False - - -is_distro_ubuntu2404 = _is_distro_ubuntu2404() - - def _is_distro_debian11(): with open("/etc/os-release") as f: for line in 
f.readlines(): @@ -226,8 +215,6 @@ class TestCaseTag(Enum): FIXME_DEBIAN11 = 4 # marks suites broken on debug vpp image FIXME_VPP_DEBUG = 5 - # marks suites broken on Ubuntu-24.04 - FIXME_UBUNTU2404 = 6 def create_tag_decorator(e): @@ -246,7 +233,6 @@ tag_fixme_vpp_workers = create_tag_decorator(TestCaseTag.FIXME_VPP_WORKERS) tag_fixme_asan = create_tag_decorator(TestCaseTag.FIXME_ASAN) tag_fixme_debian11 = create_tag_decorator(TestCaseTag.FIXME_DEBIAN11) tag_fixme_vpp_debug = create_tag_decorator(TestCaseTag.FIXME_VPP_DEBUG) -tag_fixme_ubuntu2404 = create_tag_decorator(TestCaseTag.FIXME_UBUNTU2404) class DummyVpp: @@ -309,12 +295,6 @@ class VppAsfTestCase(CPUInterface, unittest.TestCase): cls = unittest.skip("Skipping @tag_fixme_asan tests")(cls) @classmethod - def skip_fixme_ubuntu2404(cls): - """if @tag_fixme_ubuntu2404 & is Ubuntu24.04 - mark for skip""" - if cls.has_tag(TestCaseTag.FIXME_UBUNTU2404) and is_distro_ubuntu2404 == True: - cls = unittest.skip("Skipping @tag_fixme_ubuntu2404 tests")(cls) - - @classmethod def instance(cls): """Return the instance of this testcase""" return cls.test_instance @@ -1366,13 +1346,6 @@ class VppTestResult(unittest.TestResult): test_title = colorize(f"FIXME with ASAN: {test_title}", RED) test.skip_fixme_asan() - if ( - test.has_tag(TestCaseTag.FIXME_UBUNTU2404) - and is_distro_ubuntu2404 == True - ): - test_title = colorize(f"FIXME with Ubuntu 24.04: {test_title}", RED) - test.skip_fixme_ubuntu2404() - if hasattr(test, "vpp_worker_count"): if test.vpp_worker_count == 0: test_title += " [main thread only]" diff --git a/test/test_ipsec_spd_fp_input.py b/test/test_ipsec_spd_fp_input.py index eb04df49244..1953bbe5eaf 100644 --- a/test/test_ipsec_spd_fp_input.py +++ b/test/test_ipsec_spd_fp_input.py @@ -9,6 +9,7 @@ from template_ipsec import IPSecIPv6Fwd from test_ipsec_esp import TemplateIpsecEsp from template_ipsec import SpdFastPathTemplate from config import config +import pdb def debug_signal_handler(signal, frame): @@ 
-888,5 +889,50 @@ class IPSec6SpdTestCaseProtect(SpdFastPathIPv6InboundProtect): self.assertEqual(p.tra_sa_in.get_err("lost"), 0) +class IPSec6SpdTestCaseTunProtect(SpdFastPathIPv6InboundProtect): + """IPSec/IPv6 inbound: Policy mode test case with fast path""" + + # In this test sa_in defines a tunnel. Matching should be + # done based on the sa tunnel header. + + @classmethod + def setUpClass(cls): + super(IPSec6SpdTestCaseTunProtect, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(IPSec6SpdTestCaseTunProtect, cls).tearDownClass() + + def setUp(self): + super(IPSec6SpdTestCaseTunProtect, self).setUp() + + def tearDown(self): + super(IPSec6SpdTestCaseTunProtect, self).tearDown() + + def test_ipsec6_spd_inbound_tun_protect(self): + pkt_count = 5 + payload_size = 64 + p = self.params[socket.AF_INET6] + send_pkts = self.gen_encrypt_pkts6( + p, + p.scapy_tun_sa, + self.tun_if, + src=p.remote_tun_if_host, + dst=self.pg1.remote_ip6, + count=pkt_count, + payload_size=payload_size, + ) + recv_pkts = self.send_and_expect(self.tun_if, send_pkts, self.pg1) + self.logger.info(self.vapi.ppcli("show error")) + self.logger.info(self.vapi.ppcli("show ipsec all")) + pkts = p.tun_sa_in.get_stats()["packets"] + self.assertEqual( + pkts, + pkt_count, + "incorrect SA in counts: expected %d != %d" % (pkt_count, pkts), + ) + self.assertEqual(p.tun_sa_in.get_err("lost"), 0) + + if __name__ == "__main__": unittest.main(testRunner=VppTestRunner) diff --git a/test/test_sflow.py b/test/test_sflow.py new file mode 100644 index 00000000000..d16c0e6a804 --- /dev/null +++ b/test/test_sflow.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 + +import unittest +from framework import VppTestCase +from asfframework import VppTestRunner +from scapy.layers.l2 import Ether +from scapy.packet import Raw +from scapy.layers.inet import IP, UDP +from random import randint +import re # for finding counters in "sh errors" output + + +class SFlowTestCase(VppTestCase): + """sFlow test case""" + 
+ @classmethod + def setUpClass(self): + super(SFlowTestCase, self).setUpClass() + + @classmethod + def teadDownClass(cls): + super(SFlowTestCase, cls).tearDownClass() + + def setUp(self): + self.create_pg_interfaces(range(2)) # create pg0 and pg1 + for i in self.pg_interfaces: + i.admin_up() # put the interface up + i.config_ip4() # configure IPv4 address on the interface + i.resolve_arp() # resolve ARP, so that we know VPP MAC + + def tearDown(self): + for i in self.pg_interfaces: + i.admin_down() + i.unconfig() + i.set_table_ip4(0) + i.set_table_ip6(0) + + def is_hw_interface_in_dump(self, dump, hw_if_index): + for i in dump: + if i.hw_if_index == hw_if_index: + return True + else: + return False + + def enable_sflow_via_api(self): + ## TEST: Enable one interface + ret = self.vapi.sflow_enable_disable(hw_if_index=1, enable_disable=True) + self.assertEqual(ret.retval, 0) + + ## TEST: interface dump all + ret = self.vapi.sflow_interface_dump() + self.assertTrue(self.is_hw_interface_in_dump(ret, 1)) + + ## TEST: Disable one interface + ret = self.vapi.sflow_enable_disable(hw_if_index=1, enable_disable=False) + self.assertEqual(ret.retval, 0) + + ## TEST: interface dump all after enable + disable + ret = self.vapi.sflow_interface_dump() + self.assertEqual(len(ret), 0) + + ## TEST: Enable both interfaces + ret = self.vapi.sflow_enable_disable(hw_if_index=1, enable_disable=True) + self.assertEqual(ret.retval, 0) + ret = self.vapi.sflow_enable_disable(hw_if_index=2, enable_disable=True) + self.assertEqual(ret.retval, 0) + + ## TEST: interface dump all + ret = self.vapi.sflow_interface_dump() + self.assertTrue(self.is_hw_interface_in_dump(ret, 1)) + self.assertTrue(self.is_hw_interface_in_dump(ret, 2)) + + ## TEST: the default sampling rate + ret = self.vapi.sflow_sampling_rate_get() + self.assert_equal(ret.sampling_N, 10000) + + ## TEST: sflow_sampling_rate_set() + self.vapi.sflow_sampling_rate_set(sampling_N=1) + ret = self.vapi.sflow_sampling_rate_get() + 
self.assert_equal(ret.sampling_N, 1) + + ## TEST: the default polling interval + ret = self.vapi.sflow_polling_interval_get() + self.assert_equal(ret.polling_S, 20) + + ## TEST: sflow_polling_interval_set() + self.vapi.sflow_polling_interval_set(polling_S=10) + ret = self.vapi.sflow_polling_interval_get() + self.assert_equal(ret.polling_S, 10) + + ## TEST: the default header bytes + ret = self.vapi.sflow_header_bytes_get() + self.assert_equal(ret.header_B, 128) + + ## TEST: sflow_header_bytes_set() + self.vapi.sflow_header_bytes_set(header_B=96) + ret = self.vapi.sflow_header_bytes_get() + self.assert_equal(ret.header_B, 96) + + def create_stream(self, src_if, dst_if, count): + packets = [] + for i in range(count): + # create packet info stored in the test case instance + info = self.create_packet_info(src_if, dst_if) + # convert the info into packet payload + payload = self.info_to_payload(info) + # create the packet itself + p = ( + Ether(dst=src_if.local_mac, src=src_if.remote_mac) + / IP(src=src_if.remote_ip4, dst=dst_if.remote_ip4) + / UDP(sport=randint(1000, 2000), dport=5678) + / Raw(payload) + ) + # store a copy of the packet in the packet info + info.data = p.copy() + # append the packet to the list + packets.append(p) + # return the created packet list + return packets + + def verify_capture(self, src_if, dst_if, capture): + packet_info = None + for packet in capture: + try: + ip = packet[IP] + udp = packet[UDP] + # convert the payload to packet info object + payload_info = self.payload_to_info(packet[Raw]) + # make sure the indexes match + self.assert_equal( + payload_info.src, src_if.sw_if_index, "source sw_if_index" + ) + self.assert_equal( + payload_info.dst, dst_if.sw_if_index, "destination sw_if_index" + ) + packet_info = self.get_next_packet_info_for_interface2( + src_if.sw_if_index, dst_if.sw_if_index, packet_info + ) + # make sure we didn't run out of saved packets + self.assertIsNotNone(packet_info) + self.assert_equal( + payload_info.index, 
packet_info.index, "packet info index" + ) + saved_packet = packet_info.data # fetch the saved packet + # assert the values match + self.assert_equal(ip.src, saved_packet[IP].src, "IP source address") + self.assert_equal(udp.sport, saved_packet[UDP].sport, "UDP source port") + except: + self.logger.error("Unexpected or invalid packet:", packet) + raise + remaining_packet = self.get_next_packet_info_for_interface2( + src_if.sw_if_index, dst_if.sw_if_index, packet_info + ) + self.assertIsNone( + remaining_packet, + "Interface %s: Packet expected from interface " + "%s didn't arrive" % (dst_if.name, src_if.name), + ) + + def get_sflow_counter(self, counter): + counters = self.vapi.cli("sh errors").split("\n") + for i in range(1, len(counters) - 1): + results = counters[i].split() + if results[1] == "sflow": + if re.search(counter, counters[i]) is not None: + return int(results[0]) + return None + + def verify_sflow(self, count): + ctr_processed = "sflow packets processed" + ctr_sampled = "sflow packets sampled" + ctr_dropped = "sflow packets dropped" + ctr_ps_sent = "sflow PSAMPLE sent" + ctr_ps_fail = "sflow PSAMPLE send failed" + processed = self.get_sflow_counter(ctr_processed) + sampled = self.get_sflow_counter(ctr_sampled) + dropped = self.get_sflow_counter(ctr_dropped) + ps_sent = self.get_sflow_counter(ctr_ps_sent) + ps_fail = self.get_sflow_counter(ctr_ps_fail) + self.assert_equal(processed, count, ctr_processed) + self.assert_equal(sampled, count, ctr_sampled) + self.assert_equal(dropped, None, ctr_dropped) + # TODO decide how to warn if PSAMPLE is not working + # It requires a prior "sudo modprobe psample", but + # that should probably be done at system boot time + # or maybe in a systemctl startup script, so we + # should only warn here. 
+ self.logger.info(ctr_ps_sent + "=" + str(ps_sent)) + self.logger.info(ctr_ps_fail + "=" + str(ps_fail)) + + def test_basic(self): + self.enable_sflow_via_api() + count = 7 + # create the packet stream + packets = self.create_stream(self.pg0, self.pg1, count) + # add the stream to the source interface + self.pg0.add_stream(packets) + # enable capture on both interfaces + self.pg0.enable_capture() + self.pg1.enable_capture() + # start the packet generator + self.pg_start() + # get capture - the proper count of packets was saved by + # create_packet_info() based on dst_if parameter + capture = self.pg1.get_capture() + # assert nothing captured on pg0 (always do this last, so that + # some time has already passed since pg_start()) + self.pg0.assert_nothing_captured() + # verify capture + self.verify_capture(self.pg0, self.pg1, capture) + # verify sflow counters + self.verify_sflow(count) diff --git a/test/test_snort.py b/test/test_snort.py index 19401cb7b85..5335091dba7 100644 --- a/test/test_snort.py +++ b/test/test_snort.py @@ -12,10 +12,10 @@ class TestSnort(VppTestCase): def setUpClass(cls): super(TestSnort, cls).setUpClass() try: - cls.create_pg_interfaces(range(2)) + cls.create_pg_interfaces(range(4)) for i in cls.pg_interfaces: i.config_ip4().resolve_arp() - i.admin_up() + i.admin_down() except Exception: cls.tearDownClass() raise @@ -24,26 +24,28 @@ class TestSnort(VppTestCase): def tearDownClass(cls): for i in cls.pg_interfaces: i.unconfig_ip4() - i.admin_down() super(TestSnort, cls).tearDownClass() def test_snort_cli(self): # TODO: add a test with packets # { cli command : part of the expected reply } - print("TEST SNORT CLI") commands_replies = { "snort create-instance name snortTest queue-size 16 on-disconnect drop": "", "snort create-instance name snortTest2 queue-size 16 on-disconnect pass": "", "snort attach instance snortTest interface pg0 output": "", "snort attach instance snortTest2 interface pg1 input": "", + "snort attach all-instances interface 
pg2 inout": "", + "snort attach instance snortTest instance snortTest2 interface pg3 inout": "", "show snort instances": "snortTest", "show snort interfaces": "pg0", "show snort clients": "number of clients", "show snort mode": "input mode: interrupt", "snort mode polling": "", "snort mode interrupt": "", - "snort detach interface pg0": "", - "snort detach interface pg1": "", + "snort detach instance snortTest interface pg0": "", + "snort detach instance snortTest2 interface pg1": "", + "snort detach all-instances interface pg2": "", + "snort detach instance snortTest instance snortTest2 interface pg3": "", "snort delete instance snortTest": "", } @@ -64,7 +66,7 @@ class TestSnortVapi(VppTestCase): for i in cls.pg_interfaces: i.config_ip4() i.resolve_arp() - i.admin_up() + i.admin_down() except Exception: cls.tearDownClass() raise @@ -73,7 +75,6 @@ class TestSnortVapi(VppTestCase): def tearDownClass(cls): for i in cls.pg_interfaces: i.unconfig_ip4() - i.admin_down() super(TestSnortVapi, cls).tearDownClass() def test_snort_01_modes_set_interrupt(self): @@ -109,14 +110,20 @@ class TestSnortVapi(VppTestCase): reply = self.vapi.snort_interface_attach( instance_index=0, sw_if_index=1, snort_dir=1 ) + reply = self.vapi.snort_interface_attach( + instance_index=0, sw_if_index=2, snort_dir=2 + ) + # verify attaching with an invalid direction is rejected try: reply = self.vapi.snort_interface_attach( - instance_index=1, sw_if_index=1, snort_dir=1 + instance_index=1, sw_if_index=2, snort_dir=4 ) except: pass else: self.assertNotEqual(reply.retval, 0) + reply = self.vapi.cli("show snort interfaces") + self.assertNotIn("snortTest1", reply) reply = self.vapi.snort_interface_attach( instance_index=1, sw_if_index=2, snort_dir=3 @@ -124,6 +131,31 @@ class TestSnortVapi(VppTestCase): reply = self.vapi.cli("show snort interfaces") self.assertIn("snortTest0", reply) self.assertIn("snortTest1", reply) + self.assertIn("input", reply) + self.assertIn("inout", reply) + 
self.assertIn("output", reply) + + # verify attaching a previously attached interface is rejected + try: + reply = self.vapi.snort_interface_attach( + instance_index=1, sw_if_index=2, snort_dir=2 + ) + except: + pass + else: + self.assertNotEqual(reply.retval, 0) + + # verify attaching an invalid sw_if_index is rejected + try: + reply = self.vapi.snort_interface_attach( + instance_index=1, sw_if_index=3, snort_dir=2 + ) + except: + pass + else: + self.assertNotEqual(reply.retval, 0) + reply = self.vapi.cli("show snort interfaces") + self.assertIn("snortTest1", reply) def test_snort_05_delete_instance(self): """Instances can be deleted""" @@ -131,14 +163,14 @@ class TestSnortVapi(VppTestCase): reply = self.vapi.cli("show snort interfaces") self.assertNotIn("snortTest0", reply) self.assertIn("snortTest1", reply) - reply = self.vapi.cli("show snort interfaces") self.assertNotIn("pg0", reply) self.assertIn("pg1", reply) def test_snort_06_detach_if(self): """Interfaces can be detached""" + # verify detaching an invalid sw_if_index is rejected try: - reply = self.vapi.snort_interface_detach(sw_if_index=1) + reply = self.vapi.snort_interface_detach(sw_if_index=3) except: pass else: |