aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/crypto_engines/ipsecmb/ipsecmb.c9
-rw-r--r--src/crypto_engines/openssl/main.c93
-rw-r--r--src/examples/srv6-sample-localsid/node.c2
-rw-r--r--src/plugins/abf/abf_itf_attach.c14
-rw-r--r--src/plugins/acl/elog_acl_trace.h234
-rw-r--r--src/plugins/acl/fa_node.h238
-rw-r--r--src/plugins/acl/hash_lookup.c22
-rw-r--r--src/plugins/acl/public_inlines.h10
-rw-r--r--src/plugins/acl/sess_mgmt_node.c16
-rw-r--r--src/plugins/acl/session_inlines.h15
-rw-r--r--src/plugins/adl/ip4_allowlist.c2
-rw-r--r--src/plugins/adl/ip6_allowlist.c2
-rw-r--r--src/plugins/af_packet/af_packet.c2
-rw-r--r--src/plugins/af_packet/node.c4
-rw-r--r--src/plugins/af_xdp/device.c2
-rw-r--r--src/plugins/cnat/cnat_snat_policy.c5
-rw-r--r--src/plugins/cnat/cnat_types.h2
-rw-r--r--src/plugins/crypto_sw_scheduler/main.c2
-rw-r--r--src/plugins/ct6/ct6.h6
-rw-r--r--src/plugins/dev_armada/pp2/rx.c3
-rw-r--r--src/plugins/dev_ena/ena.c3
-rw-r--r--src/plugins/dev_iavf/virtchnl.h1
-rw-r--r--src/plugins/dev_octeon/crypto.c14
-rw-r--r--src/plugins/dev_octeon/crypto.h13
-rw-r--r--src/plugins/dev_octeon/init.c43
-rw-r--r--src/plugins/dev_octeon/octeon.h6
-rw-r--r--src/plugins/dma_intel/dsa.c2
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev.c10
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c7
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c5
-rw-r--r--src/plugins/dpdk/device/common.c5
-rw-r--r--src/plugins/dpdk/device/device.c74
-rw-r--r--src/plugins/dpdk/device/dpdk.h4
-rw-r--r--src/plugins/dpdk/device/dpdk_priv.h4
-rw-r--r--src/plugins/dpdk/device/driver.c1
-rw-r--r--src/plugins/dpdk/device/format.c6
-rw-r--r--src/plugins/dpdk/device/init.c4
-rw-r--r--src/plugins/dpdk/device/node.c7
-rw-r--r--src/plugins/geneve/decap.c2
-rw-r--r--src/plugins/geneve/encap.c2
-rw-r--r--src/plugins/gtpu/gtpu_decap.c4
-rw-r--r--src/plugins/gtpu/gtpu_encap.c2
-rw-r--r--src/plugins/hs_apps/CMakeLists.txt6
-rw-r--r--src/plugins/hs_apps/echo_client.c130
-rw-r--r--src/plugins/hs_apps/echo_client.h12
-rw-r--r--src/plugins/hs_apps/echo_server.c8
-rw-r--r--src/plugins/hs_apps/http_cli.c8
-rw-r--r--src/plugins/hs_apps/http_client.c491
-rw-r--r--src/plugins/hs_apps/http_client_cli.c31
-rw-r--r--src/plugins/hs_apps/http_tps.c13
-rw-r--r--src/plugins/hs_apps/proxy.c6
-rw-r--r--src/plugins/hs_apps/proxy.h2
-rw-r--r--src/plugins/hs_apps/test_builtins.c1
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_cl_udp.c156
-rw-r--r--src/plugins/http/CMakeLists.txt9
-rw-r--r--src/plugins/http/extras/mk_huffman_table.py416
-rw-r--r--src/plugins/http/http.c2481
-rw-r--r--src/plugins/http/http.h589
-rw-r--r--src/plugins/http/http1.c1936
-rw-r--r--src/plugins/http/http2/frame.c339
-rw-r--r--src/plugins/http/http2/frame.h246
-rw-r--r--src/plugins/http/http2/hpack.c1173
-rw-r--r--src/plugins/http/http2/hpack.h183
-rw-r--r--src/plugins/http/http2/http2.c1492
-rw-r--r--src/plugins/http/http2/http2.h97
-rw-r--r--src/plugins/http/http2/huffman_table.h319
-rw-r--r--src/plugins/http/http_buffer.c52
-rw-r--r--src/plugins/http/http_buffer.h18
-rw-r--r--src/plugins/http/http_header_names.h3
-rw-r--r--src/plugins/http/http_plugin.rst2
-rw-r--r--src/plugins/http/http_private.h885
-rw-r--r--src/plugins/http/http_timer.h10
-rw-r--r--src/plugins/http/test/http_test.c775
-rw-r--r--src/plugins/http_static/http_cache.c8
-rw-r--r--src/plugins/http_static/http_cache.h1
-rw-r--r--src/plugins/http_static/http_static.api47
-rw-r--r--src/plugins/http_static/http_static.c68
-rw-r--r--src/plugins/http_static/http_static.h93
-rw-r--r--src/plugins/http_static/http_static_test.c112
-rw-r--r--src/plugins/http_static/static_server.c719
-rw-r--r--src/plugins/ikev2/ikev2.c13
-rw-r--r--src/plugins/ikev2/ikev2_priv.h2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c4
-rw-r--r--src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c4
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c5
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c5
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c5
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c18
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c6
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h8
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h4
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c4
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h4
-rw-r--r--src/plugins/l2tp/l2tp.c2
-rw-r--r--src/plugins/lb/lb.c4
-rw-r--r--src/plugins/lb/node.c4
-rw-r--r--src/plugins/linux-cp/lcp.api36
-rw-r--r--src/plugins/linux-cp/lcp_api.c34
-rw-r--r--src/plugins/linux-cp/lcp_cli.c56
-rw-r--r--src/plugins/linux-cp/lcp_interface.c63
-rw-r--r--src/plugins/linux-cp/lcp_interface.h13
-rw-r--r--src/plugins/linux-cp/lcp_nl.c2
-rw-r--r--src/plugins/linux-cp/lcp_node.c116
-rw-r--r--src/plugins/linux-cp/lcp_router.c13
-rw-r--r--src/plugins/lisp/lisp-gpe/decap.c6
-rw-r--r--src/plugins/lisp/lisp-gpe/interface.c2
-rw-r--r--src/plugins/mactime/node.c2
-rw-r--r--src/plugins/map/ip4_map.c2
-rw-r--r--src/plugins/map/ip4_map_t.c4
-rw-r--r--src/plugins/map/ip6_map.c6
-rw-r--r--src/plugins/map/ip6_map_t.c4
-rw-r--r--src/plugins/memif/memif.c8
-rw-r--r--src/plugins/memif/node.c6
-rw-r--r--src/plugins/memif/private.h2
-rw-r--r--src/plugins/memif/socket.c2
-rw-r--r--src/plugins/nat/det44/det44.h1
-rw-r--r--src/plugins/nat/det44/det44_in2out.c2
-rw-r--r--src/plugins/nat/det44/det44_out2in.c2
-rw-r--r--src/plugins/nat/dslite/dslite.h1
-rw-r--r--src/plugins/nat/dslite/dslite_in2out.c1
-rw-r--r--src/plugins/nat/dslite/dslite_out2in.c1
-rw-r--r--src/plugins/nat/lib/ipfix_logging.c1
-rw-r--r--src/plugins/nat/lib/nat_syslog.c1
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.h1
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_inlines.h1
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.c1
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.h1
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_in2out.c1
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_out2in.c1
-rw-r--r--src/plugins/nat/nat64/nat64.c1
-rw-r--r--src/plugins/nat/nat64/nat64.h1
-rw-r--r--src/plugins/nat/nat64/nat64_db.c1
-rw-r--r--src/plugins/nat/pnat/pnat.api17
-rw-r--r--src/plugins/nat/pnat/pnat_api.c14
-rw-r--r--src/plugins/netmap/netmap.c4
-rw-r--r--src/plugins/netmap/node.c4
-rw-r--r--src/plugins/nsh/nsh.c35
-rw-r--r--src/plugins/nsh/nsh.h5
-rw-r--r--src/plugins/nsh/nsh_pop.c2
-rw-r--r--src/plugins/ping/ping_api.c18
-rw-r--r--src/plugins/pppoe/pppoe_cp_node.c2
-rw-r--r--src/plugins/pppoe/pppoe_decap.c2
-rw-r--r--src/plugins/prom/prom.c7
-rw-r--r--src/plugins/prom/prom.h2
-rw-r--r--src/plugins/prom/prom_cli.c2
-rw-r--r--src/plugins/pvti/input.c2
-rw-r--r--src/plugins/pvti/output.c2
-rw-r--r--src/plugins/pvti/pvti.h2
-rw-r--r--src/plugins/quic/quic.c43
-rw-r--r--src/plugins/quic/quic.h4
-rw-r--r--src/plugins/quic/quic_crypto.c9
-rw-r--r--src/plugins/rdma/device.c2
-rw-r--r--src/plugins/sflow/CMakeLists.txt18
-rw-r--r--src/plugins/sflow/sflow.c68
-rw-r--r--src/plugins/sflow/sflow.h10
-rw-r--r--src/plugins/sflow/sflow_common.h2
-rw-r--r--src/plugins/sflow/sflow_dlapi.h (renamed from src/plugins/nat/lib/inlines.h)35
-rw-r--r--src/plugins/sflow/sflow_psample.c5
-rw-r--r--src/plugins/sflow/sflow_vapi.c226
-rw-r--r--src/plugins/sflow/sflow_vapi.h55
-rw-r--r--src/plugins/snort/enqueue.c2
-rw-r--r--src/plugins/snort/main.c14
-rw-r--r--src/plugins/srtp/srtp.c16
-rw-r--r--src/plugins/srv6-ad-flow/node.c2
-rw-r--r--src/plugins/srv6-am/node.c2
-rw-r--r--src/plugins/srv6-mobile/node.c10
-rw-r--r--src/plugins/tlsmbedtls/tls_mbedtls.c2
-rw-r--r--src/plugins/tlsopenssl/tls_async.c6
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.c4
-rw-r--r--src/plugins/tlspicotls/pico_vpp_crypto.c6
-rw-r--r--src/plugins/unittest/ipsec_test.c21
-rw-r--r--src/plugins/unittest/session_test.c278
-rw-r--r--src/plugins/unittest/svm_fifo_test.c2
-rw-r--r--src/plugins/unittest/tcp_test.c10
-rw-r--r--src/plugins/urpf/urpf_dp.h18
-rw-r--r--src/plugins/vhost/vhost_user.c27
-rw-r--r--src/plugins/vhost/vhost_user.h14
-rw-r--r--src/plugins/vhost/vhost_user_input.c2
-rw-r--r--src/plugins/vhost/vhost_user_output.c8
-rw-r--r--src/plugins/vmxnet3/input.c2
-rw-r--r--src/plugins/vmxnet3/vmxnet3.h2
-rw-r--r--src/plugins/vrrp/vrrp_periodic.c14
-rw-r--r--src/plugins/vxlan-gpe/CMakeLists.txt32
-rw-r--r--src/plugins/vxlan-gpe/FEATURE.yaml (renamed from src/vnet/vxlan-gpe/FEATURE.yaml)0
-rw-r--r--src/plugins/vxlan-gpe/decap.c (renamed from src/vnet/vxlan-gpe/decap.c)6
-rw-r--r--src/plugins/vxlan-gpe/dir.dox (renamed from src/vnet/vxlan-gpe/dir.dox)0
-rw-r--r--src/plugins/vxlan-gpe/encap.c (renamed from src/vnet/vxlan-gpe/encap.c)4
-rw-r--r--src/plugins/vxlan-gpe/plugin.c26
-rw-r--r--src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt (renamed from src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt)0
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe.api (renamed from src/vnet/vxlan-gpe/vxlan_gpe.api)0
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe.c (renamed from src/vnet/vxlan-gpe/vxlan_gpe.c)10
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe.h (renamed from src/vnet/vxlan-gpe/vxlan_gpe.h)18
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe_api.c (renamed from src/vnet/vxlan-gpe/vxlan_gpe_api.c)6
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe_error.def (renamed from src/vnet/vxlan-gpe/vxlan_gpe_error.def)0
-rw-r--r--src/plugins/vxlan-gpe/vxlan_gpe_packet.h (renamed from src/vnet/vxlan-gpe/vxlan_gpe_packet.h)0
-rw-r--r--src/plugins/vxlan/decap.c4
-rw-r--r--src/plugins/vxlan/encap.c2
-rw-r--r--src/plugins/wireguard/wireguard_input.c2
-rw-r--r--src/plugins/wireguard/wireguard_output_tun.c2
-rwxr-xr-xsrc/scripts/generate_version_h7
-rwxr-xr-xsrc/scripts/host-stack/cc_plots.py246
-rwxr-xr-xsrc/scripts/host-stack/convert_evt12
-rw-r--r--src/vat2/main.c35
-rw-r--r--src/vcl/vcl_locked.c66
-rw-r--r--src/vcl/vcl_private.c75
-rw-r--r--src/vcl/vcl_private.h15
-rw-r--r--src/vcl/vppcom.c76
-rw-r--r--src/vlib/CMakeLists.txt19
-rw-r--r--src/vlib/buffer_funcs.c2
-rw-r--r--src/vlib/cli.c9
-rw-r--r--src/vlib/counter.h27
-rw-r--r--src/vlib/file.c305
-rw-r--r--src/vlib/file.h14
-rw-r--r--src/vlib/format.c4
-rw-r--r--src/vlib/freebsd/pci.c4
-rw-r--r--src/vlib/global_funcs.h2
-rw-r--r--src/vlib/linux/pci.c14
-rw-r--r--src/vlib/linux/vmbus.c13
-rw-r--r--src/vlib/log.c58
-rw-r--r--src/vlib/log.h4
-rw-r--r--src/vlib/main.c321
-rw-r--r--src/vlib/main.h24
-rw-r--r--src/vlib/node.c13
-rw-r--r--src/vlib/node.h99
-rw-r--r--src/vlib/node_cli.c3
-rw-r--r--src/vlib/node_funcs.h98
-rw-r--r--src/vlib/punt_node.c23
-rw-r--r--src/vlib/stats/init.c5
-rw-r--r--src/vlib/threads.c331
-rw-r--r--src/vlib/threads.h41
-rw-r--r--src/vlib/threads_cli.c22
-rw-r--r--src/vlib/time.h7
-rw-r--r--src/vlib/tw_funcs.h90
-rw-r--r--src/vlib/unix/cli.c56
-rw-r--r--src/vlib/unix/input.c431
-rw-r--r--src/vlib/unix/main.c36
-rw-r--r--src/vlib/unix/mc_socket.c3
-rw-r--r--src/vlib/unix/plugin.c5
-rw-r--r--src/vlib/unix/unix.h1
-rw-r--r--src/vlib/vlib.h1
-rw-r--r--src/vlibapi/api.h1
-rw-r--r--src/vlibmemory/socket_api.c2
-rw-r--r--src/vnet/CMakeLists.txt25
-rw-r--r--src/vnet/adj/adj_l2.c2
-rw-r--r--src/vnet/adj/adj_nsh.c2
-rw-r--r--src/vnet/bier/bier_lookup.c4
-rw-r--r--src/vnet/bier/bier_output.c2
-rw-r--r--src/vnet/bonding/cli.c2
-rw-r--r--src/vnet/bonding/device.c81
-rw-r--r--src/vnet/bonding/node.c2
-rw-r--r--src/vnet/bonding/node.h1
-rw-r--r--src/vnet/buffer.h4
-rw-r--r--src/vnet/classify/vnet_classify.c4
-rw-r--r--src/vnet/crypto/config.c105
-rw-r--r--src/vnet/crypto/crypto.c121
-rw-r--r--src/vnet/crypto/crypto.h28
-rw-r--r--src/vnet/crypto/node.c2
-rw-r--r--src/vnet/dev/bus/pci.c5
-rw-r--r--src/vnet/dev/dev.h2
-rw-r--r--src/vnet/dev/runtime.c2
-rw-r--r--src/vnet/devices/devices.h2
-rw-r--r--src/vnet/devices/virtio/node.c2
-rw-r--r--src/vnet/devices/virtio/virtio.c2
-rw-r--r--src/vnet/dpo/interface_rx_dpo.c2
-rw-r--r--src/vnet/dpo/load_balance.c7
-rw-r--r--src/vnet/dpo/load_balance.h1
-rw-r--r--src/vnet/dpo/lookup_dpo.c6
-rw-r--r--src/vnet/dpo/replicate_dpo.c2
-rw-r--r--src/vnet/ethernet/interface.c2
-rw-r--r--src/vnet/ethernet/node.c2
-rw-r--r--src/vnet/ethernet/p2p_ethernet_input.c2
-rw-r--r--src/vnet/feature/feature.api19
-rw-r--r--src/vnet/feature/feature_api.c24
-rw-r--r--src/vnet/fib/fib_entry.c6
-rw-r--r--src/vnet/fib/fib_entry.h1
-rw-r--r--src/vnet/gso/node.c4
-rw-r--r--src/vnet/interface.h2
-rw-r--r--src/vnet/interface/rx_queue.c8
-rw-r--r--src/vnet/interface/rx_queue_funcs.h5
-rw-r--r--src/vnet/interface/tx_queue.c4
-rw-r--r--src/vnet/interface/tx_queue_funcs.h4
-rw-r--r--src/vnet/interface_api.c2
-rw-r--r--src/vnet/interface_cli.c9
-rw-r--r--src/vnet/interface_funcs.h3
-rw-r--r--src/vnet/interface_test.c2
-rw-r--r--src/vnet/ip-neighbor/ip4_neighbor.c7
-rw-r--r--src/vnet/ip-neighbor/ip4_neighbor.h6
-rw-r--r--src/vnet/ip-neighbor/ip6_neighbor.c7
-rw-r--r--src/vnet/ip-neighbor/ip6_neighbor.h8
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.c2
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.h3
-rw-r--r--src/vnet/ip/icmp4.c2
-rw-r--r--src/vnet/ip/icmp6.c2
-rw-r--r--src/vnet/ip/ip4_forward.c4
-rw-r--r--src/vnet/ip/ip4_forward.h2
-rw-r--r--src/vnet/ip/ip4_input.c4
-rw-r--r--src/vnet/ip/ip4_mtrie.c10
-rw-r--r--src/vnet/ip/ip4_mtrie.h5
-rw-r--r--src/vnet/ip/ip4_to_ip6.h21
-rw-r--r--src/vnet/ip/ip6_forward.c4
-rw-r--r--src/vnet/ip/ip6_forward.h2
-rw-r--r--src/vnet/ip/ip6_input.c2
-rw-r--r--src/vnet/ip/ip6_to_ip4.h14
-rw-r--r--src/vnet/ip/ip_init.c36
-rw-r--r--src/vnet/ip/punt.c3
-rw-r--r--src/vnet/ip/punt_node.c2
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.c29
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.c4
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.c4
-rw-r--r--src/vnet/ipfix-export/flow_report.c5
-rw-r--r--src/vnet/ipfix-export/flow_report.h6
-rw-r--r--src/vnet/ipip/node.c2
-rw-r--r--src/vnet/ipsec/ah.h8
-rw-r--r--src/vnet/ipsec/ah_decrypt.c85
-rw-r--r--src/vnet/ipsec/ah_encrypt.c106
-rw-r--r--src/vnet/ipsec/esp.h43
-rw-r--r--src/vnet/ipsec/esp_decrypt.c247
-rw-r--r--src/vnet/ipsec/esp_encrypt.c232
-rw-r--r--src/vnet/ipsec/ipsec.c6
-rw-r--r--src/vnet/ipsec/ipsec.h14
-rw-r--r--src/vnet/ipsec/ipsec_api.c177
-rw-r--r--src/vnet/ipsec/ipsec_cli.c7
-rw-r--r--src/vnet/ipsec/ipsec_format.c32
-rw-r--r--src/vnet/ipsec/ipsec_funcs.h41
-rw-r--r--src/vnet/ipsec/ipsec_input.c368
-rw-r--r--src/vnet/ipsec/ipsec_sa.c363
-rw-r--r--src/vnet/ipsec/ipsec_sa.h510
-rw-r--r--src/vnet/ipsec/ipsec_tun.c13
-rw-r--r--src/vnet/ipsec/ipsec_tun_in.c2
-rw-r--r--src/vnet/ipsec/main.c20
-rw-r--r--src/vnet/l2/l2_flood.c2
-rw-r--r--src/vnet/l2/l2_input_node.c5
-rw-r--r--src/vnet/mpls/mpls_input.c2
-rw-r--r--src/vnet/mpls/mpls_lookup.c4
-rw-r--r--src/vnet/policer/police.h3
-rw-r--r--src/vnet/policer/police_inlines.h2
-rw-r--r--src/vnet/qos/qos_store.c2
-rw-r--r--src/vnet/session/application.c6
-rw-r--r--src/vnet/session/application.h16
-rw-r--r--src/vnet/session/application_interface.c113
-rw-r--r--src/vnet/session/application_interface.h1
-rw-r--r--src/vnet/session/application_local.c24
-rw-r--r--src/vnet/session/application_worker.c15
-rw-r--r--src/vnet/session/segment_manager.c18
-rw-r--r--src/vnet/session/segment_manager.h16
-rw-r--r--src/vnet/session/session.c347
-rw-r--r--src/vnet/session/session.h446
-rw-r--r--src/vnet/session/session_api.c2
-rw-r--r--src/vnet/session/session_cli.c9
-rw-r--r--src/vnet/session/session_input.c20
-rw-r--r--src/vnet/session/session_lookup.c81
-rw-r--r--src/vnet/session/session_lookup.h23
-rw-r--r--src/vnet/session/session_node.c61
-rw-r--r--src/vnet/session/session_types.h8
-rw-r--r--src/vnet/session/transport.c71
-rw-r--r--src/vnet/session/transport.h37
-rw-r--r--src/vnet/session/transport_types.h2
-rw-r--r--src/vnet/srv6/sr_api.c2
-rw-r--r--src/vnet/srv6/sr_localsid.c8
-rw-r--r--src/vnet/srv6/sr_policy_rewrite.c5
-rw-r--r--src/vnet/tcp/tcp.c39
-rw-r--r--src/vnet/tcp/tcp.h12
-rw-r--r--src/vnet/tcp/tcp_bt.c2
-rw-r--r--src/vnet/tcp/tcp_cli.c4
-rw-r--r--src/vnet/tcp/tcp_cubic.c2
-rw-r--r--src/vnet/tcp/tcp_inlines.h17
-rw-r--r--src/vnet/tcp/tcp_input.c78
-rw-r--r--src/vnet/tcp/tcp_output.c15
-rw-r--r--src/vnet/tls/tls.c14
-rw-r--r--src/vnet/tls/tls.h4
-rw-r--r--src/vnet/tls/tls_inlines.h6
-rw-r--r--src/vnet/udp/udp.c32
-rw-r--r--src/vnet/udp/udp.h15
-rw-r--r--src/vnet/udp/udp_cli.c32
-rw-r--r--src/vnet/udp/udp_encap_node.c2
-rw-r--r--src/vnet/udp/udp_input.c37
-rw-r--r--src/vnet/udp/udp_output.c2
-rw-r--r--src/vnet/unix/tuntap.c6
-rw-r--r--src/vnet/util/refcount.c2
-rw-r--r--src/vnet/util/refcount.h7
-rw-r--r--src/vnet/util/throttle.h5
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_papi_async.py3
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_transport_socket.py3
-rw-r--r--src/vpp-api/vapi/vapi.c19
-rw-r--r--src/vpp/CMakeLists.txt2
-rw-r--r--src/vpp/conf/80-vpp.conf13
-rw-r--r--src/vpp/conf/startup.conf12
-rw-r--r--src/vpp/vnet/main.c14
-rw-r--r--src/vppinfra/CMakeLists.txt1
-rw-r--r--src/vppinfra/bihash_template.c4
-rw-r--r--src/vppinfra/bihash_vec8_8.h1
-rw-r--r--src/vppinfra/bitmap.h11
-rw-r--r--src/vppinfra/bitops.h35
-rw-r--r--src/vppinfra/clib.h1
-rw-r--r--src/vppinfra/clib_error.h2
-rw-r--r--src/vppinfra/devicetree.c1
-rw-r--r--src/vppinfra/devicetree.h3
-rw-r--r--src/vppinfra/elog.h12
-rw-r--r--src/vppinfra/error_bootstrap.h2
-rw-r--r--src/vppinfra/file.h127
-rw-r--r--src/vppinfra/lock.h26
-rw-r--r--src/vppinfra/mem.h2
-rw-r--r--src/vppinfra/os.h22
-rw-r--r--src/vppinfra/pool.h6
-rw-r--r--src/vppinfra/smp.h79
-rw-r--r--src/vppinfra/socket.c1
-rw-r--r--src/vppinfra/string.c2
-rw-r--r--src/vppinfra/time.c11
-rw-r--r--src/vppinfra/types.h4
-rw-r--r--src/vppinfra/unix-misc.c132
-rw-r--r--src/vppinfra/unix.h13
-rw-r--r--src/vppinfra/vec.h20
-rw-r--r--src/vppinfra/vec_bootstrap.h5
415 files changed, 15862 insertions, 7520 deletions
diff --git a/src/crypto_engines/ipsecmb/ipsecmb.c b/src/crypto_engines/ipsecmb/ipsecmb.c
index 9981d738401..3006c5294d5 100644
--- a/src/crypto_engines/ipsecmb/ipsecmb.c
+++ b/src/crypto_engines/ipsecmb/ipsecmb.c
@@ -861,6 +861,7 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r)
ipsecmb_alg_data_t *ad;
ipsecmb_per_thread_data_t *ptd;
IMB_MGR *m = 0;
+ IMB_ARCH arch;
if (!clib_cpu_supports_aes ())
return "AES ISA not available on this CPU";
@@ -875,12 +876,8 @@ crypto_ipsecmb_init (vnet_crypto_engine_registration_t *r)
clib_memset_u8 (ptd->burst_jobs, 0,
sizeof (IMB_JOB) * IMB_MAX_BURST_SIZE);
#endif
- if (clib_cpu_supports_avx512f ())
- init_mb_mgr_avx512 (ptd->mgr);
- else if (clib_cpu_supports_avx2 () && clib_cpu_supports_bmi2 ())
- init_mb_mgr_avx2 (ptd->mgr);
- else
- init_mb_mgr_sse (ptd->mgr);
+
+ init_mb_mgr_auto (ptd->mgr, &arch);
if (ptd == imbm->per_thread_data)
m = ptd->mgr;
diff --git a/src/crypto_engines/openssl/main.c b/src/crypto_engines/openssl/main.c
index f6c2229d3cd..a95c1710a34 100644
--- a/src/crypto_engines/openssl/main.c
+++ b/src/crypto_engines/openssl/main.c
@@ -49,6 +49,26 @@ static u32 num_threads;
_ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm, 0, 0) \
_ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm, 0, 0)
+#define foreach_openssl_linked_cbc_hmac_op \
+ _ (AES_128_CBC_SHA1_TAG12, EVP_aes_128_cbc, EVP_sha1, 12) \
+ _ (AES_192_CBC_SHA1_TAG12, EVP_aes_192_cbc, EVP_sha1, 12) \
+ _ (AES_256_CBC_SHA1_TAG12, EVP_aes_256_cbc, EVP_sha1, 12) \
+ _ (AES_128_CBC_SHA224_TAG14, EVP_aes_128_cbc, EVP_sha224, 14) \
+ _ (AES_192_CBC_SHA224_TAG14, EVP_aes_192_cbc, EVP_sha224, 14) \
+ _ (AES_256_CBC_SHA224_TAG14, EVP_aes_256_cbc, EVP_sha224, 14) \
+ _ (AES_128_CBC_SHA256_TAG16, EVP_aes_128_cbc, EVP_sha256, 16) \
+ _ (AES_192_CBC_SHA256_TAG16, EVP_aes_192_cbc, EVP_sha256, 16) \
+ _ (AES_256_CBC_SHA256_TAG16, EVP_aes_256_cbc, EVP_sha256, 16) \
+ _ (AES_128_CBC_SHA384_TAG24, EVP_aes_128_cbc, EVP_sha384, 24) \
+ _ (AES_192_CBC_SHA384_TAG24, EVP_aes_192_cbc, EVP_sha384, 24) \
+ _ (AES_256_CBC_SHA384_TAG24, EVP_aes_256_cbc, EVP_sha384, 24) \
+ _ (AES_128_CBC_SHA512_TAG32, EVP_aes_128_cbc, EVP_sha512, 32) \
+ _ (AES_192_CBC_SHA512_TAG32, EVP_aes_192_cbc, EVP_sha512, 32) \
+ _ (AES_256_CBC_SHA512_TAG32, EVP_aes_256_cbc, EVP_sha512, 32) \
+ _ (AES_128_CBC_MD5_TAG12, EVP_aes_128_cbc, EVP_md5, 12) \
+ _ (AES_192_CBC_MD5_TAG12, EVP_aes_192_cbc, EVP_md5, 12) \
+ _ (AES_256_CBC_MD5_TAG12, EVP_aes_256_cbc, EVP_md5, 12)
+
#define foreach_openssl_chacha20_evp_op \
_ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305, 0, 0) \
_ (chacha20_poly1305, CHACHA20_POLY1305_TAG16_AAD0, EVP_chacha20_poly1305, \
@@ -611,6 +631,56 @@ crypto_openssl_key_handler (vnet_crypto_key_op_t kop,
foreach_openssl_evp_op;
#undef _
+#define _(n, c, m, t) \
+ static u32 openssl_ops_enc_##n (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ for (u32 i = 0; i < n_ops; i++) \
+ ops[i]->digest_len = t; \
+ openssl_ops_enc_cbc (vm, ops, 0, n_ops, c (), 1, 16); \
+ openssl_ops_hmac (vm, ops, 0, n_ops, m ()); \
+ return n_ops; \
+ } \
+ static u32 openssl_ops_dec_##n (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ for (u32 i = 0; i < n_ops; i++) \
+ ops[i]->digest_len = t; \
+ openssl_ops_dec_cbc (vm, ops, 0, n_ops, c (), 1, 16); \
+ openssl_ops_hmac (vm, ops, 0, n_ops, m ()); \
+ return n_ops; \
+ } \
+ static u32 openssl_ops_enc_chained_##n ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ for (u32 i = 0; i < n_ops; i++) \
+ ops[i]->digest_len = t; \
+ openssl_ops_enc_cbc (vm, ops, chunks, n_ops, c (), 1, 16); \
+ openssl_ops_hmac (vm, ops, chunks, n_ops, m ()); \
+ return n_ops; \
+ } \
+ static u32 openssl_ops_dec_chained_##n ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ for (u32 i = 0; i < n_ops; i++) \
+ ops[i]->digest_len = t; \
+ openssl_ops_dec_cbc (vm, ops, chunks, n_ops, c (), 1, 16); \
+ openssl_ops_hmac (vm, ops, chunks, n_ops, m ()); \
+ return n_ops; \
+ } \
+ static void *openssl_ctx_##n (vnet_crypto_key_t *key, \
+ vnet_crypto_key_op_t kop, \
+ vnet_crypto_key_index_t idx) \
+ { \
+ openssl_ctx_cipher (key, kop, idx, c (), 0); \
+ openssl_ctx_hmac (key, kop, idx, m ()); \
+ return NULL; \
+ }
+foreach_openssl_linked_cbc_hmac_op
+#undef _
+
#define _(a, b) \
static u32 openssl_ops_hash_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
u32 n_ops) \
@@ -624,7 +694,7 @@ foreach_openssl_evp_op;
return openssl_ops_hash (vm, ops, chunks, n_ops, b ()); \
}
-foreach_openssl_hash_op;
+ foreach_openssl_hash_op;
#undef _
#define _(a, b) \
@@ -666,8 +736,12 @@ crypto_openssl_init (vnet_crypto_engine_registration_t *r)
foreach_openssl_evp_op;
#undef _
+#define _(n, c, m, t) cm->ctx_fn[VNET_CRYPTO_ALG_##n] = openssl_ctx_##n;
+ foreach_openssl_linked_cbc_hmac_op
+#undef _
+
#define _(a, b) cm->ctx_fn[VNET_CRYPTO_ALG_HMAC_##a] = openssl_ctx_hmac_##a;
- foreach_openssl_hmac_op;
+ foreach_openssl_hmac_op;
#undef _
per_thread_data = r->per_thread_data;
@@ -691,17 +765,28 @@ vnet_crypto_engine_op_handlers_t op_handlers[] = {
.cfn = openssl_ops_dec_chained_##a },
foreach_openssl_evp_op
#undef _
+#define _(n, c, m, t) \
+ { \
+ .opt = VNET_CRYPTO_OP_##n##_ENC, \
+ .fn = openssl_ops_enc_##n, \
+ .cfn = openssl_ops_enc_chained_##n, \
+ }, \
+ { .opt = VNET_CRYPTO_OP_##n##_DEC, \
+ .fn = openssl_ops_dec_##n, \
+ .cfn = openssl_ops_dec_chained_##n },
+ foreach_openssl_linked_cbc_hmac_op
+#undef _
#define _(a, b) \
{ .opt = VNET_CRYPTO_OP_##a##_HMAC, \
.fn = openssl_ops_hmac_##a, \
.cfn = openssl_ops_hmac_chained_##a },
- foreach_openssl_hmac_op
+ foreach_openssl_hmac_op
#undef _
#define _(a, b) \
{ .opt = VNET_CRYPTO_OP_##a##_HASH, \
.fn = openssl_ops_hash_##a, \
.cfn = openssl_ops_hash_chained_##a },
- foreach_openssl_hash_op
+ foreach_openssl_hash_op
#undef _
{}
};
diff --git a/src/examples/srv6-sample-localsid/node.c b/src/examples/srv6-sample-localsid/node.c
index e3a3259e877..4d727498c03 100644
--- a/src/examples/srv6-sample-localsid/node.c
+++ b/src/examples/srv6-sample-localsid/node.c
@@ -173,7 +173,7 @@ srv6_localsid_sample_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_fram
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
while (n_left_from > 0)
{
diff --git a/src/plugins/abf/abf_itf_attach.c b/src/plugins/abf/abf_itf_attach.c
index 04e5c4c40c2..3e55df52562 100644
--- a/src/plugins/abf/abf_itf_attach.c
+++ b/src/plugins/abf/abf_itf_attach.c
@@ -681,18 +681,20 @@ VLIB_REGISTER_NODE (abf_ip6_node) =
}
};
-VNET_FEATURE_INIT (abf_ip4_feat, static) =
-{
+VNET_FEATURE_INIT (abf_ip4_feat, static) = {
.arc_name = "ip4-unicast",
.node_name = "abf-input-ip4",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa"),
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+ "ip4-full-reassembly-feature",
+ "ip4-sv-reassembly-feature"),
};
-VNET_FEATURE_INIT (abf_ip6_feat, static) =
-{
+VNET_FEATURE_INIT (abf_ip6_feat, static) = {
.arc_name = "ip6-unicast",
.node_name = "abf-input-ip6",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"),
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa",
+ "ip6-full-reassembly-feature",
+ "ip6-sv-reassembly-feature"),
};
static fib_node_t *
diff --git a/src/plugins/acl/elog_acl_trace.h b/src/plugins/acl/elog_acl_trace.h
index 0c4f68f7b0f..ae2ef8588ea 100644
--- a/src/plugins/acl/elog_acl_trace.h
+++ b/src/plugins/acl/elog_acl_trace.h
@@ -19,119 +19,143 @@
/* use like: elog_acl_cond_trace_X1(am, (x < 0), "foobar: %d", "i4", int32_value); */
-#define elog_acl_cond_trace_X1(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \
-do { \
- if (trace_cond) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- } \
-} while (0)
-
+#define elog_acl_cond_trace_X1(am, trace_cond, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1) \
+ do \
+ { \
+ if (trace_cond) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_cond_trace_X2(am, (x<0), "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */
-#define elog_acl_cond_trace_X2(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, \
- acl_elog_val1, acl_elog_val2) \
-do { \
- if (trace_cond) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- } \
-} while (0)
-
+#define elog_acl_cond_trace_X2(am, trace_cond, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2) \
+ do \
+ { \
+ if (trace_cond) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_cond_trace_X3(am, (x<0), "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */
-#define elog_acl_cond_trace_X3(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
- acl_elog_val2, acl_elog_val3) \
-do { \
- if (trace_cond) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
- - sizeof(acl_elog_val3)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- typeof(acl_elog_val3) val3; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- ed->val3 = acl_elog_val3; \
- } \
-} while (0)
-
+#define elog_acl_cond_trace_X3(am, trace_cond, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2, acl_elog_val3) \
+ do \
+ { \
+ if (trace_cond) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2) - \
+ sizeof (acl_elog_val3)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ typeof (acl_elog_val3) val3; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ ed->val3 = acl_elog_val3; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_cond_trace_X4(am, (x<0), "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */
-#define elog_acl_cond_trace_X4(am, trace_cond, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
- acl_elog_val2, acl_elog_val3, acl_elog_val4) \
-do { \
- if (trace_cond) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
- - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- typeof(acl_elog_val3) val3; \
- typeof(acl_elog_val4) val4; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- ed->val3 = acl_elog_val3; \
- ed->val4 = acl_elog_val4; \
- } \
-} while (0)
-
+#define elog_acl_cond_trace_X4(am, trace_cond, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2, acl_elog_val3, acl_elog_val4) \
+ do \
+ { \
+ if (trace_cond) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2) - \
+ sizeof (acl_elog_val3) - \
+ sizeof (acl_elog_val4)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ typeof (acl_elog_val3) val3; \
+ typeof (acl_elog_val4) val4; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ ed->val3 = acl_elog_val3; \
+ ed->val4 = acl_elog_val4; \
+ } \
+ } \
+ while (0)
#endif
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index c4a971aada3..f1ea8dfaf0a 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -110,7 +110,7 @@ typedef struct {
u8 as_u8[2];
u16 as_u16;
} tcp_flags_seen; ; /* +2 bytes = 62 */
- u16 thread_index; /* +2 bytes = 64 */
+ clib_thread_index_t thread_index; /* +2 bytes = 64 */
u64 link_enqueue_time; /* 8 byte = 8 */
u32 link_prev_idx; /* +4 bytes = 12 */
u32 link_next_idx; /* +4 bytes = 16 */
@@ -133,7 +133,7 @@ typedef struct {
u64 as_u64;
struct {
u32 session_index;
- u16 thread_index;
+ clib_thread_index_t thread_index;
u16 intf_policy_epoch;
};
};
@@ -255,119 +255,143 @@ u8 *format_acl_plugin_5tuple (u8 * s, va_list * args);
/* use like: elog_acl_maybe_trace_X1(am, "foobar: %d", "i4", int32_value); */
-#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1) \
-do { \
- if (am->trace_sessions) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- } \
-} while (0)
-
+#define elog_acl_maybe_trace_X1(am, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1) \
+ do \
+ { \
+ if (am->trace_sessions) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_maybe_trace_X2(am, "foobar: %d some u64: %lu", "i4i8", int32_value, int64_value); */
-#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, acl_elog_trace_format_args, \
- acl_elog_val1, acl_elog_val2) \
-do { \
- if (am->trace_sessions) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- } \
-} while (0)
-
+#define elog_acl_maybe_trace_X2(am, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2) \
+ do \
+ { \
+ if (am->trace_sessions) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_maybe_trace_X3(am, "foobar: %d some u64 %lu baz: %d", "i4i8i4", int32_value, u64_value, int_value); */
-#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
- acl_elog_val2, acl_elog_val3) \
-do { \
- if (am->trace_sessions) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
- - sizeof(acl_elog_val3)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- typeof(acl_elog_val3) val3; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- ed->val3 = acl_elog_val3; \
- } \
-} while (0)
-
+#define elog_acl_maybe_trace_X3(am, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2, acl_elog_val3) \
+ do \
+ { \
+ if (am->trace_sessions) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2) - \
+ sizeof (acl_elog_val3)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ typeof (acl_elog_val3) val3; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ ed->val3 = acl_elog_val3; \
+ } \
+ } \
+ while (0)
/* use like: elog_acl_maybe_trace_X4(am, "foobar: %d some int %d baz: %d bar: %d", "i4i4i4i4", int32_value, int32_value2, int_value, int_value); */
-#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, acl_elog_trace_format_args, acl_elog_val1, \
- acl_elog_val2, acl_elog_val3, acl_elog_val4) \
-do { \
- if (am->trace_sessions) { \
- CLIB_UNUSED(struct { u8 available_space[18 - sizeof(acl_elog_val1) - sizeof(acl_elog_val2) \
- - sizeof(acl_elog_val3) -sizeof(acl_elog_val4)]; } *static_check); \
- u16 thread_index = os_get_thread_index (); \
- vlib_worker_thread_t * w = vlib_worker_threads + thread_index; \
- ELOG_TYPE_DECLARE (e) = \
- { \
- .format = "(%02d) " acl_elog_trace_format_label, \
- .format_args = "i2" acl_elog_trace_format_args, \
- }; \
- CLIB_PACKED(struct \
- { \
- u16 thread; \
- typeof(acl_elog_val1) val1; \
- typeof(acl_elog_val2) val2; \
- typeof(acl_elog_val3) val3; \
- typeof(acl_elog_val4) val4; \
- }) *ed; \
- ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
- ed->thread = thread_index; \
- ed->val1 = acl_elog_val1; \
- ed->val2 = acl_elog_val2; \
- ed->val3 = acl_elog_val3; \
- ed->val4 = acl_elog_val4; \
- } \
-} while (0)
-
+#define elog_acl_maybe_trace_X4(am, acl_elog_trace_format_label, \
+ acl_elog_trace_format_args, acl_elog_val1, \
+ acl_elog_val2, acl_elog_val3, acl_elog_val4) \
+ do \
+ { \
+ if (am->trace_sessions) \
+ { \
+ CLIB_UNUSED (struct { \
+ u8 available_space[18 - sizeof (acl_elog_val1) - \
+ sizeof (acl_elog_val2) - \
+ sizeof (acl_elog_val3) - \
+ sizeof (acl_elog_val4)]; \
+ } * static_check); \
+ clib_thread_index_t thread_index = os_get_thread_index (); \
+ vlib_worker_thread_t *w = vlib_worker_threads + thread_index; \
+ ELOG_TYPE_DECLARE (e) = { \
+ .format = "(%02d) " acl_elog_trace_format_label, \
+ .format_args = "i2" acl_elog_trace_format_args, \
+ }; \
+ CLIB_PACKED (struct { \
+ u16 thread; \
+ typeof (acl_elog_val1) val1; \
+ typeof (acl_elog_val2) val2; \
+ typeof (acl_elog_val3) val3; \
+ typeof (acl_elog_val4) val4; \
+ }) * \
+ ed; \
+ ed = \
+ ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track); \
+ ed->thread = thread_index; \
+ ed->val1 = acl_elog_val1; \
+ ed->val2 = acl_elog_val2; \
+ ed->val3 = acl_elog_val3; \
+ ed->val4 = acl_elog_val4; \
+ } \
+ } \
+ while (0)
#endif
diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c
index 9c3c662a8f1..b4f86208a71 100644
--- a/src/plugins/acl/hash_lookup.c
+++ b/src/plugins/acl/hash_lookup.c
@@ -946,31 +946,15 @@ hash_acl_reapply(acl_main_t *am, u32 lc_index, int acl_index)
static void
make_ip6_address_mask(ip6_address_t *addr, u8 prefix_len)
{
+ ASSERT (prefix_len <= 128);
ip6_address_mask_from_width(addr, prefix_len);
}
-
-/* Maybe should be moved into the core somewhere */
-always_inline void
-ip4_address_mask_from_width (ip4_address_t * a, u32 width)
-{
- int i, byte, bit, bitnum;
- ASSERT (width <= 32);
- clib_memset (a, 0, sizeof (a[0]));
- for (i = 0; i < width; i++)
- {
- bitnum = (7 - (i & 7));
- byte = i / 8;
- bit = 1 << bitnum;
- a->as_u8[byte] |= bit;
- }
-}
-
-
static void
make_ip4_address_mask(ip4_address_t *addr, u8 prefix_len)
{
- ip4_address_mask_from_width(addr, prefix_len);
+ ASSERT (prefix_len <= 32);
+ ip4_preflen_to_mask (prefix_len, addr);
}
static void
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index eb9f0de920f..f39285344b0 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -268,8 +268,8 @@ fa_acl_match_ip6_addr (ip6_address_t * addr1, ip6_address_t * addr2,
}
if (prefixlen % 8)
{
- u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
- u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
+ u8 b1 = *((u8 *) addr1 + prefixlen / 8);
+ u8 b2 = *((u8 *) addr2 + prefixlen / 8);
u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
return (b1 & mask0) == b2;
}
@@ -715,8 +715,10 @@ acl_plugin_match_5tuple_inline_and_count (void *p_acl_main, u32 lc_index,
r_acl_pos_p, r_acl_match_p, r_rule_match_p, trace_bitmap);
}
if (PREDICT_TRUE(ret)) {
- u16 thread_index = os_get_thread_index ();
- vlib_increment_combined_counter(am->combined_acl_counters + *r_acl_match_p, thread_index, *r_rule_match_p, 1, packet_size);
+ clib_thread_index_t thread_index = os_get_thread_index ();
+ vlib_increment_combined_counter (
+ am->combined_acl_counters + *r_acl_match_p, thread_index,
+ *r_rule_match_p, 1, packet_size);
}
return ret;
}
diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c
index 418baef9b6b..10f0e92c808 100644
--- a/src/plugins/acl/sess_mgmt_node.c
+++ b/src/plugins/acl/sess_mgmt_node.c
@@ -136,16 +136,17 @@ fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess)
}
static u64
-acl_fa_get_list_head_expiry_time (acl_main_t * am,
- acl_fa_per_worker_data_t * pw, u64 now,
- u16 thread_index, int timeout_type)
+acl_fa_get_list_head_expiry_time (acl_main_t *am, acl_fa_per_worker_data_t *pw,
+ u64 now, clib_thread_index_t thread_index,
+ int timeout_type)
{
return pw->fa_conn_list_head_expiry_time[timeout_type];
}
static int
-acl_fa_conn_time_to_check (acl_main_t * am, acl_fa_per_worker_data_t * pw,
- u64 now, u16 thread_index, u32 session_index)
+acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw,
+ u64 now, clib_thread_index_t thread_index,
+ u32 session_index)
{
if (session_index == FA_SESSION_BOGUS_INDEX)
return 0;
@@ -162,7 +163,8 @@ acl_fa_conn_time_to_check (acl_main_t * am, acl_fa_per_worker_data_t * pw,
* return the total number of sessions reclaimed.
*/
static int
-acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
+acl_fa_check_idle_sessions (acl_main_t *am, clib_thread_index_t thread_index,
+ u64 now)
{
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
fa_full_session_id_t fsid;
@@ -429,7 +431,7 @@ acl_fa_worker_conn_cleaner_process (vlib_main_t * vm,
{
acl_main_t *am = &acl_main;
u64 now = clib_cpu_time_now ();
- u16 thread_index = os_get_thread_index ();
+ clib_thread_index_t thread_index = os_get_thread_index ();
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
int num_expired;
elog_acl_maybe_trace_X1 (am,
diff --git a/src/plugins/acl/session_inlines.h b/src/plugins/acl/session_inlines.h
index edc8a7057ee..c98194005a4 100644
--- a/src/plugins/acl/session_inlines.h
+++ b/src/plugins/acl/session_inlines.h
@@ -115,16 +115,16 @@ fa_session_get_timeout (acl_main_t * am, fa_session_t * sess)
}
always_inline fa_session_t *
-get_session_ptr_no_check (acl_main_t * am, u16 thread_index,
+get_session_ptr_no_check (acl_main_t *am, clib_thread_index_t thread_index,
u32 session_index)
{
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
return pool_elt_at_index (pw->fa_sessions_pool, session_index);
}
-
always_inline fa_session_t *
-get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index)
+get_session_ptr (acl_main_t *am, clib_thread_index_t thread_index,
+ u32 session_index)
{
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
@@ -135,7 +135,8 @@ get_session_ptr (acl_main_t * am, u16 thread_index, u32 session_index)
}
always_inline int
-is_valid_session_ptr (acl_main_t * am, u16 thread_index, fa_session_t * sess)
+is_valid_session_ptr (acl_main_t *am, clib_thread_index_t thread_index,
+ fa_session_t *sess)
{
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
return ((sess != 0)
@@ -470,10 +471,10 @@ acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
am->fa_conn_table_max_entries);
}
-
always_inline void
-acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index,
- u32 sw_if_index, u64 now)
+acl_fa_try_recycle_session (acl_main_t *am, int is_input,
+ clib_thread_index_t thread_index, u32 sw_if_index,
+ u64 now)
{
/* try to recycle a TCP transient session */
acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
diff --git a/src/plugins/adl/ip4_allowlist.c b/src/plugins/adl/ip4_allowlist.c
index 4c755725ea7..a44cb51762f 100644
--- a/src/plugins/adl/ip4_allowlist.c
+++ b/src/plugins/adl/ip4_allowlist.c
@@ -58,7 +58,7 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
adl_feature_type_t next_index;
adl_main_t *cm = &adl_main;
vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 allowed_packets;
from = vlib_frame_vector_args (frame);
diff --git a/src/plugins/adl/ip6_allowlist.c b/src/plugins/adl/ip6_allowlist.c
index 5f38484666b..f9d964645c4 100644
--- a/src/plugins/adl/ip6_allowlist.c
+++ b/src/plugins/adl/ip6_allowlist.c
@@ -58,7 +58,7 @@ VLIB_NODE_FN (ip6_adl_allowlist_node) (vlib_main_t * vm,
adl_feature_type_t next_index;
adl_main_t *cm = &adl_main;
vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 allowed_packets;
from = vlib_frame_vector_args (frame);
diff --git a/src/plugins/af_packet/af_packet.c b/src/plugins/af_packet/af_packet.c
index 8cb2af27d7f..f3a1f495fe7 100644
--- a/src/plugins/af_packet/af_packet.c
+++ b/src/plugins/af_packet/af_packet.c
@@ -30,7 +30,7 @@
#include <vppinfra/linux/sysfs.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ip/ip.h>
#include <vnet/devices/netlink.h>
#include <vnet/ethernet/ethernet.h>
diff --git a/src/plugins/af_packet/node.c b/src/plugins/af_packet/node.c
index 279f11c0183..e60a037b093 100644
--- a/src/plugins/af_packet/node.c
+++ b/src/plugins/af_packet/node.c
@@ -269,7 +269,7 @@ af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 block_nr = rx_queue->rx_req->req3.tp_block_nr;
u8 *block_start = 0;
uword n_trace = vlib_get_trace_count (vm, node);
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes;
u32 num_pkts = 0;
@@ -571,7 +571,7 @@ af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 frame_num = rx_queue->rx_req->req.tp_frame_nr;
u8 *block_start = rx_queue->rx_ring[block];
uword n_trace = vlib_get_trace_count (vm, node);
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes;
u32 sw_if_index = apif->sw_if_index;
diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c
index 63a276ce51e..8d9496206d2 100644
--- a/src/plugins/af_xdp/device.c
+++ b/src/plugins/af_xdp/device.c
@@ -24,7 +24,7 @@
#include <linux/limits.h>
#include <bpf/bpf.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vlib/pci/pci.h>
#include <vppinfra/linux/netns.h>
#include <vppinfra/linux/sysfs.h>
diff --git a/src/plugins/cnat/cnat_snat_policy.c b/src/plugins/cnat/cnat_snat_policy.c
index cd9bfef492a..5f15b7d26c9 100644
--- a/src/plugins/cnat/cnat_snat_policy.c
+++ b/src/plugins/cnat/cnat_snat_policy.c
@@ -22,7 +22,8 @@ cnat_snat_policy_main_t cnat_snat_policy_main;
uword
unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args)
{
- u8 *a = va_arg (*args, u8 *);
+ cnat_snat_interface_map_type_t *a =
+ va_arg (*args, cnat_snat_interface_map_type_t *);
if (unformat (input, "include-v4"))
*a = CNAT_SNAT_IF_MAP_INCLUDE_V4;
else if (unformat (input, "include-v6"))
@@ -113,7 +114,7 @@ cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm,
vnet_main_t *vnm = vnet_get_main ();
int is_add = 1;
u32 sw_if_index = ~0;
- u32 table = 0;
+ cnat_snat_interface_map_type_t table = CNAT_SNAT_IF_MAP_INCLUDE_V4;
int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h
index d229d21adae..37eb62ec981 100644
--- a/src/plugins/cnat/cnat_types.h
+++ b/src/plugins/cnat/cnat_types.h
@@ -192,7 +192,7 @@ typedef struct cnat_timestamp_mpool_t_
typedef struct cnat_node_ctx_
{
f64 now;
- u32 thread_index;
+ clib_thread_index_t thread_index;
ip_address_family_t af;
u8 do_trace;
} cnat_node_ctx_t;
diff --git a/src/plugins/crypto_sw_scheduler/main.c b/src/plugins/crypto_sw_scheduler/main.c
index dc97ce937d9..bb1505a38cf 100644
--- a/src/plugins/crypto_sw_scheduler/main.c
+++ b/src/plugins/crypto_sw_scheduler/main.c
@@ -446,7 +446,7 @@ convert_async_crypto_id (vnet_crypto_op_id_t async_op_id, u32 *crypto_op,
static_always_inline vnet_crypto_async_frame_t *
crypto_sw_scheduler_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+ clib_thread_index_t *enqueue_thread_idx)
{
crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
crypto_sw_scheduler_per_thread_data_t *ptd =
diff --git a/src/plugins/ct6/ct6.h b/src/plugins/ct6/ct6.h
index 0b7deb07839..a6919174d86 100644
--- a/src/plugins/ct6/ct6.h
+++ b/src/plugins/ct6/ct6.h
@@ -46,7 +46,7 @@ typedef CLIB_PACKED (struct
typedef struct
{
ct6_session_key_t key;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 next_index;
u32 prev_index;
u32 hits;
@@ -95,7 +95,7 @@ static inline void
ct6_lru_remove (ct6_main_t * cmp, ct6_session_t * s0)
{
ct6_session_t *next_sess, *prev_sess;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 s0_index;
thread_index = s0->thread_index;
@@ -128,7 +128,7 @@ static inline void
ct6_lru_add (ct6_main_t * cmp, ct6_session_t * s0, f64 now)
{
ct6_session_t *next_sess;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 s0_index;
s0->hits++;
diff --git a/src/plugins/dev_armada/pp2/rx.c b/src/plugins/dev_armada/pp2/rx.c
index 5b0e8d35000..8eff72d6157 100644
--- a/src/plugins/dev_armada/pp2/rx.c
+++ b/src/plugins/dev_armada/pp2/rx.c
@@ -140,6 +140,7 @@ mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
n_desc);
n_sel = vlib_frame_bitmap_count_set_bits (selected_bmp);
n_avail -= n_sel;
+ vlib_frame_bitmap_xor (avail_bmp, selected_bmp);
if (uword_bitmap_is_bit_set (mp->valid_dsa_src_bitmap, index))
{
@@ -207,7 +208,7 @@ mrvl_pp2_rx_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_dev_port_t *port = rxq->port;
vnet_dev_t *dev = port->dev;
mvpp2_device_t *md = vnet_dev_get_data (dev);
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
struct pp2_hif *hif = md->hif[thread_index];
struct pp2_bpool *bpool = md->thread[thread_index].bpool;
struct buff_release_entry *bre = md->thread[thread_index].bre;
diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c
index ed5c47ed505..a81a33d5f22 100644
--- a/src/plugins/dev_ena/ena.c
+++ b/src/plugins/dev_ena/ena.c
@@ -13,7 +13,6 @@
static ena_aq_host_info_t host_info = {
.os_type = 3, /* DPDK */
- .kernel_ver_str = VPP_BUILD_VER,
.os_dist_str = VPP_BUILD_VER,
.driver_version = {
.major = 16,
@@ -171,6 +170,8 @@ ena_init (vlib_main_t *vm, vnet_dev_t *dev)
*ed->host_info = host_info;
ed->host_info->num_cpus = vlib_get_n_threads ();
+ strncpy ((char *) ed->host_info->kernel_ver_str, VPP_BUILD_VER,
+ sizeof (ed->host_info->kernel_ver_str) - 1);
ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info);
if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG,
diff --git a/src/plugins/dev_iavf/virtchnl.h b/src/plugins/dev_iavf/virtchnl.h
index 2099104c8ad..72158684e9e 100644
--- a/src/plugins/dev_iavf/virtchnl.h
+++ b/src/plugins/dev_iavf/virtchnl.h
@@ -560,6 +560,7 @@ typedef struct
{
u16 unicast_promisc : 1;
u16 multicast_promisc : 1;
+ u16 unused : 14;
};
u16 flags;
};
diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c
index 800f24a008a..49b6f61375c 100644
--- a/src/plugins/dev_octeon/crypto.c
+++ b/src/plugins/dev_octeon/crypto.c
@@ -1354,7 +1354,7 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
vnet_crypto_key_t *key = vnet_crypto_get_key (key_index);
roc_se_cipher_type enc_type = 0;
roc_se_auth_type auth_type = 0;
- u32 digest_len = ~0;
+ u32 digest_len = 16;
i32 rv = 0;
switch (key->alg)
@@ -1366,9 +1366,6 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
sess->aes_gcm = 1;
sess->iv_offset = 0;
sess->iv_length = 16;
- sess->cpt_ctx.mac_len = 16;
- sess->cpt_op = type;
- digest_len = 16;
break;
case VNET_CRYPTO_ALG_CHACHA20_POLY1305:
enc_type = ROC_SE_CHACHA20;
@@ -1381,6 +1378,9 @@ oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
return -1;
}
+ sess->cpt_ctx.mac_len = digest_len;
+ sess->cpt_op = type;
+
rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data, key->length);
if (rv)
{
@@ -1827,7 +1827,7 @@ oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *
oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+ clib_thread_index_t *enqueue_thread_idx)
{
oct_crypto_main_t *ocm = &oct_crypto_main;
u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED;
@@ -1940,7 +1940,7 @@ oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev)
}
int
-oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev)
+oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd)
{
oct_crypto_main_t *ocm = &oct_crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -1961,7 +1961,7 @@ oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev)
* Each pending queue will get number of cpt desc / number of cores.
* And that desc count is shared across inflight entries.
*/
- n_inflight_req = (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains);
+ n_inflight_req = (ocd->n_desc / tm->n_vlib_mains);
for (i = 0; i < tm->n_vlib_mains; ++i)
{
diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h
index 5bd26f6b9be..a99ee12ddb2 100644
--- a/src/plugins/dev_octeon/crypto.h
+++ b/src/plugins/dev_octeon/crypto.h
@@ -11,6 +11,9 @@
#define OCT_MAX_N_CPT_DEV 2
+#define OCT_CPT_LF_DEF_NB_DESC 16384
+
+#define OCT_CPT_LF_MIN_NB_DESC 1024
#define OCT_CPT_LF_MAX_NB_DESC 128000
/* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */
@@ -81,6 +84,7 @@ typedef struct
struct roc_cpt_lmtline lmtline;
struct roc_cpt_lf lf;
vnet_dev_t *dev;
+ u32 n_desc;
} oct_crypto_dev_t;
typedef struct
@@ -207,9 +211,10 @@ int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame);
int oct_crypto_enqueue_aead_aad_0_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame);
-vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm,
- u32 *nb_elts_processed,
- u32 *enqueue_thread_idx);
+vnet_crypto_async_frame_t *
+oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ clib_thread_index_t *enqueue_thread_idx);
int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev);
-int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev);
+int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev,
+ oct_crypto_dev_t *ocd);
#endif /* _CRYPTO_H_ */
diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c
index 561cbe94fed..69fb097e91f 100644
--- a/src/plugins/dev_octeon/init.c
+++ b/src/plugins/dev_octeon/init.c
@@ -61,6 +61,22 @@ static struct
#undef _
};
+static vnet_dev_arg_t oct_dev_args[] = {
+ {
+ .id = OCT_DEV_ARG_CRYPTO_N_DESC,
+ .name = "n_desc",
+ .desc = "number of cpt descriptors, applicable to cpt devices only",
+ .type = VNET_DEV_ARG_TYPE_UINT32,
+ .default_val.uint32 = OCT_CPT_LF_DEF_NB_DESC,
+ },
+ {
+ .id = OCT_DEV_ARG_END,
+ .name = "end",
+ .desc = "Argument end",
+ .type = VNET_DEV_ARG_END,
+ },
+};
+
static u8 *
oct_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
{
@@ -241,7 +257,7 @@ oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd)
cpt_lf = &ocd->lf;
cpt_lmtline = &ocd->lmtline;
- cpt_lf->nb_desc = OCT_CPT_LF_MAX_NB_DESC;
+ cpt_lf->nb_desc = ocd->n_desc;
cpt_lf->lf_id = 0;
if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0)
return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init");
@@ -261,6 +277,7 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
extern oct_plt_init_param_t oct_plt_init_param;
oct_device_t *cd = vnet_dev_get_data (dev);
oct_crypto_dev_t *ocd = NULL;
+ u32 n_desc;
int rrv;
if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started)
@@ -274,6 +291,27 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
ocd->roc_cpt->pci_dev = &cd->plt_pci_dev;
ocd->dev = dev;
+ ocd->n_desc = OCT_CPT_LF_DEF_NB_DESC;
+
+ foreach_vnet_dev_args (arg, dev)
+ {
+ if (arg->id == OCT_DEV_ARG_CRYPTO_N_DESC &&
+ vnet_dev_arg_get_uint32 (arg))
+ {
+ n_desc = vnet_dev_arg_get_uint32 (arg);
+ if (n_desc < OCT_CPT_LF_MIN_NB_DESC ||
+ n_desc > OCT_CPT_LF_MAX_NB_DESC)
+ {
+ log_err (dev,
+ "number of cpt descriptors should be within range "
+ "of %u and %u",
+ OCT_CPT_LF_MIN_NB_DESC, OCT_CPT_LF_MAX_NB_DESC);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ ocd->n_desc = vnet_dev_arg_get_uint32 (arg);
+ }
+ }
if ((rrv = roc_cpt_dev_init (ocd->roc_cpt)))
return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init");
@@ -290,7 +328,7 @@ oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
* Initialize s/w queues, which are common across multiple
* crypto devices
*/
- oct_conf_sw_queue (vm, dev);
+ oct_conf_sw_queue (vm, dev, ocd);
ocm->crypto_dev[0] = ocd;
}
@@ -396,6 +434,7 @@ VNET_DEV_REGISTER_DRIVER (octeon) = {
.free = oct_free,
.probe = oct_probe,
},
+ .args = oct_dev_args,
};
static clib_error_t *
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
index ccf8f62880d..0cf937528f0 100644
--- a/src/plugins/dev_octeon/octeon.h
+++ b/src/plugins/dev_octeon/octeon.h
@@ -25,6 +25,12 @@
typedef enum
{
+ OCT_DEV_ARG_CRYPTO_N_DESC = 1,
+ OCT_DEV_ARG_END,
+} oct_dev_args_t;
+
+typedef enum
+{
OCT_DEVICE_TYPE_UNKNOWN = 0,
OCT_DEVICE_TYPE_RVU_PF,
OCT_DEVICE_TYPE_RVU_VF,
diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c
index 473f2efa93e..20a90e34b0e 100644
--- a/src/plugins/dma_intel/dsa.c
+++ b/src/plugins/dma_intel/dsa.c
@@ -103,7 +103,6 @@ intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b,
clib_memcpy_fast (desc->dst, desc->src, desc->size);
}
b->status = INTEL_DSA_STATUS_CPU_SUCCESS;
- ch->submitted++;
return;
}
@@ -407,6 +406,7 @@ intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
/* fallback to software if exception happened */
intel_dsa_batch_fallback (vm, b, ch);
glitch = 1 & b->barrier_before_last;
+ t->pending_batches[n++] = b;
}
else
{
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c
index c60f9c886ff..af695580363 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev.c
@@ -128,14 +128,14 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms,
xform_cipher->cipher.algo = cipher_algo;
xform_cipher->cipher.key.data = key_cipher->data;
- xform_cipher->cipher.key.length = vec_len (key_cipher->data);
+ xform_cipher->cipher.key.length = key_cipher->length;
xform_cipher->cipher.iv.length = 16;
xform_cipher->cipher.iv.offset = CRYPTODEV_IV_OFFSET;
xform_auth->auth.algo = auth_algo;
xform_auth->auth.digest_length = digest_len;
xform_auth->auth.key.data = key_auth->data;
- xform_auth->auth.key.length = vec_len (key_auth->data);
+ xform_auth->auth.key.length = key_auth->length;
return 0;
}
@@ -608,7 +608,7 @@ format_cryptodev_inst (u8 * s, va_list * args)
cryptodev_main_t *cmt = &cryptodev_main;
u32 inst = va_arg (*args, u32);
cryptodev_inst_t *cit = cmt->cryptodev_inst + inst;
- u32 thread_index = 0;
+ clib_thread_index_t thread_index = 0;
struct rte_cryptodev_info info;
rte_cryptodev_info_get (cit->dev_id, &info);
@@ -670,7 +670,7 @@ cryptodev_show_cache_rings_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
cryptodev_main_t *cmt = &cryptodev_main;
- u32 thread_index = 0;
+ clib_thread_index_t thread_index = 0;
u16 i;
vec_foreach_index (thread_index, cmt->per_thread_data)
{
@@ -756,7 +756,7 @@ cryptodev_set_assignment_fn (vlib_main_t * vm, unformat_input_t * input,
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 thread_index, inst_index;
+ clib_thread_index_t thread_index, inst_index;
u32 thread_present = 0, inst_present = 0;
clib_error_t *error = 0;
int ret;
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
index 8d55e4fbf0f..2282ffac10c 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
@@ -461,7 +461,8 @@ error_exit:
}
static_always_inline u8
-cryptodev_frame_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
+cryptodev_frame_dequeue_internal (vlib_main_t *vm,
+ clib_thread_index_t *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
@@ -563,7 +564,7 @@ cryptodev_enqueue_frame (vlib_main_t *vm, cryptodev_cache_ring_elt_t *ring_elt)
static_always_inline vnet_crypto_async_frame_t *
cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+ clib_thread_index_t *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
vnet_crypto_main_t *cm = &crypto_main;
@@ -670,7 +671,7 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
vec_foreach (cet, cmt->per_thread_data)
{
- u32 thread_index = cet - cmt->per_thread_data;
+ clib_thread_index_t thread_index = cet - cmt->per_thread_data;
u32 numa = vlib_get_main_by_index (thread_index)->numa_node;
name = format (0, "vpp_cop_pool_%u_%u", numa, thread_index);
cet->cop_pool = rte_mempool_create (
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
index 67ab9c89e67..40d0a4299da 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
@@ -463,7 +463,8 @@ cryptodev_post_dequeue (void *frame, u32 index, u8 is_op_success)
}
static_always_inline u8
-cryptodev_raw_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
+cryptodev_raw_dequeue_internal (vlib_main_t *vm,
+ clib_thread_index_t *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
@@ -537,7 +538,7 @@ cryptodev_enqueue_frame_to_qat (vlib_main_t *vm,
static_always_inline vnet_crypto_async_frame_t *
cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+ clib_thread_index_t *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
vnet_crypto_main_t *cm = &crypto_main;
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index d6eed5441b4..7671fc2639c 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -17,7 +17,7 @@
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <vppinfra/file.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <assert.h>
#include <vnet/ip/ip.h>
@@ -369,8 +369,7 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE)
{
clib_file_main_t *fm = &file_main;
- clib_file_t *f =
- pool_elt_at_index (fm->file_pool, rxq->clib_file_index);
+ clib_file_t *f = clib_file_get (fm, rxq->clib_file_index);
fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
}
}
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index c5abbd5f727..5fd936d1743 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -22,7 +22,7 @@
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#define foreach_dpdk_tx_func_error \
_(PKT_DROP, "Tx packet drops (dpdk tx failure)")
@@ -159,7 +159,7 @@ tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd,
{
dpdk_tx_queue_t *txq;
u32 n_retry;
- int n_sent = 0;
+ u32 n_sent = 0;
n_retry = 16;
txq = vec_elt_at_index (xd->tx_queues, queue_id);
@@ -279,9 +279,11 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f);
u32 n_packets = f->n_vectors;
u32 n_left;
- u32 thread_index = vm->thread_index;
+ u32 n_prep;
+ clib_thread_index_t thread_index = vm->thread_index;
int queue_id = tf->queue_id;
u8 is_shared = tf->shared_queue;
+ u8 offload_enabled = 0;
u32 tx_pkts = 0;
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
thread_index);
@@ -333,6 +335,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) &&
(or_flags & VNET_BUFFER_F_OFFLOAD)))
{
+ offload_enabled = 1;
dpdk_buffer_tx_offload (xd, b[0], mb[0]);
dpdk_buffer_tx_offload (xd, b[1], mb[1]);
dpdk_buffer_tx_offload (xd, b[2], mb[2]);
@@ -386,6 +389,7 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) &&
(or_flags & VNET_BUFFER_F_OFFLOAD)))
{
+ offload_enabled = 1;
dpdk_buffer_tx_offload (xd, b[0], mb[0]);
dpdk_buffer_tx_offload (xd, b[1], mb[1]);
}
@@ -408,7 +412,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
dpdk_validate_rte_mbuf (vm, b[0], 1);
- dpdk_buffer_tx_offload (xd, b[0], mb[0]);
+
+ if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD) &&
+ (b[0]->flags & VNET_BUFFER_F_OFFLOAD)))
+ {
+ offload_enabled = 1;
+ dpdk_buffer_tx_offload (xd, b[0], mb[0]);
+ }
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
@@ -418,32 +428,44 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
n_left--;
}
- /* transmit as many packets as possible */
+ /* prepare and transmit as many packets as possible */
tx_pkts = n_packets = mb - ptd->mbufs;
- n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id,
- is_shared);
+ n_prep = n_packets;
- {
- /* If there is no callback then drop any non-transmitted packets */
- if (PREDICT_FALSE (n_left))
- {
- tx_pkts -= n_left;
- vlib_simple_counter_main_t *cm;
- vnet_main_t *vnm = vnet_get_main ();
+ if (PREDICT_FALSE (offload_enabled &&
+ (xd->flags & DPDK_DEVICE_FLAG_TX_PREPARE)))
+ {
+ n_prep =
+ rte_eth_tx_prepare (xd->port_id, queue_id, ptd->mbufs, n_packets);
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_TX_ERROR);
+ /* If mbufs are malformed then drop any non-prepared packets */
+ if (PREDICT_FALSE (n_prep != n_packets))
+ {
+ n_left = n_packets - n_prep;
+ }
+ }
- vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index,
- n_left);
+ n_left +=
+ tx_burst_vector_internal (vm, xd, ptd->mbufs, n_prep, queue_id, is_shared);
- vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_left);
+ /* If there is no callback then drop any non-transmitted packets */
+ if (PREDICT_FALSE (n_left))
+ {
+ tx_pkts -= n_left;
+ vlib_simple_counter_main_t *cm;
+ vnet_main_t *vnm = vnet_get_main ();
- while (n_left--)
- rte_pktmbuf_free (ptd->mbufs[n_packets - n_left - 1]);
- }
- }
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_TX_ERROR);
+
+ vlib_increment_simple_counter (cm, thread_index, xd->sw_if_index,
+ n_left);
+
+ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
+ n_left);
+
+ rte_pktmbuf_free_bulk (&ptd->mbufs[tx_pkts], n_left);
+ }
return tx_pkts;
}
@@ -707,7 +729,7 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
else if (mode == VNET_HW_IF_RX_MODE_POLLING)
{
rxq = vec_elt_at_index (xd->rx_queues, qid);
- f = pool_elt_at_index (fm->file_pool, rxq->clib_file_index);
+ f = clib_file_get (fm, rxq->clib_file_index);
fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
}
else if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE))
@@ -715,7 +737,7 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
else
{
rxq = vec_elt_at_index (xd->rx_queues, qid);
- f = pool_elt_at_index (fm->file_pool, rxq->clib_file_index);
+ f = clib_file_get (fm, rxq->clib_file_index);
fm->file_update (f, UNIX_FILE_UPDATE_ADD);
}
if (rv)
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 2440439989f..70d9cc715dc 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -71,7 +71,8 @@ typedef uint16_t dpdk_portid_t;
_ (11, RX_FLOW_OFFLOAD, "rx-flow-offload") \
_ (12, RX_IP4_CKSUM, "rx-ip4-cksum") \
_ (13, INT_SUPPORTED, "int-supported") \
- _ (14, INT_UNMASKABLE, "int-unmaskable")
+ _ (14, INT_UNMASKABLE, "int-unmaskable") \
+ _ (15, TX_PREPARE, "tx-prepare")
typedef enum
{
@@ -131,6 +132,7 @@ typedef struct
u32 interface_number_from_port_id : 1;
u32 use_intel_phdr_cksum : 1;
u32 int_unmaskable : 1;
+ u32 need_tx_prepare : 1;
} dpdk_driver_t;
dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc);
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index 794953da55e..2067b118532 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -50,7 +50,7 @@ dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val)
void dpdk_counters_xstats_init (dpdk_device_t *xd);
static inline void
-dpdk_get_xstats (dpdk_device_t *xd, u32 thread_index)
+dpdk_get_xstats (dpdk_device_t *xd, clib_thread_index_t thread_index)
{
int ret;
int i;
@@ -101,7 +101,7 @@ static inline void
dpdk_update_counters (dpdk_device_t * xd, f64 now)
{
vnet_main_t *vnm = vnet_get_main ();
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
clib_memcpy_fast (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c
index 2fde041684c..469a4b5de2b 100644
--- a/src/plugins/dpdk/device/driver.c
+++ b/src/plugins/dpdk/device/driver.c
@@ -113,6 +113,7 @@ static dpdk_driver_t dpdk_drivers[] = {
.drivers = DPDK_DRIVERS ({ "net_ena", "AWS ENA VF" }),
.interface_name_prefix = "VirtualFunctionEthernet",
.enable_rxq_int = 1,
+ .need_tx_prepare = 1,
},
{
.drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }),
diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c
index fd301da8ea5..f0199c929cc 100644
--- a/src/plugins/dpdk/device/format.c
+++ b/src/plugins/dpdk/device/format.c
@@ -117,8 +117,8 @@
_ (TX_MACSEC, "TX MACSEC") \
_ (TX_OUTER_IPV4, "TX outer IPV4") \
_ (TX_OUTER_IPV6, "TX outer IPV6") \
- _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
- _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \
+ _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of TX pkt. computed by NIC") \
+ _ (TX_OUTER_UDP_CKSUM, "Outer UDP cksum of TX pkt. computed by NIC") \
_ (TX_QINQ, "TX QINQ") \
_ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
_ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \
@@ -133,7 +133,7 @@
_ (TX_TUNNEL_UDP, "TX tunnel UDP") \
_ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
_ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \
- _ (TX_UDP_CKSUM, "TX UDP cksum") \
+ _ (TX_UDP_CKSUM, "UDP cksum of TX pkt. computed by NIC") \
_ (TX_UDP_SEG, "TX UDP SEG") \
_ (TX_VLAN, "TX packet is a 802.1q VLAN packet")
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index aaa2c1f4a68..83c2614e97e 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -18,7 +18,7 @@
#include <vppinfra/format.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/linux/sysfs.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vlib/log.h>
#include <vnet/vnet.h>
@@ -390,6 +390,8 @@ dpdk_lib_init (dpdk_main_t * dm)
dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1);
if (dr->int_unmaskable)
dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1);
+ if (dr->need_tx_prepare)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_TX_PREPARE, 1);
}
else
dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name);
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index ca1690b708f..2f4c10ebf46 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -340,8 +340,9 @@ dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd,
}
static_always_inline u32
-dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
- vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
+dpdk_device_input (vlib_main_t *vm, dpdk_main_t *dm, dpdk_device_t *xd,
+ vlib_node_runtime_t *node, clib_thread_index_t thread_index,
+ u16 queue_id)
{
uword n_rx_packets = 0, n_rx_bytes;
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, queue_id);
@@ -543,7 +544,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
dpdk_device_t *xd;
uword n_rx_packets = 0;
vnet_hw_if_rxq_poll_vector_t *pv;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
/*
* Poll all devices on this cpu for input/interrupts.
diff --git a/src/plugins/geneve/decap.c b/src/plugins/geneve/decap.c
index c64121e2829..3a1de2af217 100644
--- a/src/plugins/geneve/decap.c
+++ b/src/plugins/geneve/decap.c
@@ -79,7 +79,7 @@ geneve_input (vlib_main_t * vm,
geneve4_tunnel_key_t last_key4;
geneve6_tunnel_key_t last_key6;
u32 pkts_decapsulated = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
diff --git a/src/plugins/geneve/encap.c b/src/plugins/geneve/encap.c
index 609da2218cf..581c47983df 100644
--- a/src/plugins/geneve/encap.c
+++ b/src/plugins/geneve/encap.c
@@ -60,7 +60,7 @@ geneve_encap_inline (vlib_main_t * vm,
vnet_interface_main_t *im = &vnm->interface_main;
u32 pkts_encapsulated = 0;
u16 old_l0 = 0, old_l1 = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
u32 sw_if_index0 = ~0, sw_if_index1 = ~0;
u32 next0 = 0, next1 = 0;
diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c
index 093d85ef13c..4e0f8bf8e16 100644
--- a/src/plugins/gtpu/gtpu_decap.c
+++ b/src/plugins/gtpu/gtpu_decap.c
@@ -85,7 +85,7 @@ gtpu_input (vlib_main_t * vm,
gtpu4_tunnel_key_t last_key4;
gtpu6_tunnel_key_t last_key6;
u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
if (is_ip4)
@@ -1838,7 +1838,7 @@ gtpu_flow_input (vlib_main_t * vm,
vnet_main_t * vnm = gtm->vnet_main;
vnet_interface_main_t * im = &vnm->interface_main;
u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
u8 ip_err0, ip_err1, udp_err0, udp_err1, csum_err0, csum_err1;
diff --git a/src/plugins/gtpu/gtpu_encap.c b/src/plugins/gtpu/gtpu_encap.c
index 2c3c46a4be2..1caca1da915 100644
--- a/src/plugins/gtpu/gtpu_encap.c
+++ b/src/plugins/gtpu/gtpu_encap.c
@@ -67,7 +67,7 @@ gtpu_encap_inline (vlib_main_t * vm,
vnet_interface_main_t * im = &vnm->interface_main;
u32 pkts_encapsulated = 0;
u16 old_l0 = 0, old_l1 = 0, old_l2 = 0, old_l3 = 0;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
u32 sw_if_index0 = 0, sw_if_index1 = 0, sw_if_index2 = 0, sw_if_index3 = 0;
u32 next0 = 0, next1 = 0, next2 = 0, next3 = 0;
diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt
index eae100949d4..3e80a84aae4 100644
--- a/src/plugins/hs_apps/CMakeLists.txt
+++ b/src/plugins/hs_apps/CMakeLists.txt
@@ -71,7 +71,11 @@ if(VPP_BUILD_VCL_TESTS)
"vcl/${test}.c"
vcl/vcl_test_protos.c
LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
- NO_INSTALL
)
endforeach()
+
+ add_vpp_executable(vcl_test_cl_udp SOURCES "vcl/vcl_test_cl_udp.c"
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
+ NO_INSTALL
+ )
endif(VPP_BUILD_VCL_TESTS)
diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c
index ff5a3bd6b3c..1c0e49c716b 100644
--- a/src/plugins/hs_apps/echo_client.c
+++ b/src/plugins/hs_apps/echo_client.c
@@ -53,7 +53,7 @@ signal_evt_to_cli (int code)
}
static inline ec_worker_t *
-ec_worker_get (u32 thread_index)
+ec_worker_get (clib_thread_index_t thread_index)
{
return vec_elt_at_index (ec_main.wrk, thread_index);
}
@@ -79,21 +79,29 @@ ec_session_get (ec_worker_t *wrk, u32 ec_index)
static void
send_data_chunk (ec_main_t *ecm, ec_session_t *es)
{
+ const u64 max_burst = 128000;
u8 *test_data = ecm->connect_test_data;
int test_buf_len, test_buf_offset, rv;
+ u64 bytes_to_send;
u32 bytes_this_chunk;
+ svm_fifo_t *f = es->tx_fifo;
test_buf_len = vec_len (test_data);
ASSERT (test_buf_len > 0);
+ if (ecm->run_time)
+ bytes_to_send = clib_min (svm_fifo_max_enqueue_prod (f), max_burst);
+ else
+ bytes_to_send = clib_min (es->bytes_to_send, max_burst);
+ if (ecm->throughput)
+ bytes_to_send = clib_min (es->bytes_paced_current, bytes_to_send);
test_buf_offset = es->bytes_sent % test_buf_len;
- bytes_this_chunk =
- clib_min (test_buf_len - test_buf_offset, es->bytes_to_send);
+
+ bytes_this_chunk = clib_min (test_buf_len - test_buf_offset, bytes_to_send);
if (!es->is_dgram)
{
if (ecm->no_copy)
{
- svm_fifo_t *f = es->tx_fifo;
rv = clib_min (svm_fifo_max_enqueue_prod (f), bytes_this_chunk);
svm_fifo_enqueue_nocopy (f, rv);
session_program_tx_io_evt (es->tx_fifo->vpp_sh, SESSION_IO_EVT_TX);
@@ -105,7 +113,6 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es)
}
else
{
- svm_fifo_t *f = es->tx_fifo;
u32 max_enqueue = svm_fifo_max_enqueue_prod (f);
if (max_enqueue < sizeof (session_dgram_hdr_t))
@@ -136,7 +143,8 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es)
else
{
bytes_this_chunk = clib_min (bytes_this_chunk, max_enqueue);
- bytes_this_chunk = clib_min (bytes_this_chunk, 1460);
+ if (!ecm->throughput)
+ bytes_this_chunk = clib_min (bytes_this_chunk, 1460);
rv =
app_send_dgram ((app_session_t *) es, test_data + test_buf_offset,
bytes_this_chunk, 0);
@@ -147,8 +155,16 @@ send_data_chunk (ec_main_t *ecm, ec_session_t *es)
if (rv > 0)
{
/* Account for it... */
- es->bytes_to_send -= rv;
es->bytes_sent += rv;
+ if (ecm->run_time)
+ es->bytes_to_receive += rv;
+ else
+ es->bytes_to_send -= rv;
+ if (ecm->throughput)
+ {
+ es->bytes_paced_current -= rv;
+ es->bytes_paced_current += es->bytes_paced_target;
+ }
if (ecm->cfg.verbose)
{
@@ -228,6 +244,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
u32 *conn_indices, *conns_this_batch, nconns_this_batch;
int thread_index = vm->thread_index, i, delete_session;
+ f64 time_now;
ec_main_t *ecm = &ec_main;
ec_worker_t *wrk;
ec_session_t *es;
@@ -266,7 +283,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
ecm->repeats++;
ecm->prev_conns = vec_len (conns_this_batch);
- if (ecm->repeats == 500000)
+ if (ecm->repeats == 500000 && !ecm->run_time)
{
ec_err ("stuck clients");
}
@@ -277,18 +294,23 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
ecm->repeats = 0;
}
+ time_now = vlib_time_now (ecm->vlib_main);
/*
* Handle connections in this batch
*/
for (i = 0; i < vec_len (conns_this_batch); i++)
{
es = ec_session_get (wrk, conns_this_batch[i]);
+ if (ecm->throughput && time_now < es->time_to_send)
+ continue;
delete_session = 1;
if (es->bytes_to_send > 0)
{
send_data_chunk (ecm, es);
+ if (ecm->throughput)
+ es->time_to_send += ecm->pacing_window_len;
delete_session = 0;
}
@@ -297,7 +319,7 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
delete_session = 0;
}
- if (PREDICT_FALSE (delete_session == 1))
+ if (PREDICT_FALSE (delete_session == 1) || ecm->timer_expired)
{
clib_atomic_fetch_add (&ecm->tx_total, es->bytes_sent);
clib_atomic_fetch_add (&ecm->rx_total, es->bytes_received);
@@ -326,6 +348,8 @@ ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
signal_evt_to_cli (EC_CLI_TEST_DONE);
}
}
+ if (ecm->throughput)
+ time_now = vlib_time_now (vm);
}
wrk->conn_indices = conn_indices;
@@ -356,6 +380,7 @@ ec_reset_runtime_config (ec_main_t *ecm)
ecm->tls_engine = CRYPTO_ENGINE_OPENSSL;
ecm->no_copy = 0;
ecm->run_test = EC_STARTING;
+ ecm->timer_expired = false;
ecm->ready_connections = 0;
ecm->connect_conn_index = 0;
ecm->rx_total = 0;
@@ -368,6 +393,9 @@ ec_reset_runtime_config (ec_main_t *ecm)
ecm->attach_flags = 0;
ecm->syn_timeout = 20.0;
ecm->test_timeout = 20.0;
+ ecm->run_time = 0;
+ ecm->throughput = 0;
+ ecm->pacing_window_len = 1;
vec_free (ecm->connect_uri);
}
@@ -474,7 +502,8 @@ ec_cleanup (ec_main_t *ecm)
vec_free (ecm->connect_uri);
vec_free (ecm->appns_id);
-
+ if (ecm->throughput)
+ ecm->pacing_window_len = 1;
if (ecm->barrier_acq_needed)
vlib_worker_thread_barrier_sync (ecm->vlib_main);
}
@@ -565,7 +594,7 @@ quic_ec_session_connected_callback (u32 app_index, u32 api_context,
ec_main_t *ecm = &ec_main;
ec_session_t *es;
ec_worker_t *wrk;
- u32 thread_index;
+ clib_thread_index_t thread_index;
if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE))
return ec_ctrl_session_connected_callback (s);
@@ -616,13 +645,48 @@ quic_ec_session_connected_callback (u32 app_index, u32 api_context,
return 0;
}
+static void
+ec_calc_tput (ec_main_t *ecm)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ec_worker_t *wrk;
+ ec_session_t *sess;
+ f64 pacing_base;
+ u64 bytes_paced_target;
+ /* periodic writes larger than this clog up the fifo */
+ const u64 target_size_threshold = 4344;
+
+ /* find a suitable pacing window length & data chunk size */
+ bytes_paced_target =
+ ecm->throughput * ecm->pacing_window_len / ecm->n_clients;
+ while (bytes_paced_target > target_size_threshold)
+ {
+ ecm->pacing_window_len /= 2;
+ bytes_paced_target /= 2;
+ }
+
+ /* order sessions to shoot out data sequentially */
+ pacing_base = vlib_time_now (vm) - ecm->pacing_window_len;
+ vec_foreach (wrk, ecm->wrk)
+ {
+ vec_foreach (sess, wrk->sessions)
+ {
+ sess->time_to_send =
+ pacing_base + ecm->pacing_window_len / ecm->n_clients;
+ pacing_base = sess->time_to_send;
+ sess->bytes_paced_target = bytes_paced_target;
+ sess->bytes_paced_current = bytes_paced_target;
+ }
+ }
+}
+
static int
ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s,
session_error_t err)
{
ec_main_t *ecm = &ec_main;
ec_session_t *es;
- u32 thread_index;
+ clib_thread_index_t thread_index;
ec_worker_t *wrk;
if (PREDICT_FALSE (ecm->run_test != EC_STARTING))
@@ -656,12 +720,16 @@ ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s,
es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL;
es->vpp_session_handle = session_handle (s);
es->vpp_session_index = s->session_index;
+ es->bytes_paced_target = ~0;
+ es->bytes_paced_current = ~0;
s->opaque = es->session_index;
vec_add1 (wrk->conn_indices, es->session_index);
clib_atomic_fetch_add (&ecm->ready_connections, 1);
if (ecm->ready_connections == ecm->expected_connections)
{
+ if (ecm->throughput)
+ ec_calc_tput (ecm);
ecm->run_test = EC_RUNNING;
/* Signal the CLI process that the action is starting... */
signal_evt_to_cli (EC_CLI_CONNECTS_DONE);
@@ -1072,8 +1140,8 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
ec_main_t *ecm = &ec_main;
uword *event_data = 0, event_type;
clib_error_t *error = 0;
- int rv, had_config = 1;
- u64 tmp, total_bytes;
+ int rv, timed_run_conflict = 0, had_config = 1;
+ u64 total_bytes;
f64 delta;
if (ecm->test_client_attached)
@@ -1099,17 +1167,15 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "quic-streams %d", &ecm->quic_streams))
;
- else if (unformat (line_input, "mbytes %lld", &tmp))
- ecm->bytes_to_send = tmp << 20;
- else if (unformat (line_input, "gbytes %lld", &tmp))
- ecm->bytes_to_send = tmp << 30;
else if (unformat (line_input, "bytes %U", unformat_memory_size,
&ecm->bytes_to_send))
- ;
+ timed_run_conflict++;
else if (unformat (line_input, "test-timeout %f", &ecm->test_timeout))
;
else if (unformat (line_input, "syn-timeout %f", &ecm->syn_timeout))
;
+ else if (unformat (line_input, "run-time %f", &ecm->run_time))
+ ;
else if (unformat (line_input, "echo-bytes"))
ecm->echo_bytes = 1;
else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
@@ -1121,6 +1187,9 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
else if (unformat (line_input, "private-segment-size %U",
unformat_memory_size, &ecm->private_segment_size))
;
+ else if (unformat (line_input, "throughput %U", unformat_memory_size,
+ &ecm->throughput))
+ ;
else if (unformat (line_input, "preallocate-fifos"))
ecm->prealloc_fifos = 1;
else if (unformat (line_input, "preallocate-sessions"))
@@ -1153,6 +1222,9 @@ ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
+ if (timed_run_conflict && ecm->run_time)
+ return clib_error_return (0, "failed: invalid arguments for a timed run!");
+
parse_config:
ecm->cfg.num_test_sessions = ecm->expected_connections =
@@ -1237,12 +1309,22 @@ parse_config:
clib_error_return (0, "failed: unexpected event(2): %d", event_type);
goto stop_test;
}
+ /* Testing officially starts now */
+ ecm->test_start_time = vlib_time_now (ecm->vlib_main);
+ ec_cli ("Test started at %.6f", ecm->test_start_time);
+
+ /*
+ * If a timed run, wait and expire timer
+ */
+ if (ecm->run_time)
+ {
+ vlib_process_suspend (vm, ecm->run_time);
+ ec_main.timer_expired = true;
+ }
/*
* Wait for the sessions to finish or test_timeout seconds pass
*/
- ecm->test_start_time = vlib_time_now (ecm->vlib_main);
- ec_cli ("Test started at %.6f", ecm->test_start_time);
vlib_process_wait_for_event_or_clock (vm, ecm->test_timeout);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
@@ -1336,11 +1418,11 @@ cleanup:
VLIB_CLI_COMMAND (ec_command, static) = {
.path = "test echo clients",
.short_help =
- "test echo clients [nclients %d][[m|g]bytes <bytes>]"
- "[test-timeout <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]"
+ "test echo clients [nclients %d][bytes <bytes>[m|g]][test-timeout <time>]"
+ "[run-time <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]"
"[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
"[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
- "[uri <tcp://ip/port>][test-bytes][verbose]",
+ "[throughput <bytes>[m|g]][uri <tcp://ip/port>][test-bytes][verbose]",
.function = ec_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/hs_apps/echo_client.h b/src/plugins/hs_apps/echo_client.h
index 5868c3652ce..d928a4e936f 100644
--- a/src/plugins/hs_apps/echo_client.h
+++ b/src/plugins/hs_apps/echo_client.h
@@ -29,12 +29,15 @@ typedef struct ec_session_
foreach_app_session_field
#undef _
u32 vpp_session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u64 bytes_to_send;
u64 bytes_sent;
u64 bytes_to_receive;
u64 bytes_received;
u64 vpp_session_handle;
+ f64 time_to_send;
+ u64 bytes_paced_target;
+ u64 bytes_paced_current;
} ec_session_t;
typedef struct ec_worker_
@@ -45,7 +48,7 @@ typedef struct ec_worker_
u32 *conn_indices; /**< sessions handled by worker */
u32 *conns_this_batch; /**< sessions handled in batch */
svm_msg_q_t *vpp_event_queue; /**< session layer worker mq */
- u32 thread_index; /**< thread index for worker */
+ clib_thread_index_t thread_index; /**< thread index for worker */
} ec_worker_t;
typedef struct
@@ -57,6 +60,7 @@ typedef struct
volatile u64 rx_total;
volatile u64 tx_total;
volatile int run_test; /**< Signal start of test */
+ volatile bool timer_expired; /**< Signal end of timed test */
f64 syn_start_time;
f64 test_start_time;
@@ -64,6 +68,8 @@ typedef struct
u32 prev_conns;
u32 repeats;
+ f64
+ pacing_window_len; /**< Time between data chunk sends when limiting tput */
u32 connect_conn_index; /**< Connects attempted progress */
/*
@@ -88,6 +94,7 @@ typedef struct
u32 connections_per_batch; /**< Connections to rx/tx at once */
u32 private_segment_count; /**< Number of private fifo segs */
u64 private_segment_size; /**< size of private fifo segs */
+ u64 throughput; /**< Target bytes per second */
u32 tls_engine; /**< TLS engine mbedtls/openssl */
u32 no_copy; /**< Don't memcpy data to tx fifo */
u32 quic_streams; /**< QUIC streams per connection */
@@ -97,6 +104,7 @@ typedef struct
u64 appns_secret; /**< App namespace secret */
f64 syn_timeout; /**< Test syn timeout (s) */
f64 test_timeout; /**< Test timeout (s) */
+ f64 run_time; /**< Length of a test (s) */
/*
* Flags
diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c
index dc303e2f83a..61b86769768 100644
--- a/src/plugins/hs_apps/echo_server.c
+++ b/src/plugins/hs_apps/echo_server.c
@@ -40,7 +40,7 @@ typedef struct
es_session_t *sessions;
u8 *rx_buf; /**< Per-thread RX buffer */
svm_msg_q_t *vpp_event_queue;
- u32 thread_index;
+ clib_thread_index_t thread_index;
} es_worker_t;
typedef struct
@@ -87,7 +87,7 @@ echo_server_main_t echo_server_main;
#define es_cli(_fmt, _args...) vlib_cli_output (vm, _fmt, ##_args)
static inline es_worker_t *
-es_worker_get (u32 thread_index)
+es_worker_get (clib_thread_index_t thread_index)
{
return vec_elt_at_index (echo_server_main.wrk, thread_index);
}
@@ -277,7 +277,7 @@ es_wrk_cleanup_sessions (void *args)
{
echo_server_main_t *esm = &echo_server_main;
vnet_disconnect_args_t _a = {}, *a = &_a;
- u32 thread_index = pointer_to_uword (args);
+ clib_thread_index_t thread_index = pointer_to_uword (args);
es_session_t *es;
es_worker_t *wrk;
@@ -373,7 +373,7 @@ echo_server_rx_callback (session_t * s)
int actual_transfer;
svm_fifo_t *tx_fifo, *rx_fifo;
echo_server_main_t *esm = &echo_server_main;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
es_worker_t *wrk;
es_session_t *es;
diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c
index 531e2750c1e..40acf6a1635 100644
--- a/src/plugins/hs_apps/http_cli.c
+++ b/src/plugins/hs_apps/http_cli.c
@@ -37,7 +37,7 @@ typedef struct
typedef struct
{
u32 hs_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u64 node_index;
u8 plain_text;
u8 *buf;
@@ -47,7 +47,7 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u8 *tx_buf;
u32 tx_offset;
u32 vpp_session_index;
@@ -85,7 +85,7 @@ typedef struct
static hcs_main_t hcs_main;
static hcs_session_t *
-hcs_session_alloc (u32 thread_index)
+hcs_session_alloc (clib_thread_index_t thread_index)
{
hcs_main_t *hcm = &hcs_main;
hcs_session_t *hs;
@@ -98,7 +98,7 @@ hcs_session_alloc (u32 thread_index)
}
static hcs_session_t *
-hcs_session_get (u32 thread_index, u32 hs_index)
+hcs_session_get (clib_thread_index_t thread_index, u32 hs_index)
{
hcs_main_t *hcm = &hcs_main;
if (pool_is_free_index (hcm->sessions[thread_index], hs_index))
diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c
index 20271fc4aea..578d21140f1 100644
--- a/src/plugins/hs_apps/http_client.c
+++ b/src/plugins/hs_apps/http_client.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2024 Cisco Systems, Inc.
+ * Copyright(c) 2025 Cisco Systems, Inc.
*/
#include <vnet/session/application.h>
@@ -12,29 +12,36 @@
typedef struct
{
+ u64 req_per_wrk;
+ u64 request_count;
+ f64 start, end;
+ f64 elapsed_time;
+} hc_stats_t;
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 session_index;
- u32 thread_index;
- u32 vpp_session_index;
+ clib_thread_index_t thread_index;
u64 to_recv;
u8 is_closed;
+ hc_stats_t stats;
+ u64 data_offset;
+ u8 *resp_headers;
+ u8 *http_response;
+ u8 *response_status;
} hc_session_t;
typedef struct
{
- u64 request_count;
- f64 start, end;
- f64 elapsed_time;
-} hc_stats_t;
-
-typedef struct
-{
hc_session_t *sessions;
- u32 thread_index;
+ clib_thread_index_t thread_index;
vlib_main_t *vlib_main;
u8 *headers_buf;
http_headers_ctx_t req_headers;
http_msg_t msg;
+ u32 session_index;
+ bool has_common_headers;
} hc_worker_t;
typedef struct
@@ -52,11 +59,7 @@ typedef struct
session_endpoint_cfg_t connect_sep;
u8 *target;
u8 *data;
- u64 data_offset;
hc_worker_t *wrk;
- u8 *resp_headers;
- u8 *http_response;
- u8 *response_status;
hc_http_header_t *custom_header;
u8 is_file;
u8 use_ptr;
@@ -67,6 +70,19 @@ typedef struct
u64 repeat_count;
f64 duration;
bool repeat;
+ bool multi_session;
+ u32 done_count;
+ u32 connected_counter;
+ u32 worker_index;
+ u32 max_sessions;
+ u32 private_segment_size;
+ u32 prealloc_fifos;
+ u32 fifo_size;
+ u8 *appns_id;
+ u64 appns_secret;
+ clib_spinlock_t lock;
+ bool was_transport_closed;
+ u32 ckpair_index;
} hc_main_t;
typedef enum
@@ -82,26 +98,19 @@ static hc_main_t hc_main;
static hc_stats_t hc_stats;
static inline hc_worker_t *
-hc_worker_get (u32 thread_index)
+hc_worker_get (clib_thread_index_t thread_index)
{
return &hc_main.wrk[thread_index];
}
static inline hc_session_t *
-hc_session_get (u32 session_index, u32 thread_index)
+hc_session_get (u32 session_index, clib_thread_index_t thread_index)
{
hc_worker_t *wrk = hc_worker_get (thread_index);
wrk->vlib_main = vlib_get_main_by_index (thread_index);
return pool_elt_at_index (wrk->sessions, session_index);
}
-static void
-hc_ho_session_free (u32 hs_index)
-{
- hc_worker_t *wrk = hc_worker_get (0);
- pool_put_index (wrk->sessions, hs_index);
-}
-
static hc_session_t *
hc_session_alloc (hc_worker_t *wrk)
{
@@ -115,14 +124,14 @@ hc_session_alloc (hc_worker_t *wrk)
}
static int
-hc_request (session_t *s, session_error_t err)
+hc_request (session_t *s, hc_worker_t *wrk, hc_session_t *hc_session,
+ session_error_t err)
{
hc_main_t *hcm = &hc_main;
u64 to_send;
u32 n_enq;
u8 n_segs;
int rv;
- hc_worker_t *wrk = hc_worker_get (s->thread_index);
if (hcm->use_ptr)
{
@@ -166,7 +175,7 @@ hc_request (session_t *s, session_error_t err)
rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data);
if (rv < to_send)
{
- hcm->data_offset = (rv > 0) ? rv : 0;
+ hc_session->data_offset = (rv > 0) ? rv : 0;
svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
}
@@ -185,9 +194,8 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index,
{
hc_main_t *hcm = &hc_main;
hc_worker_t *wrk;
- u32 new_hc_index;
+ hc_session_t *hc_session;
hc_http_header_t *header;
- HTTP_DBG (1, "ho hc_index: %d", hc_session_index);
if (err)
{
@@ -199,68 +207,89 @@ hc_session_connected_callback (u32 app_index, u32 hc_session_index,
}
wrk = hc_worker_get (s->thread_index);
- hc_session_t *hc_session, *new_hc_session = hc_session_alloc (wrk);
- hc_session = hc_session_get (hc_session_index, 0);
- new_hc_index = new_hc_session->session_index;
- clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session));
- new_hc_session->session_index = new_hc_index;
- new_hc_session->thread_index = s->thread_index;
- new_hc_session->vpp_session_index = s->session_index;
- HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index);
- s->opaque = new_hc_index;
+ hc_session = hc_session_alloc (wrk);
+ clib_spinlock_lock_if_init (&hcm->lock);
+ hcm->connected_counter++;
+ clib_spinlock_unlock_if_init (&hcm->lock);
- if (hcm->req_method == HTTP_REQ_POST)
+ hc_session->thread_index = s->thread_index;
+ s->opaque = hc_session->session_index;
+ wrk->session_index = hc_session->session_index;
+
+ if (hcm->multi_session)
{
- if (hcm->is_file)
- http_add_header (
- &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE,
- http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM));
- else
- http_add_header (
- &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE,
- http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED));
+ hc_session->stats.req_per_wrk = hcm->repeat_count / hcm->max_sessions;
+ clib_spinlock_lock_if_init (&hcm->lock);
+ /* add remaining requests to the first connected session */
+ if (hcm->connected_counter == 1)
+ {
+ hc_session->stats.req_per_wrk +=
+ hcm->repeat_count % hcm->max_sessions;
+ }
+ clib_spinlock_unlock_if_init (&hcm->lock);
}
- http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1);
-
- vec_foreach (header, hcm->custom_header)
- http_add_custom_header (
- &wrk->req_headers, (const char *) header->name, vec_len (header->name),
- (const char *) header->value, vec_len (header->value));
-
- clib_warning ("%U", format_http_bytes, wrk->headers_buf,
- wrk->req_headers.tail_offset);
- wrk->msg.method_type = hcm->req_method;
- if (hcm->req_method == HTTP_REQ_POST)
- wrk->msg.data.body_len = vec_len (hcm->data);
else
- wrk->msg.data.body_len = 0;
-
- wrk->msg.type = HTTP_MSG_REQUEST;
- /* request target */
- wrk->msg.data.target_path_len = vec_len (hcm->target);
- /* custom headers */
- wrk->msg.data.headers_len = wrk->req_headers.tail_offset;
- /* total length */
- wrk->msg.data.len = wrk->msg.data.target_path_len +
- wrk->msg.data.headers_len + wrk->msg.data.body_len;
-
- if (hcm->use_ptr)
{
- wrk->msg.data.type = HTTP_MSG_DATA_PTR;
+ hc_session->stats.req_per_wrk = hcm->repeat_count;
+ hcm->worker_index = s->thread_index;
}
- else
+
+ if (!wrk->has_common_headers)
{
- wrk->msg.data.type = HTTP_MSG_DATA_INLINE;
- wrk->msg.data.target_path_offset = 0;
- wrk->msg.data.headers_offset = wrk->msg.data.target_path_len;
- wrk->msg.data.body_offset =
- wrk->msg.data.headers_offset + wrk->msg.data.headers_len;
+ wrk->has_common_headers = true;
+ if (hcm->req_method == HTTP_REQ_POST)
+ {
+ if (hcm->is_file)
+ http_add_header (
+ &wrk->req_headers, HTTP_HEADER_CONTENT_TYPE,
+ http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM));
+ else
+ http_add_header (&wrk->req_headers, HTTP_HEADER_CONTENT_TYPE,
+ http_content_type_token (
+ HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED));
+ }
+ http_add_header (&wrk->req_headers, HTTP_HEADER_ACCEPT, "*", 1);
+
+ vec_foreach (header, hcm->custom_header)
+ http_add_custom_header (&wrk->req_headers, (const char *) header->name,
+ vec_len (header->name),
+ (const char *) header->value,
+ vec_len (header->value));
+
+ wrk->msg.method_type = hcm->req_method;
+ if (hcm->req_method == HTTP_REQ_POST)
+ wrk->msg.data.body_len = vec_len (hcm->data);
+ else
+ wrk->msg.data.body_len = 0;
+
+ wrk->msg.type = HTTP_MSG_REQUEST;
+ /* request target */
+ wrk->msg.data.target_path_len = vec_len (hcm->target);
+ /* custom headers */
+ wrk->msg.data.headers_len = wrk->req_headers.tail_offset;
+ /* total length */
+ wrk->msg.data.len = wrk->msg.data.target_path_len +
+ wrk->msg.data.headers_len + wrk->msg.data.body_len;
+
+ if (hcm->use_ptr)
+ {
+ wrk->msg.data.type = HTTP_MSG_DATA_PTR;
+ }
+ else
+ {
+ wrk->msg.data.type = HTTP_MSG_DATA_INLINE;
+ wrk->msg.data.target_path_offset = 0;
+ wrk->msg.data.headers_offset = wrk->msg.data.target_path_len;
+ wrk->msg.data.body_offset =
+ wrk->msg.data.headers_offset + wrk->msg.data.headers_len;
+ }
}
if (hcm->repeat)
- hc_stats.start = vlib_time_now (vlib_get_main_by_index (s->thread_index));
+ hc_session->stats.start =
+ vlib_time_now (vlib_get_main_by_index (s->thread_index));
- return hc_request (s, err);
+ return hc_request (s, wrk, hc_session, err);
}
static void
@@ -275,21 +304,38 @@ hc_session_disconnect_callback (session_t *s)
if ((rv = vnet_disconnect_session (a)))
clib_warning ("warning: disconnect returned: %U", format_session_error,
rv);
+ clib_spinlock_lock_if_init (&hcm->lock);
+ hcm->done_count++;
+ clib_spinlock_unlock_if_init (&hcm->lock);
}
static void
hc_session_transport_closed_callback (session_t *s)
{
hc_main_t *hcm = &hc_main;
- vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index,
- HC_TRANSPORT_CLOSED, 0);
-}
+ hc_worker_t *wrk = hc_worker_get (s->thread_index);
-static void
-hc_ho_cleanup_callback (session_t *s)
-{
- HTTP_DBG (1, "ho hc_index: %d:", s->opaque);
- hc_ho_session_free (s->opaque);
+ clib_spinlock_lock_if_init (&hcm->lock);
+ if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ hcm->was_transport_closed = true;
+ }
+
+ /* send an event when all sessions are closed */
+ if (hcm->done_count >= hcm->max_sessions)
+ {
+ if (hcm->was_transport_closed)
+ {
+ vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index,
+ HC_TRANSPORT_CLOSED, 0);
+ }
+ else
+ {
+ vlib_process_signal_event_mt (wrk->vlib_main, hcm->cli_node_index,
+ HC_REPEAT_DONE, 0);
+ }
+ }
+ clib_spinlock_unlock_if_init (&hcm->lock);
}
static void
@@ -315,20 +361,23 @@ hc_rx_callback (session_t *s)
{
hc_main_t *hcm = &hc_main;
hc_worker_t *wrk = hc_worker_get (s->thread_index);
- hc_session_t *hc_session;
+ hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index);
http_msg_t msg;
int rv;
+ u32 max_deq;
session_error_t session_err = 0;
int send_err = 0;
- hc_session = hc_session_get (s->opaque, s->thread_index);
-
if (hc_session->is_closed)
{
clib_warning ("hc_session_index[%d] is closed", s->opaque);
return -1;
}
+ max_deq = svm_fifo_max_dequeue_cons (s->rx_fifo);
+ if (PREDICT_FALSE (max_deq == 0))
+ goto done;
+
if (hc_session->to_recv == 0)
{
rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg);
@@ -344,17 +393,20 @@ hc_rx_callback (session_t *s)
if (msg.data.headers_len)
{
- hcm->response_status =
- format (0, "%U", format_http_status_code, msg.code);
+
+ if (!hcm->repeat)
+ hc_session->response_status =
+ format (0, "%U", format_http_status_code, msg.code);
+
svm_fifo_dequeue_drop (s->rx_fifo, msg.data.headers_offset);
- vec_validate (hcm->resp_headers, msg.data.headers_len - 1);
- vec_set_len (hcm->resp_headers, msg.data.headers_len);
+ vec_validate (hc_session->resp_headers, msg.data.headers_len - 1);
+ vec_set_len (hc_session->resp_headers, msg.data.headers_len);
rv = svm_fifo_dequeue (s->rx_fifo, msg.data.headers_len,
- hcm->resp_headers);
+ hc_session->resp_headers);
ASSERT (rv == msg.data.headers_len);
- HTTP_DBG (1, (char *) format (0, "%v", hcm->resp_headers));
+ HTTP_DBG (1, (char *) format (0, "%v", hc_session->resp_headers));
msg.data.body_offset -=
msg.data.headers_len + msg.data.headers_offset;
}
@@ -372,18 +424,18 @@ hc_rx_callback (session_t *s)
{
goto done;
}
- vec_validate (hcm->http_response, msg.data.body_len - 1);
- vec_reset_length (hcm->http_response);
+ vec_validate (hc_session->http_response, msg.data.body_len - 1);
+ vec_reset_length (hc_session->http_response);
}
- u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo);
+ max_deq = svm_fifo_max_dequeue (s->rx_fifo);
if (!max_deq)
{
goto done;
}
u32 n_deq = clib_min (hc_session->to_recv, max_deq);
- u32 curr = vec_len (hcm->http_response);
- rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr);
+ u32 curr = vec_len (hc_session->http_response);
+ rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hc_session->http_response + curr);
if (rv < 0)
{
clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv);
@@ -393,7 +445,7 @@ hc_rx_callback (session_t *s)
}
ASSERT (rv == n_deq);
- vec_set_len (hcm->http_response, curr + n_deq);
+ vec_set_len (hc_session->http_response, curr + n_deq);
ASSERT (hc_session->to_recv >= rv);
hc_session->to_recv -= rv;
@@ -402,20 +454,19 @@ done:
{
if (hcm->repeat)
{
- hc_stats.request_count++;
- hc_stats.end = vlib_time_now (wrk->vlib_main);
- hc_stats.elapsed_time = hc_stats.end - hc_stats.start;
+ hc_session->stats.request_count++;
+ hc_session->stats.end = vlib_time_now (wrk->vlib_main);
+ hc_session->stats.elapsed_time =
+ hc_session->stats.end - hc_session->stats.start;
- if (hc_stats.elapsed_time >= hcm->duration &&
- hc_stats.request_count >= hcm->repeat_count)
+ if (hc_session->stats.elapsed_time >= hcm->duration &&
+ hc_session->stats.request_count >= hc_session->stats.req_per_wrk)
{
- vlib_process_signal_event_mt (
- wrk->vlib_main, hcm->cli_node_index, HC_REPEAT_DONE, 0);
hc_session_disconnect_callback (s);
}
else
{
- send_err = hc_request (s, session_err);
+ send_err = hc_request (s, wrk, hc_session, session_err);
if (send_err)
clib_warning ("failed to send request, error %d", send_err);
}
@@ -434,11 +485,13 @@ static int
hc_tx_callback (session_t *s)
{
hc_main_t *hcm = &hc_main;
+ hc_session_t *hc_session = hc_session_get (s->opaque, s->thread_index);
u64 to_send;
int rv;
- to_send = vec_len (hcm->data) - hcm->data_offset;
- rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset);
+ to_send = vec_len (hcm->data) - hc_session->data_offset;
+ rv = svm_fifo_enqueue (s->tx_fifo, to_send,
+ hcm->data + hc_session->data_offset);
if (rv <= 0)
{
@@ -448,7 +501,7 @@ hc_tx_callback (session_t *s)
if (rv < to_send)
{
- hcm->data_offset += rv;
+ hc_session->data_offset += rv;
svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
@@ -465,7 +518,6 @@ static session_cb_vft_t hc_session_cb_vft = {
.session_reset_callback = hc_session_reset_callback,
.builtin_app_rx_callback = hc_rx_callback,
.builtin_app_tx_callback = hc_tx_callback,
- .half_open_cleanup_callback = hc_ho_cleanup_callback,
};
static clib_error_t *
@@ -474,8 +526,13 @@ hc_attach ()
hc_main_t *hcm = &hc_main;
vnet_app_attach_args_t _a, *a = &_a;
u64 options[18];
+ u32 segment_size = 128 << 20;
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
int rv;
+ if (hcm->private_segment_size)
+ segment_size = hcm->private_segment_size;
+
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
@@ -483,7 +540,20 @@ hc_attach ()
a->name = format (0, "http_client");
a->session_cb_vft = &hc_session_cb_vft;
a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 8 << 10;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 32 << 10;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+ a->options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL;
+ if (hcm->appns_id)
+ {
+ a->namespace_id = hcm->appns_id;
+ a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret;
+ }
if ((rv = vnet_application_attach (a)))
return clib_error_return (0, "attach returned: %U", format_session_error,
@@ -493,6 +563,14 @@ hc_attach ()
vec_free (a->name);
hcm->attached = 1;
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hcm->ckpair_index = ck_pair->index;
+
return 0;
}
@@ -500,14 +578,19 @@ static int
hc_connect_rpc (void *rpc_args)
{
vnet_connect_args_t *a = rpc_args;
- int rv;
+ int rv = ~0;
+ hc_main_t *hcm = &hc_main;
- rv = vnet_connect (a);
- if (rv > 0)
- clib_warning (0, "connect returned: %U", format_session_error, rv);
+ for (u32 i = 0; i < hcm->max_sessions; i++)
+ {
+ rv = vnet_connect (a);
+ if (rv > 0)
+ clib_warning (0, "connect returned: %U", format_session_error, rv);
+ }
session_endpoint_free_ext_cfgs (&a->sep_ext);
vec_free (a);
+
return rv;
}
@@ -516,14 +599,10 @@ hc_connect ()
{
hc_main_t *hcm = &hc_main;
vnet_connect_args_t *a = 0;
- hc_worker_t *wrk;
- hc_session_t *hc_session;
transport_endpt_ext_cfg_t *ext_cfg;
transport_endpt_cfg_http_t http_cfg = { (u32) hcm->timeout, 0 };
-
vec_validate (a, 0);
clib_memset (a, 0, sizeof (a[0]));
-
clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep));
a->app_index = hcm->app_index;
@@ -531,15 +610,49 @@ hc_connect ()
&a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg));
clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg));
- /* allocate http session on main thread */
- wrk = hc_worker_get (0);
- hc_session = hc_session_alloc (wrk);
- a->api_context = hc_session->session_index;
+ if (hcm->connect_sep.flags & SESSION_ENDPT_CFG_F_SECURE)
+ {
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
+ }
session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc,
a);
}
+/*
+ * Fold the per-session repeat statistics of every worker into the global
+ * hc_stats and print a summary on the CLI.
+ *
+ * Does nothing unless request repeating is enabled. The request counts of
+ * all sessions are summed (and reset), while the reported elapsed time is
+ * the largest elapsed time seen on any single session.
+ *
+ * This is also called on error paths (timeout, connect failure, ...) where
+ * no request may have completed, so the derived values are only computed
+ * when their divisor is non-zero; otherwise they are reported as 0.
+ */
+static void
+hc_get_repeat_stats (vlib_main_t *vm)
+{
+  hc_main_t *hcm = &hc_main;
+  hc_worker_t *wrk;
+  hc_session_t *hc_session;
+  f64 avg_latency_ms = 0;
+  f64 req_per_sec = 0;
+
+  if (!hcm->repeat)
+    return;
+
+  vec_foreach (wrk, hcm->wrk)
+    {
+      vec_foreach (hc_session, wrk->sessions)
+        {
+          hc_stats.request_count += hc_session->stats.request_count;
+          hc_session->stats.request_count = 0;
+          /* sessions run in parallel: keep the longest run, not the sum */
+          if (hc_stats.elapsed_time < hc_session->stats.elapsed_time)
+            {
+              hc_stats.elapsed_time = hc_session->stats.elapsed_time;
+              hc_session->stats.elapsed_time = 0;
+            }
+        }
+    }
+
+  /* avoid printing inf/nan when nothing completed or no time elapsed */
+  if (hc_stats.request_count)
+    avg_latency_ms = (hc_stats.elapsed_time / hc_stats.request_count) * 1000;
+  if (hc_stats.elapsed_time > 0)
+    req_per_sec = hc_stats.request_count / hc_stats.elapsed_time;
+
+  vlib_cli_output (vm,
+                   "< %d request(s) in %.6fs\n< avg latency "
+                   "%.4fms\n< %.2f req/sec",
+                   hc_stats.request_count, hc_stats.elapsed_time,
+                   avg_latency_ms, req_per_sec);
+}
+
static clib_error_t *
hc_get_event (vlib_main_t *vm)
{
@@ -548,6 +661,8 @@ hc_get_event (vlib_main_t *vm)
clib_error_t *err = NULL;
FILE *file_ptr;
u64 event_timeout;
+ hc_worker_t *wrk;
+ hc_session_t *hc_session;
event_timeout = hcm->timeout ? hcm->timeout : 10;
if (event_timeout == hcm->duration)
@@ -558,20 +673,26 @@ hc_get_event (vlib_main_t *vm)
switch (event_type)
{
case ~0:
+ hc_get_repeat_stats (vm);
err = clib_error_return (0, "error: timeout");
break;
case HC_CONNECT_FAILED:
+ hc_get_repeat_stats (vm);
err = clib_error_return (0, "error: failed to connect");
break;
case HC_TRANSPORT_CLOSED:
+ hc_get_repeat_stats (vm);
err = clib_error_return (0, "error: transport closed");
break;
case HC_GENERIC_ERR:
+ hc_get_repeat_stats (vm);
err = clib_error_return (0, "error: unknown");
break;
case HC_REPLY_RECEIVED:
if (hcm->filename)
{
+ wrk = hc_worker_get (hcm->worker_index);
+ hc_session = hc_session_get (wrk->session_index, wrk->thread_index);
file_ptr =
fopen ((char *) format (0, "/tmp/%v", hcm->filename), "a");
if (file_ptr == NULL)
@@ -580,26 +701,27 @@ hc_get_event (vlib_main_t *vm)
}
else
{
- fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status,
- hcm->resp_headers, hcm->http_response);
+ fprintf (file_ptr, "< %s\n< %s\n< %s",
+ hc_session->response_status, hc_session->resp_headers,
+ hc_session->http_response);
fclose (file_ptr);
vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename);
}
}
if (hcm->verbose)
- vlib_cli_output (vm, "< %v< %v", hcm->response_status,
- hcm->resp_headers);
- vlib_cli_output (vm, "\n%v\n", hcm->http_response);
+ {
+ wrk = hc_worker_get (hcm->worker_index);
+ hc_session = hc_session_get (wrk->session_index, wrk->thread_index);
+ vlib_cli_output (vm, "< %v\n< %v\n%v", hc_session->response_status,
+ hc_session->resp_headers,
+ hc_session->http_response);
+ }
break;
case HC_REPEAT_DONE:
- vlib_cli_output (vm,
- "< %d request(s) in %.6fs\n< avg latency "
- "%.4fms\n< %.2f req/sec",
- hc_stats.request_count, hc_stats.elapsed_time,
- (hc_stats.elapsed_time / hc_stats.request_count) * 1000,
- hc_stats.request_count / hc_stats.elapsed_time);
+ hc_get_repeat_stats (vm);
break;
default:
+ hc_get_repeat_stats (vm);
err = clib_error_return (0, "error: unexpected event %d", event_type);
break;
}
@@ -612,15 +734,17 @@ static clib_error_t *
hc_run (vlib_main_t *vm)
{
hc_main_t *hcm = &hc_main;
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
u32 num_threads;
hc_worker_t *wrk;
clib_error_t *err;
- num_threads = 1 /* main thread */ + vtm->n_threads;
+ num_threads = 1 /* main thread */ + vlib_num_workers ();
+ if (vlib_num_workers ())
+ clib_spinlock_init (&hcm->lock);
vec_validate (hcm->wrk, num_threads - 1);
vec_foreach (wrk, hcm->wrk)
{
+ wrk->has_common_headers = false;
wrk->thread_index = wrk - hcm->wrk;
/* 4k for headers should be enough */
vec_validate (wrk->headers_buf, 4095);
@@ -657,10 +781,18 @@ hc_detach ()
}
static void
-hcc_worker_cleanup (hc_worker_t *wrk)
+hc_worker_cleanup (hc_worker_t *wrk)
{
- HTTP_DBG (1, "worker cleanup");
+ hc_session_t *hc_session;
+ HTTP_DBG (1, "worker and worker sessions cleanup");
+
vec_free (wrk->headers_buf);
+ vec_foreach (hc_session, wrk->sessions)
+ {
+ vec_free (hc_session->resp_headers);
+ vec_free (hc_session->http_response);
+ vec_free (hc_session->response_status);
+ }
pool_free (wrk->sessions);
}
@@ -673,16 +805,14 @@ hc_cleanup ()
hc_http_header_t *header;
vec_foreach (wrk, hcm->wrk)
- hcc_worker_cleanup (wrk);
+ hc_worker_cleanup (wrk);
vec_free (hcm->uri);
vec_free (hcm->target);
vec_free (hcm->data);
- vec_free (hcm->resp_headers);
- vec_free (hcm->http_response);
- vec_free (hcm->response_status);
vec_free (hcm->wrk);
vec_free (hcm->filename);
+ vec_free (hcm->appns_id);
vec_foreach (header, hcm->custom_header)
{
vec_free (header->name);
@@ -698,6 +828,8 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
hc_main_t *hcm = &hc_main;
clib_error_t *err = 0;
unformat_input_t _line_input, *line_input = &_line_input;
+ u64 mem_size;
+ u8 *appns_id = 0;
u8 *path = 0;
u8 *file_data;
hc_http_header_t new_header;
@@ -708,7 +840,16 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
hcm->repeat_count = 0;
hcm->duration = 0;
hcm->repeat = false;
+ hcm->multi_session = false;
+ hcm->done_count = 0;
+ hcm->connected_counter = 0;
+ hcm->max_sessions = 1;
+ hcm->prealloc_fifos = 0;
+ hcm->private_segment_size = 0;
+ hcm->fifo_size = 0;
+ hcm->was_transport_closed = false;
hc_stats.request_count = 0;
+ hc_stats.elapsed_time = 0;
if (hcm->attached)
return clib_error_return (0, "failed: already running!");
@@ -729,8 +870,6 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "data %v", &hcm->data))
hcm->is_file = 0;
- else if (unformat (line_input, "target %s", &hcm->target))
- ;
else if (unformat (line_input, "file %s", &path))
hcm->is_file = 1;
else if (unformat (line_input, "use-ptr"))
@@ -761,6 +900,29 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else if (unformat (line_input, "duration %f", &hcm->duration))
hcm->repeat = true;
+ else if (unformat (line_input, "sessions %d", &hcm->max_sessions))
+ {
+ hcm->multi_session = true;
+ if (hcm->max_sessions <= 1)
+ {
+ err = clib_error_return (0, "sessions must be > 1");
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "prealloc-fifos %d",
+ &hcm->prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &mem_size))
+ hcm->private_segment_size = mem_size;
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &mem_size))
+ hcm->fifo_size = mem_size;
+ else if (unformat (line_input, "appns %_%v%_", &appns_id))
+ ;
+ else if (unformat (line_input, "secret %lu", &hcm->appns_secret))
+ ;
+
else
{
err = clib_error_return (0, "unknown input `%U'",
@@ -774,11 +936,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
err = clib_error_return (0, "URI not defined");
goto done;
}
- if (!hcm->target)
- {
- err = clib_error_return (0, "target not defined");
- goto done;
- }
+
if (!hcm->data && hcm->req_method == HTTP_REQ_POST)
{
if (path)
@@ -794,6 +952,7 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
}
+
if (hcm->duration && hcm->repeat_count)
{
err = clib_error_return (
@@ -801,6 +960,20 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
+ if (hcm->multi_session && !hcm->repeat)
+ {
+ err = clib_error_return (
+ 0, "multiple sessions are only supported with request repeating");
+ goto done;
+ }
+
+ if ((rv = parse_target ((char **) &hcm->uri, (char **) &hcm->target)))
+ {
+ err = clib_error_return (0, "target parse error: %U",
+ format_session_error, rv);
+ goto done;
+ }
+
if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep)))
{
err =
@@ -808,6 +981,12 @@ hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
+ if (hcm->duration >= hcm->timeout)
+ {
+ hcm->timeout = hcm->duration + 10;
+ }
+ hcm->appns_id = appns_id;
+
if (hcm->repeat)
vlib_cli_output (vm, "Running, please wait...");
@@ -842,10 +1021,12 @@ done:
VLIB_CLI_COMMAND (hc_command, static) = {
.path = "http client",
.short_help =
- "[post] uri http://<ip-addr> target <origin-form> "
+ "[post] uri http://<ip-addr>/<origin-form> "
"[data <form-urlencoded> | file <file-path>] [use-ptr] "
"[save-to <filename>] [header <Key:Value>] [verbose] "
- "[timeout <seconds> (default = 10)] [repeat <count> | duration <seconds>]",
+ "[timeout <seconds> (default = 10)] [repeat <count> | duration <seconds>] "
+ "[sessions <# of sessions>] [appns <app-ns> secret <appns-secret>] "
+ "[fifo-size <nM|G>] [private-segment-size <nM|G>] [prealloc-fifos <n>]",
.function = hc_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c
index 4ee3b49444c..b72d4dfae54 100644
--- a/src/plugins/hs_apps/http_client_cli.c
+++ b/src/plugins/hs_apps/http_client_cli.c
@@ -31,7 +31,7 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 rx_offset;
u32 vpp_session_index;
u64 to_recv;
@@ -41,7 +41,7 @@ typedef struct
typedef struct
{
hcc_session_t *sessions;
- u32 thread_index;
+ clib_thread_index_t thread_index;
} hcc_worker_t;
typedef struct
@@ -62,6 +62,8 @@ typedef struct
u8 *http_response;
u8 *appns_id;
u64 appns_secret;
+ u32 ckpair_index;
+ u8 need_crypto;
} hcc_main_t;
typedef enum
@@ -74,7 +76,7 @@ typedef enum
static hcc_main_t hcc_main;
static hcc_worker_t *
-hcc_worker_get (u32 thread_index)
+hcc_worker_get (clib_thread_index_t thread_index)
{
return vec_elt_at_index (hcc_main.wrk, thread_index);
}
@@ -90,7 +92,7 @@ hcc_session_alloc (hcc_worker_t *wrk)
}
static hcc_session_t *
-hcc_session_get (u32 hs_index, u32 thread_index)
+hcc_session_get (u32 hs_index, clib_thread_index_t thread_index)
{
hcc_worker_t *wrk = hcc_worker_get (thread_index);
return pool_elt_at_index (wrk->sessions, hs_index);
@@ -333,6 +335,7 @@ hcc_attach ()
vnet_app_attach_args_t _a, *a = &_a;
u64 options[18];
u32 segment_size = 128 << 20;
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
int rv;
if (hcm->private_segment_size)
@@ -353,6 +356,7 @@ hcc_attach ()
hcm->fifo_size ? hcm->fifo_size : 32 << 10;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+ a->options[APP_OPTIONS_TLS_ENGINE] = CRYPTO_ENGINE_OPENSSL;
if (hcm->appns_id)
{
a->namespace_id = hcm->appns_id;
@@ -365,6 +369,15 @@ hcc_attach ()
hcm->app_index = a->app_index;
vec_free (a->name);
hcm->test_client_attached = 1;
+
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hcm->ckpair_index = ck_pair->index;
+
return 0;
}
@@ -411,6 +424,14 @@ hcc_connect ()
&a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg));
clib_memcpy (ext_cfg->data, &http_cfg, sizeof (http_cfg));
+ if (hcm->need_crypto)
+ {
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
+ }
+
/* allocate http session on main thread */
wrk = hcc_worker_get (0);
hs = hcc_session_alloc (wrk);
@@ -581,6 +602,8 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
err = clib_error_return (0, "Uri parse error: %d", rv);
goto done;
}
+ hcm->need_crypto = hcm->connect_sep.transport_proto == TRANSPORT_PROTO_TLS;
+ hcm->connect_sep.transport_proto = TRANSPORT_PROTO_HTTP;
session_enable_disable_args_t args = { .is_en = 1,
.rt_engine_type =
diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c
index 59a0309e363..486d4a525e3 100644
--- a/src/plugins/hs_apps/http_tps.c
+++ b/src/plugins/hs_apps/http_tps.c
@@ -25,7 +25,7 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u64 data_len;
u64 data_offset;
u32 vpp_session_index;
@@ -78,7 +78,7 @@ typedef struct hs_main_
static hts_main_t hts_main;
static hts_session_t *
-hts_session_alloc (u32 thread_index)
+hts_session_alloc (clib_thread_index_t thread_index)
{
hts_main_t *htm = &hts_main;
hts_session_t *hs;
@@ -92,7 +92,7 @@ hts_session_alloc (u32 thread_index)
}
static hts_session_t *
-hts_session_get (u32 thread_index, u32 hts_index)
+hts_session_get (clib_thread_index_t thread_index, u32 hts_index)
{
hts_main_t *htm = &hts_main;
@@ -345,6 +345,11 @@ hts_session_rx_body (hts_session_t *hs, session_t *ts)
ASSERT (rv == n_deq);
}
hs->left_recv -= n_deq;
+ if (svm_fifo_needs_deq_ntf (ts->rx_fifo, n_deq))
+ {
+ svm_fifo_clear_deq_ntf (ts->rx_fifo);
+ session_program_transport_io_evt (ts->handle, SESSION_IO_EVT_RX);
+ }
if (hs->close_threshold > 0)
{
@@ -620,7 +625,7 @@ hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri,
u8 need_crypto;
hts_session_t *hls;
session_t *ls;
- u32 thread_index = 0;
+ clib_thread_index_t thread_index = 0;
int rv;
clib_memset (a, 0, sizeof (*a));
diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c
index 1bcc1e85a17..140183d5f59 100644
--- a/src/plugins/hs_apps/proxy.c
+++ b/src/plugins/hs_apps/proxy.c
@@ -112,7 +112,8 @@ proxy_do_connect (vnet_connect_args_t *a)
static void
proxy_handle_connects_rpc (void *args)
{
- u32 thread_index = pointer_to_uword (args), n_connects = 0, n_pending;
+ clib_thread_index_t thread_index = pointer_to_uword (args), n_connects = 0,
+ n_pending;
proxy_worker_t *wrk;
u32 max_connects;
@@ -1137,7 +1138,8 @@ active_open_tx_callback (session_t * ao_s)
if (sc->pair.is_http)
{
/* notify HTTP transport */
- session_program_rx_io_evt (sc->pair.session_handle);
+ session_program_transport_io_evt (sc->pair.session_handle,
+ SESSION_IO_EVT_RX);
}
else
{
diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h
index f26f4bf0ea2..88b7cdf41ee 100644
--- a/src/plugins/hs_apps/proxy.h
+++ b/src/plugins/hs_apps/proxy.h
@@ -117,7 +117,7 @@ typedef struct
extern proxy_main_t proxy_main;
static inline proxy_worker_t *
-proxy_worker_get (u32 thread_index)
+proxy_worker_get (clib_thread_index_t thread_index)
{
proxy_main_t *pm = &proxy_main;
return vec_elt_at_index (pm->workers, thread_index);
diff --git a/src/plugins/hs_apps/test_builtins.c b/src/plugins/hs_apps/test_builtins.c
index c314e71b5df..4c324d5b953 100644
--- a/src/plugins/hs_apps/test_builtins.c
+++ b/src/plugins/hs_apps/test_builtins.c
@@ -161,6 +161,7 @@ test_builtins_init (vlib_main_t *vm)
0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
(*fp) (handle_get_test1, "test1", HTTP_REQ_GET);
+ (*fp) (handle_get_test1, "test1", HTTP_REQ_POST);
(*fp) (handle_get_test2, "test2", HTTP_REQ_GET);
(*fp) (handle_get_test_delayed, "test_delayed", HTTP_REQ_GET);
(*fp) (handle_post_test3, "test3", HTTP_REQ_POST);
diff --git a/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c b/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c
new file mode 100644
index 00000000000..066635e3d9b
--- /dev/null
+++ b/src/plugins/hs_apps/vcl/vcl_test_cl_udp.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <vcl/vppcom.h>
+#include <hs_apps/vcl/vcl_test.h>
+
+/*
+ * Role of this test application. It stays VT_CLU_TYPE_NONE until the
+ * argument parser sees -s (server) or -c (client).
+ */
+typedef enum vt_clu_type_
+{
+ VT_CLU_TYPE_NONE = 0,
+ VT_CLU_TYPE_SERVER,
+ VT_CLU_TYPE_CLIENT,
+} vt_clu_type_t;
+
+/* Application state, filled in by vt_clu_parse_args(). */
+typedef struct vtclu_main_
+{
+ /* role selected on the command line */
+ vt_clu_type_t app_type;
+ /* endpoint handed to vppcom bind (server) or sendto (client) */
+ vppcom_endpt_t endpt;
+ /*
+ * IPv4 address given with -s / -c. Both members live in an anonymous
+ * union, so they name the same storage.
+ */
+ union
+ {
+ struct sockaddr_storage srvr_addr;
+ struct sockaddr_storage clnt_addr;
+ };
+ /* initialised to VCL_TEST_SERVER_PORT by the argument parser */
+ uint16_t port;
+} vt_clu_main_t;
+
+static vt_clu_main_t vt_clu_main;
+
+/*
+ * Parse the command line into vclum.
+ *
+ *   -s <ipv4-addr>   run as server
+ *   -c <ipv4-addr>   run as client
+ *
+ * vclum is zeroed first and the port defaults to VCL_TEST_SERVER_PORT.
+ * An address that inet_pton() cannot parse only produces a warning.
+ * If neither -s nor -c is given the process exits with status 1.
+ *
+ * On return vclum->endpt describes an IPv4 endpoint whose ip pointer
+ * refers to the address stored inside vclum and whose port is the
+ * configured port in network byte order.
+ */
+static void
+vt_clu_parse_args (vt_clu_main_t *vclum, int argc, char **argv)
+{
+  int c;
+
+  memset (vclum, 0, sizeof (*vclum));
+  vclum->port = VCL_TEST_SERVER_PORT;
+
+  /* getopt diagnostics are suppressed */
+  opterr = 0;
+  while ((c = getopt (argc, argv, "s:c:")) != -1)
+    switch (c)
+      {
+      case 's':
+        vclum->app_type = VT_CLU_TYPE_SERVER;
+        if (inet_pton (
+              AF_INET, optarg,
+              &((struct sockaddr_in *) &vclum->srvr_addr)->sin_addr) != 1)
+          vtwrn ("couldn't parse ipv4 addr %s", optarg);
+        break;
+      case 'c':
+        vclum->app_type = VT_CLU_TYPE_CLIENT;
+        if (inet_pton (
+              AF_INET, optarg,
+              &((struct sockaddr_in *) &vclum->clnt_addr)->sin_addr) != 1)
+          vtwrn ("couldn't parse ipv4 addr %s", optarg);
+        break;
+      }
+
+  if (vclum->app_type == VT_CLU_TYPE_NONE)
+    {
+      vtwrn ("client or server must be configured");
+      exit (1);
+    }
+
+  vclum->endpt.is_ip4 = 1;
+  /* srvr_addr and clnt_addr share storage, so this covers both roles */
+  vclum->endpt.ip =
+    (uint8_t *) &((struct sockaddr_in *) &vclum->srvr_addr)->sin_addr;
+  /* use the configured port; endpt.port itself was zeroed by the memset */
+  vclum->endpt.port = htons (vclum->port);
+}
+
+/*
+ * Connectionless UDP smoke test on top of VCL.
+ *
+ * Server role: bind the session to the configured endpoint, wait for one
+ * datagram, print it and send a greeting back to the sender twice.
+ * Client role: send a greeting to the configured endpoint three times,
+ * then wait for one datagram and print it.
+ *
+ * Returns 0 when the exchange completes, or the negative value returned
+ * by the vppcom call that failed.
+ */
+int
+main (int argc, char **argv)
+{
+  vt_clu_main_t *vclum = &vt_clu_main;
+  int rv, vcl_sh;
+  const int buflen = 64;
+  char buf[buflen];
+
+  /* storage for the peer address filled in by recvfrom */
+  struct sockaddr_in _addr;
+  vppcom_endpt_t rmt_ep = { .ip = (void *) &_addr };
+
+  vt_clu_parse_args (vclum, argc, argv);
+
+  rv = vppcom_app_create ("vcl_test_cl_udp");
+  if (rv)
+    vtfail ("vppcom_app_create()", rv);
+
+  vcl_sh = vppcom_session_create (VPPCOM_PROTO_UDP, 0 /* is_nonblocking */);
+  if (vcl_sh < 0)
+    {
+      vterr ("vppcom_session_create()", vcl_sh);
+      return vcl_sh;
+    }
+
+  if (vclum->app_type == VT_CLU_TYPE_SERVER)
+    {
+      /* Listen is implicit */
+      rv = vppcom_session_bind (vcl_sh, &vclum->endpt);
+      if (rv < 0)
+        {
+          vterr ("vppcom_session_bind()", rv);
+          return rv;
+        }
+
+      /* leave room for the terminating NUL written below */
+      rv = vppcom_session_recvfrom (vcl_sh, buf, buflen - 1, 0, &rmt_ep);
+      if (rv < 0)
+        {
+          vterr ("vppcom_session_recvfrom()", rv);
+          return rv;
+        }
+      buf[rv] = 0;
+      vtinf ("Received message from client: %s", buf);
+
+      char *msg = "hello cl udp client";
+      int msg_len = strnlen (msg, buflen);
+      memcpy (buf, msg, msg_len);
+      /* send 2 times to be sure */
+      for (int i = 0; i < 2; i++)
+        {
+          rv = vppcom_session_sendto (vcl_sh, buf, msg_len, 0, &rmt_ep);
+          if (rv < 0)
+            {
+              vterr ("vppcom_session_sendto()", rv);
+              return rv;
+            }
+          usleep (500);
+        }
+    }
+  else if (vclum->app_type == VT_CLU_TYPE_CLIENT)
+    {
+      char *msg = "hello cl udp server";
+      int msg_len = strnlen (msg, buflen);
+      memcpy (buf, msg, msg_len);
+
+      /* send 3 times to be sure */
+      for (int i = 0; i < 3; i++)
+        {
+          rv = vppcom_session_sendto (vcl_sh, buf, msg_len, 0, &vclum->endpt);
+          if (rv < 0)
+            {
+              vterr ("vppcom_session_sendto()", rv);
+              return rv;
+            }
+          usleep (500);
+        }
+
+      /* leave room for the terminating NUL written below */
+      rv = vppcom_session_recvfrom (vcl_sh, buf, buflen - 1, 0, &rmt_ep);
+      if (rv < 0)
+        {
+          vterr ("vppcom_session_recvfrom()", rv);
+          return rv;
+        }
+      buf[rv] = 0;
+      vtinf ("Received message from server: %s", buf);
+    }
+
+  return 0;
+} \ No newline at end of file
diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt
index 075b8d6817b..ca2c0a9dc05 100644
--- a/src/plugins/http/CMakeLists.txt
+++ b/src/plugins/http/CMakeLists.txt
@@ -11,11 +11,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+option(VPP_ENABLE_HTTP_2 "Build http plugin with HTTP/2 enabled" OFF)
+if(VPP_ENABLE_HTTP_2)
+ add_compile_definitions(HTTP_2_ENABLE=1)
+endif()
+
add_vpp_plugin(http
SOURCES
+ http2/hpack.c
+ http2/http2.c
+ http2/frame.c
http.c
http_buffer.c
http_timer.c
+ http1.c
)
add_vpp_plugin(http_unittest
diff --git a/src/plugins/http/extras/mk_huffman_table.py b/src/plugins/http/extras/mk_huffman_table.py
new file mode 100644
index 00000000000..378544b0dce
--- /dev/null
+++ b/src/plugins/http/extras/mk_huffman_table.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python3
+from io import StringIO
+
+
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2025 Cisco Systems, Inc.
+
+
+# Write 2^(8 - cl) identical { symbol, code_len } rows for symbol s to fh; e.g. a 5 bit code symbol has 2^(8-5) = 8 slots, because its last 3 bits are irrelevant
+def generate_slots(fh, s, cl):
+ for i in range(1 << 8 - cl):  # evaluated as 1 << (8 - cl): subtraction binds tighter than the shift
+ fh.write(" { 0x%02X, %d },\n" % (s, cl))
+
+
+# list of code and code length tuples
+huff_code_table = []
+
+# Huffman code from RFC7541 Appendix B, EOS removed
+rfc7541_huffman_code = """\
+ ( 0) |11111111|11000 1ff8 [13]
+ ( 1) |11111111|11111111|1011000 7fffd8 [23]
+ ( 2) |11111111|11111111|11111110|0010 fffffe2 [28]
+ ( 3) |11111111|11111111|11111110|0011 fffffe3 [28]
+ ( 4) |11111111|11111111|11111110|0100 fffffe4 [28]
+ ( 5) |11111111|11111111|11111110|0101 fffffe5 [28]
+ ( 6) |11111111|11111111|11111110|0110 fffffe6 [28]
+ ( 7) |11111111|11111111|11111110|0111 fffffe7 [28]
+ ( 8) |11111111|11111111|11111110|1000 fffffe8 [28]
+ ( 9) |11111111|11111111|11101010 ffffea [24]
+ ( 10) |11111111|11111111|11111111|111100 3ffffffc [30]
+ ( 11) |11111111|11111111|11111110|1001 fffffe9 [28]
+ ( 12) |11111111|11111111|11111110|1010 fffffea [28]
+ ( 13) |11111111|11111111|11111111|111101 3ffffffd [30]
+ ( 14) |11111111|11111111|11111110|1011 fffffeb [28]
+ ( 15) |11111111|11111111|11111110|1100 fffffec [28]
+ ( 16) |11111111|11111111|11111110|1101 fffffed [28]
+ ( 17) |11111111|11111111|11111110|1110 fffffee [28]
+ ( 18) |11111111|11111111|11111110|1111 fffffef [28]
+ ( 19) |11111111|11111111|11111111|0000 ffffff0 [28]
+ ( 20) |11111111|11111111|11111111|0001 ffffff1 [28]
+ ( 21) |11111111|11111111|11111111|0010 ffffff2 [28]
+ ( 22) |11111111|11111111|11111111|111110 3ffffffe [30]
+ ( 23) |11111111|11111111|11111111|0011 ffffff3 [28]
+ ( 24) |11111111|11111111|11111111|0100 ffffff4 [28]
+ ( 25) |11111111|11111111|11111111|0101 ffffff5 [28]
+ ( 26) |11111111|11111111|11111111|0110 ffffff6 [28]
+ ( 27) |11111111|11111111|11111111|0111 ffffff7 [28]
+ ( 28) |11111111|11111111|11111111|1000 ffffff8 [28]
+ ( 29) |11111111|11111111|11111111|1001 ffffff9 [28]
+ ( 30) |11111111|11111111|11111111|1010 ffffffa [28]
+ ( 31) |11111111|11111111|11111111|1011 ffffffb [28]
+' ' ( 32) |010100 14 [ 6]
+'!' ( 33) |11111110|00 3f8 [10]
+'"' ( 34) |11111110|01 3f9 [10]
+'#' ( 35) |11111111|1010 ffa [12]
+'$' ( 36) |11111111|11001 1ff9 [13]
+'%' ( 37) |010101 15 [ 6]
+'&' ( 38) |11111000 f8 [ 8]
+''' ( 39) |11111111|010 7fa [11]
+'(' ( 40) |11111110|10 3fa [10]
+')' ( 41) |11111110|11 3fb [10]
+'*' ( 42) |11111001 f9 [ 8]
+'+' ( 43) |11111111|011 7fb [11]
+',' ( 44) |11111010 fa [ 8]
+'-' ( 45) |010110 16 [ 6]
+'.' ( 46) |010111 17 [ 6]
+'/' ( 47) |011000 18 [ 6]
+'0' ( 48) |00000 0 [ 5]
+'1' ( 49) |00001 1 [ 5]
+'2' ( 50) |00010 2 [ 5]
+'3' ( 51) |011001 19 [ 6]
+'4' ( 52) |011010 1a [ 6]
+'5' ( 53) |011011 1b [ 6]
+'6' ( 54) |011100 1c [ 6]
+'7' ( 55) |011101 1d [ 6]
+'8' ( 56) |011110 1e [ 6]
+'9' ( 57) |011111 1f [ 6]
+':' ( 58) |1011100 5c [ 7]
+';' ( 59) |11111011 fb [ 8]
+'<' ( 60) |11111111|1111100 7ffc [15]
+'=' ( 61) |100000 20 [ 6]
+'>' ( 62) |11111111|1011 ffb [12]
+'?' ( 63) |11111111|00 3fc [10]
+'@' ( 64) |11111111|11010 1ffa [13]
+'A' ( 65) |100001 21 [ 6]
+'B' ( 66) |1011101 5d [ 7]
+'C' ( 67) |1011110 5e [ 7]
+'D' ( 68) |1011111 5f [ 7]
+'E' ( 69) |1100000 60 [ 7]
+'F' ( 70) |1100001 61 [ 7]
+'G' ( 71) |1100010 62 [ 7]
+'H' ( 72) |1100011 63 [ 7]
+'I' ( 73) |1100100 64 [ 7]
+'J' ( 74) |1100101 65 [ 7]
+'K' ( 75) |1100110 66 [ 7]
+'L' ( 76) |1100111 67 [ 7]
+'M' ( 77) |1101000 68 [ 7]
+'N' ( 78) |1101001 69 [ 7]
+'O' ( 79) |1101010 6a [ 7]
+'P' ( 80) |1101011 6b [ 7]
+'Q' ( 81) |1101100 6c [ 7]
+'R' ( 82) |1101101 6d [ 7]
+'S' ( 83) |1101110 6e [ 7]
+'T' ( 84) |1101111 6f [ 7]
+'U' ( 85) |1110000 70 [ 7]
+'V' ( 86) |1110001 71 [ 7]
+'W' ( 87) |1110010 72 [ 7]
+'X' ( 88) |11111100 fc [ 8]
+'Y' ( 89) |1110011 73 [ 7]
+'Z' ( 90) |11111101 fd [ 8]
+'[' ( 91) |11111111|11011 1ffb [13]
+'\' ( 92) |11111111|11111110|000 7fff0 [19]
+']' ( 93) |11111111|11100 1ffc [13]
+'^' ( 94) |11111111|111100 3ffc [14]
+'_' ( 95) |100010 22 [ 6]
+'`' ( 96) |11111111|1111101 7ffd [15]
+'a' ( 97) |00011 3 [ 5]
+'b' ( 98) |100011 23 [ 6]
+'c' ( 99) |00100 4 [ 5]
+'d' (100) |100100 24 [ 6]
+'e' (101) |00101 5 [ 5]
+'f' (102) |100101 25 [ 6]
+'g' (103) |100110 26 [ 6]
+'h' (104) |100111 27 [ 6]
+'i' (105) |00110 6 [ 5]
+'j' (106) |1110100 74 [ 7]
+'k' (107) |1110101 75 [ 7]
+'l' (108) |101000 28 [ 6]
+'m' (109) |101001 29 [ 6]
+'n' (110) |101010 2a [ 6]
+'o' (111) |00111 7 [ 5]
+'p' (112) |101011 2b [ 6]
+'q' (113) |1110110 76 [ 7]
+'r' (114) |101100 2c [ 6]
+'s' (115) |01000 8 [ 5]
+'t' (116) |01001 9 [ 5]
+'u' (117) |101101 2d [ 6]
+'v' (118) |1110111 77 [ 7]
+'w' (119) |1111000 78 [ 7]
+'x' (120) |1111001 79 [ 7]
+'y' (121) |1111010 7a [ 7]
+'z' (122) |1111011 7b [ 7]
+'{' (123) |11111111|1111110 7ffe [15]
+'|' (124) |11111111|100 7fc [11]
+'}' (125) |11111111|111101 3ffd [14]
+'~' (126) |11111111|11101 1ffd [13]
+ (127) |11111111|11111111|11111111|1100 ffffffc [28]
+ (128) |11111111|11111110|0110 fffe6 [20]
+ (129) |11111111|11111111|010010 3fffd2 [22]
+ (130) |11111111|11111110|0111 fffe7 [20]
+ (131) |11111111|11111110|1000 fffe8 [20]
+ (132) |11111111|11111111|010011 3fffd3 [22]
+ (133) |11111111|11111111|010100 3fffd4 [22]
+ (134) |11111111|11111111|010101 3fffd5 [22]
+ (135) |11111111|11111111|1011001 7fffd9 [23]
+ (136) |11111111|11111111|010110 3fffd6 [22]
+ (137) |11111111|11111111|1011010 7fffda [23]
+ (138) |11111111|11111111|1011011 7fffdb [23]
+ (139) |11111111|11111111|1011100 7fffdc [23]
+ (140) |11111111|11111111|1011101 7fffdd [23]
+ (141) |11111111|11111111|1011110 7fffde [23]
+ (142) |11111111|11111111|11101011 ffffeb [24]
+ (143) |11111111|11111111|1011111 7fffdf [23]
+ (144) |11111111|11111111|11101100 ffffec [24]
+ (145) |11111111|11111111|11101101 ffffed [24]
+ (146) |11111111|11111111|010111 3fffd7 [22]
+ (147) |11111111|11111111|1100000 7fffe0 [23]
+ (148) |11111111|11111111|11101110 ffffee [24]
+ (149) |11111111|11111111|1100001 7fffe1 [23]
+ (150) |11111111|11111111|1100010 7fffe2 [23]
+ (151) |11111111|11111111|1100011 7fffe3 [23]
+ (152) |11111111|11111111|1100100 7fffe4 [23]
+ (153) |11111111|11111110|11100 1fffdc [21]
+ (154) |11111111|11111111|011000 3fffd8 [22]
+ (155) |11111111|11111111|1100101 7fffe5 [23]
+ (156) |11111111|11111111|011001 3fffd9 [22]
+ (157) |11111111|11111111|1100110 7fffe6 [23]
+ (158) |11111111|11111111|1100111 7fffe7 [23]
+ (159) |11111111|11111111|11101111 ffffef [24]
+ (160) |11111111|11111111|011010 3fffda [22]
+ (161) |11111111|11111110|11101 1fffdd [21]
+ (162) |11111111|11111110|1001 fffe9 [20]
+ (163) |11111111|11111111|011011 3fffdb [22]
+ (164) |11111111|11111111|011100 3fffdc [22]
+ (165) |11111111|11111111|1101000 7fffe8 [23]
+ (166) |11111111|11111111|1101001 7fffe9 [23]
+ (167) |11111111|11111110|11110 1fffde [21]
+ (168) |11111111|11111111|1101010 7fffea [23]
+ (169) |11111111|11111111|011101 3fffdd [22]
+ (170) |11111111|11111111|011110 3fffde [22]
+ (171) |11111111|11111111|11110000 fffff0 [24]
+ (172) |11111111|11111110|11111 1fffdf [21]
+ (173) |11111111|11111111|011111 3fffdf [22]
+ (174) |11111111|11111111|1101011 7fffeb [23]
+ (175) |11111111|11111111|1101100 7fffec [23]
+ (176) |11111111|11111111|00000 1fffe0 [21]
+ (177) |11111111|11111111|00001 1fffe1 [21]
+ (178) |11111111|11111111|100000 3fffe0 [22]
+ (179) |11111111|11111111|00010 1fffe2 [21]
+ (180) |11111111|11111111|1101101 7fffed [23]
+ (181) |11111111|11111111|100001 3fffe1 [22]
+ (182) |11111111|11111111|1101110 7fffee [23]
+ (183) |11111111|11111111|1101111 7fffef [23]
+ (184) |11111111|11111110|1010 fffea [20]
+ (185) |11111111|11111111|100010 3fffe2 [22]
+ (186) |11111111|11111111|100011 3fffe3 [22]
+ (187) |11111111|11111111|100100 3fffe4 [22]
+ (188) |11111111|11111111|1110000 7ffff0 [23]
+ (189) |11111111|11111111|100101 3fffe5 [22]
+ (190) |11111111|11111111|100110 3fffe6 [22]
+ (191) |11111111|11111111|1110001 7ffff1 [23]
+ (192) |11111111|11111111|11111000|00 3ffffe0 [26]
+ (193) |11111111|11111111|11111000|01 3ffffe1 [26]
+ (194) |11111111|11111110|1011 fffeb [20]
+ (195) |11111111|11111110|001 7fff1 [19]
+ (196) |11111111|11111111|100111 3fffe7 [22]
+ (197) |11111111|11111111|1110010 7ffff2 [23]
+ (198) |11111111|11111111|101000 3fffe8 [22]
+ (199) |11111111|11111111|11110110|0 1ffffec [25]
+ (200) |11111111|11111111|11111000|10 3ffffe2 [26]
+ (201) |11111111|11111111|11111000|11 3ffffe3 [26]
+ (202) |11111111|11111111|11111001|00 3ffffe4 [26]
+ (203) |11111111|11111111|11111011|110 7ffffde [27]
+ (204) |11111111|11111111|11111011|111 7ffffdf [27]
+ (205) |11111111|11111111|11111001|01 3ffffe5 [26]
+ (206) |11111111|11111111|11110001 fffff1 [24]
+ (207) |11111111|11111111|11110110|1 1ffffed [25]
+ (208) |11111111|11111110|010 7fff2 [19]
+ (209) |11111111|11111111|00011 1fffe3 [21]
+ (210) |11111111|11111111|11111001|10 3ffffe6 [26]
+ (211) |11111111|11111111|11111100|000 7ffffe0 [27]
+ (212) |11111111|11111111|11111100|001 7ffffe1 [27]
+ (213) |11111111|11111111|11111001|11 3ffffe7 [26]
+ (214) |11111111|11111111|11111100|010 7ffffe2 [27]
+ (215) |11111111|11111111|11110010 fffff2 [24]
+ (216) |11111111|11111111|00100 1fffe4 [21]
+ (217) |11111111|11111111|00101 1fffe5 [21]
+ (218) |11111111|11111111|11111010|00 3ffffe8 [26]
+ (219) |11111111|11111111|11111010|01 3ffffe9 [26]
+ (220) |11111111|11111111|11111111|1101 ffffffd [28]
+ (221) |11111111|11111111|11111100|011 7ffffe3 [27]
+ (222) |11111111|11111111|11111100|100 7ffffe4 [27]
+ (223) |11111111|11111111|11111100|101 7ffffe5 [27]
+ (224) |11111111|11111110|1100 fffec [20]
+ (225) |11111111|11111111|11110011 fffff3 [24]
+ (226) |11111111|11111110|1101 fffed [20]
+ (227) |11111111|11111111|00110 1fffe6 [21]
+ (228) |11111111|11111111|101001 3fffe9 [22]
+ (229) |11111111|11111111|00111 1fffe7 [21]
+ (230) |11111111|11111111|01000 1fffe8 [21]
+ (231) |11111111|11111111|1110011 7ffff3 [23]
+ (232) |11111111|11111111|101010 3fffea [22]
+ (233) |11111111|11111111|101011 3fffeb [22]
+ (234) |11111111|11111111|11110111|0 1ffffee [25]
+ (235) |11111111|11111111|11110111|1 1ffffef [25]
+ (236) |11111111|11111111|11110100 fffff4 [24]
+ (237) |11111111|11111111|11110101 fffff5 [24]
+ (238) |11111111|11111111|11111010|10 3ffffea [26]
+ (239) |11111111|11111111|1110100 7ffff4 [23]
+ (240) |11111111|11111111|11111010|11 3ffffeb [26]
+ (241) |11111111|11111111|11111100|110 7ffffe6 [27]
+ (242) |11111111|11111111|11111011|00 3ffffec [26]
+ (243) |11111111|11111111|11111011|01 3ffffed [26]
+ (244) |11111111|11111111|11111100|111 7ffffe7 [27]
+ (245) |11111111|11111111|11111101|000 7ffffe8 [27]
+ (246) |11111111|11111111|11111101|001 7ffffe9 [27]
+ (247) |11111111|11111111|11111101|010 7ffffea [27]
+ (248) |11111111|11111111|11111101|011 7ffffeb [27]
+ (249) |11111111|11111111|11111111|1110 ffffffe [28]
+ (250) |11111111|11111111|11111101|100 7ffffec [27]
+ (251) |11111111|11111111|11111101|101 7ffffed [27]
+ (252) |11111111|11111111|11111101|110 7ffffee [27]
+ (253) |11111111|11111111|11111101|111 7ffffef [27]
+ (254) |11111111|11111111|11111110|000 7fffff0 [27]
+ (255) |11111111|11111111|11111011|10 3ffffee [26]"""
+
+# parse Huffman code: fill huff_code_table with one (code_len, code) tuple of strings per table row, so the list index is the symbol value
+for line in StringIO(rfc7541_huffman_code):
+ # we need just last two columns
+ l = line.rstrip().split(" ")  # NOTE(review): rows such as [ 6] have a space inside the brackets - confirm the separator used here keeps the length column in one field
+ # len in bits, with the surrounding square brackets removed; kept as a decimal string
+ code_len = l[-1][1:-1].strip()
+ # code as hex aligned to LSB; kept as a hex string without the 0x prefix
+ code = l[-2].strip()
+ huff_code_table.append((code_len, code))
+
+f = open("../http2/huffman_table.h", "w")  # relative path, resolved against the current working directory (presumably the script is run from its own extras/ directory - TODO confirm); closed by f.close() at the end of the script
+f.write(
+ """/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+/* generated by mk_huffman_table.py */
+
+#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_
+#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_
+
+#include <vppinfra/types.h>
+
+typedef struct
+{
+ u8 code_len;
+ u32 code;
+} hpack_huffman_symbol_t;
+
+static hpack_huffman_symbol_t huff_sym_table[] = {
+"""
+)
+
+# encoding table: one {code_len, 0x<code>} initializer per symbol, written in symbol order
+[f.write(" {" + code[0] + ", 0x" + code[1] + "},\n") for code in huff_code_table]  # the list comprehension is used only for its side effect
+
+f.write(
+ """};
+
+typedef struct
+{
+ u8 symbol;
+ u8 code_len;
+} hpack_huffman_code_t;
+
+static hpack_huffman_code_t huff_code_table_fast[] = {
+"""
+)
+
+# fast decoding table, symbols with code length from 5 to 8 bits (most of printable ASCII characters); rows are written shortest code length first and in symbol order within one length
+[generate_slots(f, i, 5) for i, code in enumerate(huff_code_table) if code[0] == "5"]
+[generate_slots(f, i, 6) for i, code in enumerate(huff_code_table) if code[0] == "6"]
+[generate_slots(f, i, 7) for i, code in enumerate(huff_code_table) if code[0] == "7"]
+[generate_slots(f, i, 8) for i, code in enumerate(huff_code_table) if code[0] == "8"]
+
+# the last 2 entries are prefixes of longer codes, their code_len is set to 0
+f.write(" { 0x00, 0 },\n")
+f.write(" { 0x00, 0 },\n")
+
+f.write(
+ """};
+
+typedef struct
+{
+ u32 first_code;
+ u8 code_len;
+ u8 symbols[29];
+} hpack_huffman_group_t;
+
+/* clang-format off */
+
+static hpack_huffman_group_t huff_code_table_slow[] = {
+"""
+)
+for i in range(10, 31):  # one group per code length from 10 to 30 bits
+ symbols = [
+ (symbol, code[1])
+ for symbol, code in enumerate(huff_code_table)
+ if code[0] == str(i)
+ ]
+ if symbols:  # code lengths that no symbol uses produce no group
+ _, first_code = symbols[0]  # code of the first symbol (in symbol order) that has this length
+ f.write(" {\n 0x" + first_code + ", /* first_code */\n")
+ f.write(" " + str(i) + ", /* code_len */\n")
+ f.write(" {\n ")
+ [f.write(" 0x%02X," % s) for s, c in symbols[:10]]  # symbol values, ten per output line
+ if len(symbols) > 10:
+ f.write("\n ")
+ [f.write(" 0x%02X," % s) for s, c in symbols[10:20]]
+ if len(symbols) > 20:
+ f.write("\n ")
+ [f.write(" 0x%02X," % s) for s, c in symbols[20:30]]  # up to 30 values can be written here; NOTE(review): the C member above is declared as symbols[29] - keep the two limits in sync
+ f.write("\n } /* symbols */\n },\n")
+
+f.write(
+ """};
+
+/* clang format-on */
+
+always_inline hpack_huffman_group_t *
+hpack_huffman_get_group (u32 value)
+{
+"""
+)
+
+index = 0  # position of the current group inside huff_code_table_slow
+
+symbols = [
+ (symbol, code[1]) for symbol, code in enumerate(huff_code_table) if code[0] == "10"
+]
+_, last_code = symbols[-1]
+boundary = (int(last_code, 16) + 1) << 22  # 22 = 32 - 10: one past the last 10 bit code, left-aligned in a 32 bit value
+f.write(" if (value < 0x%X)\n" % boundary)
+f.write(" return &huff_code_table_slow[%d];\n" % index)
+index += 1
+
+for i in range(11, 30):  # code lengths 11 to 29; the 30 bit group is handled by the final else below
+ symbols = [
+ (symbol, code[1])
+ for symbol, code in enumerate(huff_code_table)
+ if code[0] == str(i)
+ ]
+ if symbols:  # skip code lengths without symbols so index stays in step with the slow table
+ _, last_code = symbols[-1]
+ boundary = (int(last_code, 16) + 1) << (32 - i)  # same left alignment for an i bit code
+ f.write(" else if (value < 0x%X)\n" % boundary)
+ f.write(" return &huff_code_table_slow[%d];\n" % index)
+ index += 1
+
+f.write(" else\n")
+f.write(" return &huff_code_table_slow[%d];\n" % index)
+
+f.write(
+ """}
+
+#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */
+"""
+)
+
+f.close()
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
index 69b661d0611..c7eefcdae48 100644
--- a/src/plugins/http/http.c
+++ b/src/plugins/http/http.c
@@ -13,43 +13,59 @@
* limitations under the License.
*/
+#include <vpp/app/version.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/application.h>
+
#include <http/http.h>
-#include <vnet/session/session.h>
+#include <http/http_private.h>
#include <http/http_timer.h>
-#include <http/http_status_codes.h>
-#include <http/http_header_names.h>
static http_main_t http_main;
-
-#define HTTP_FIFO_THRESH (16 << 10)
-
-/* HTTP state machine result */
-typedef enum http_sm_result_t_
-{
- HTTP_SM_STOP = 0,
- HTTP_SM_CONTINUE = 1,
- HTTP_SM_ERROR = -1,
-} http_sm_result_t;
+static http_engine_vft_t *http_vfts;
const http_buffer_type_t msg_to_buf_type[] = {
[HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO,
[HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR,
};
-const char *http_upgrade_proto_str[] = { "",
-#define _(sym, str) str,
- foreach_http_upgrade_proto
-#undef _
-};
+void
+http_register_engine (const http_engine_vft_t *vft, http_version_t version)
+{ /* Install the engine callback table for one HTTP version: the struct
+   * pointed to by vft is copied into http_vfts[version]. */
+ vec_validate (http_vfts, version); /* make sure index version exists in the vector */
+ http_vfts[version] = *vft; /* stored by value, replacing whatever the slot held */
+}
+
+int
+http_v_find_index (u8 *vec, u32 offset, u32 num, char *str)
+{ /* Find the first occurrence of str in the vector vec, starting at byte
+   * index offset. When num is non-zero the match must lie within the num
+   * bytes that begin at offset. Returns the index of the match, or -1 when
+   * nothing is found. */
+ int start_index = offset;
+ u32 slen = (u32) strnlen_s_inline (str, 16); /* length of str, measured over at most 16 bytes */
+ u32 vlen = vec_len (vec);
+
+ ASSERT (slen > 0);
-#define expect_char(c) \
- if (*p++ != c) \
- { \
- clib_warning ("unexpected character"); \
- return -1; \
+ if (vlen <= slen) /* NOTE(review): this also rejects vlen == slen, so a vector exactly as long as str is never matched - confirm intended */
+ return -1;
+
+ int end_index = vlen - slen; /* last index at which a full match still fits in vec */
+ if (num)
+ {
+ if (num < slen)
+ return -1;
+ end_index = clib_min (end_index, offset + num - slen); /* keep the match inside the num byte window */
}
-static u8 *
+ for (; start_index <= end_index; start_index++)
+ {
+ if (!memcmp (vec + start_index, str, slen))
+ return start_index;
+ }
+
+ return -1;
+}
+
+u8 *
format_http_req_state (u8 *s, va_list *va)
{
http_req_state_t state = va_arg (*va, http_req_state_t);
@@ -68,18 +84,7 @@ format_http_req_state (u8 *s, va_list *va)
return format (s, "%s", t);
}
-#define http_req_state_change(_hc, _state) \
- do \
- { \
- HTTP_DBG (1, "changing http req state: %U -> %U", \
- format_http_req_state, (_hc)->req.state, \
- format_http_req_state, _state); \
- ASSERT ((_hc)->req.state != HTTP_REQ_STATE_TUNNEL); \
- (_hc)->req.state = _state; \
- } \
- while (0)
-
-static u8 *
+u8 *
format_http_conn_state (u8 *s, va_list *args)
{
http_conn_t *hc = va_arg (*args, http_conn_t *);
@@ -98,36 +103,41 @@ format_http_conn_state (u8 *s, va_list *args)
return format (s, "%s", t);
}
+u8 *
+format_http_time_now (u8 *s, va_list *args)
+{ /* format() helper: appends the current time of http_main.timebase to s,
+   * rendered by format_clib_timebase_time, and returns the result. */
+ http_conn_t __clib_unused *hc = va_arg (*args, http_conn_t *); /* the argument is consumed from the va_list but not used */
+ http_main_t *hm = &http_main;
+ f64 now = clib_timebase_now (&hm->timebase);
+ return format (s, "%U", format_clib_timebase_time, now);
+}
+
static inline http_worker_t *
-http_worker_get (u32 thread_index)
+http_worker_get (clib_thread_index_t thread_index) /* returns the address of http_main.wrk[thread_index]; the index is not range-checked here */
{
return &http_main.wrk[thread_index];
}
static inline u32
-http_conn_alloc_w_thread (u32 thread_index)
+http_conn_alloc_w_thread (clib_thread_index_t thread_index) /* takes one element from the connection pool of worker thread_index */
{
http_worker_t *wrk = http_worker_get (thread_index);
http_conn_t *hc;
pool_get_aligned_safe (wrk->conn_pool, hc, CLIB_CACHE_LINE_BYTES);
- clib_memset (hc, 0, sizeof (*hc));
- hc->c_thread_index = thread_index;
- hc->h_hc_index = hc - wrk->conn_pool;
- hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
- hc->h_tc_session_handle = SESSION_INVALID_HANDLE;
- return hc->h_hc_index;
+ return (hc - wrk->conn_pool); /* pool index of the new element; it is no longer zeroed or initialised here, so the caller has to fill it in */
}
static inline http_conn_t *
-http_conn_get_w_thread (u32 hc_index, u32 thread_index)
+http_conn_get_w_thread (u32 hc_index, clib_thread_index_t thread_index) /* connection hc_index from the pool of worker thread_index */
{
http_worker_t *wrk = http_worker_get (thread_index);
return pool_elt_at_index (wrk->conn_pool, hc_index);
}
static inline http_conn_t *
-http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index)
+http_conn_get_w_thread_if_valid (u32 hc_index,
+ clib_thread_index_t thread_index)
{
http_worker_t *wrk = http_worker_get (thread_index);
if (pool_is_free_index (wrk->conn_pool, hc_index))
@@ -135,13 +145,22 @@ http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index)
return pool_elt_at_index (wrk->conn_pool, hc_index);
}
-void
+static void
http_conn_free (http_conn_t *hc)
{
http_worker_t *wrk = http_worker_get (hc->c_thread_index);
+ if (CLIB_DEBUG) /* only when CLIB_DEBUG is non-zero: overwrite the connection with 0xba before it goes back to the pool */
+ memset (hc, 0xba, sizeof (*hc)); /* wrk was looked up above because this overwrites hc->c_thread_index too */
pool_put (wrk->conn_pool, hc);
}
+static void
+http_add_postponed_ho_cleanups (u32 ho_hc_index)
+{ /* Remember a half-open connection index whose free has to be retried. */
+ http_main_t *hm = &http_main;
+ vec_add1 (hm->postponed_ho_free, ho_hc_index); /* appended to the vector kept in http_main */
+}
+
static inline http_conn_t *
http_ho_conn_get (u32 ho_hc_index)
{
@@ -149,26 +168,66 @@ http_ho_conn_get (u32 ho_hc_index)
return pool_elt_at_index (hm->ho_conn_pool, ho_hc_index);
}
-void
+static void
http_ho_conn_free (http_conn_t *ho_hc)
{
http_main_t *hm = &http_main;
+ if (CLIB_DEBUG) /* only when CLIB_DEBUG is non-zero: overwrite the half-open connection with 0xba before it goes back to the pool */
+ memset (ho_hc, 0xba, sizeof (*ho_hc));
pool_put (hm->ho_conn_pool, ho_hc);
}
+static void
+http_ho_try_free (u32 ho_hc_index)
+{ /* Free half-open connection ho_hc_index if HTTP_CONN_F_HO_DONE is set on
+   * it; otherwise queue the index so that the free is retried later. */
+ http_conn_t *ho_hc;
+ HTTP_DBG (1, "half open: %x", ho_hc_index);
+ ho_hc = http_ho_conn_get (ho_hc_index);
+ if (!(ho_hc->flags & HTTP_CONN_F_HO_DONE)) /* NOTE(review): plain read of a flag that http_ts_connected_callback sets with __atomic_fetch_or - confirm no acquire load is needed */
+ {
+ HTTP_DBG (1, "postponed cleanup");
+ ho_hc->hc_tc_session_handle = SESSION_INVALID_HANDLE; /* drop the transport session handle before queueing the retry */
+ http_add_postponed_ho_cleanups (ho_hc_index);
+ return;
+ }
+ if (!(ho_hc->flags & HTTP_CONN_F_NO_APP_SESSION)) /* the delete notification is skipped when this flag is set */
+ session_half_open_delete_notify (&ho_hc->connection);
+ http_ho_conn_free (ho_hc);
+}
+
+/* Retry every postponed half-open cleanup.
+ * The pending vector is swapped with hm->ho_free_list before it is walked,
+ * so an index that http_ho_try_free () postpones again is appended to the
+ * other vector (now hm->postponed_ho_free) and not to the one being
+ * iterated. Takes no arguments; declared with (void) like the other
+ * argument-less helpers in this file so the definition is a prototype. */
+static void
+http_flush_postponed_ho_cleanups (void)
+{
+ http_main_t *hm = &http_main;
+ u32 *ho_indexp, *tmp;
+
+ /* swap the pending vector with the scratch vector */
+ tmp = hm->postponed_ho_free;
+ hm->postponed_ho_free = hm->ho_free_list;
+ hm->ho_free_list = tmp;
+
+ vec_foreach (ho_indexp, hm->ho_free_list)
+ http_ho_try_free (*ho_indexp);
+
+ /* keep the allocation, drop the processed indices */
+ vec_reset_length (hm->ho_free_list);
+}
+
static inline u32
http_ho_conn_alloc (void)
{
http_main_t *hm = &http_main;
http_conn_t *hc;
+ if (vec_len (hm->postponed_ho_free)) /* retry postponed half-open frees before taking a new pool element */
+ http_flush_postponed_ho_cleanups ();
+
pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES);
clib_memset (hc, 0, sizeof (*hc));
- hc->h_hc_index = hc - hm->ho_conn_pool;
- hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
- hc->h_tc_session_handle = SESSION_INVALID_HANDLE;
+ hc->hc_hc_index = hc - hm->ho_conn_pool; /* index of the element inside the half-open pool */
+ hc->hc_pa_session_handle = SESSION_INVALID_HANDLE;
+ hc->hc_tc_session_handle = SESSION_INVALID_HANDLE;
hc->timeout = HTTP_CONN_TIMEOUT;
- return hc->h_hc_index;
+ hc->version = HTTP_VERSION_NA; /* new half-open connections start with no HTTP version set */
+ return hc->hc_hc_index;
}
static u32
@@ -178,18 +237,19 @@ http_listener_alloc (void)
http_conn_t *lhc;
pool_get_zero (hm->listener_pool, lhc);
- lhc->c_c_index = lhc - hm->listener_pool;
+ lhc->hc_hc_index = lhc - hm->listener_pool;
lhc->timeout = HTTP_CONN_TIMEOUT;
- return lhc->c_c_index;
+ lhc->version = HTTP_VERSION_NA;
+ return lhc->hc_hc_index;
}
-http_conn_t *
+static http_conn_t * /* listener at lhc_index in http_main.listener_pool; the function is now file-local */
http_listener_get (u32 lhc_index)
{
return pool_elt_at_index (http_main.listener_pool, lhc_index);
}
-void
+static void
http_listener_free (http_conn_t *lhc)
{
http_main_t *hm = &http_main;
@@ -204,7 +264,7 @@ void
http_disconnect_transport (http_conn_t *hc)
{
vnet_disconnect_args_t a = {
- .handle = hc->h_tc_session_handle,
+ .handle = hc->hc_tc_session_handle,
.app_index = http_main.app_index,
};
@@ -214,6 +274,110 @@ http_disconnect_transport (http_conn_t *hc)
clib_warning ("disconnect returned");
}
+void
+http_shutdown_transport (http_conn_t *hc)
+{ /* Mark the connection closed and call vnet_shutdown_session () on its
+   * transport session. */
+ vnet_shutdown_args_t a = {
+ .handle = hc->hc_tc_session_handle,
+ .app_index = http_main.app_index,
+ };
+
+ hc->state = HTTP_CONN_STATE_CLOSED; /* set before the call and not reverted if the call fails */
+
+ if (vnet_shutdown_session (&a)) /* a non-zero result is only logged */
+ clib_warning ("shutdown returned");
+}
+
+http_status_code_t
+http_sc_by_u16 (u16 status_code)
+{ /* Map a numeric status code to http_status_code_t through the
+   * http_main.sc_by_u16 table. */
+ http_main_t *hm = &http_main;
+ return hm->sc_by_u16[status_code]; /* NOTE(review): status_code is not range-checked here - confirm callers only pass values the table covers */
+}
+
+u8 *
+http_get_app_header_list (http_req_t *req, http_msg_t *msg)
+{ /* Take the header list that belongs to msg off the tx fifo of the
+   * application session of req and return a pointer to it. */
+ http_main_t *hm = &http_main;
+ session_t *as;
+ u8 *app_headers;
+ int rv;
+
+ as = session_get_from_handle (req->hr_pa_session_handle);
+
+ if (msg->data.type == HTTP_MSG_DATA_PTR)
+ { /* pointer mode: the fifo carries the address of the header list */
+ uword app_headers_ptr;
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr),
+ (u8 *) &app_headers_ptr);
+ ASSERT (rv == sizeof (app_headers_ptr));
+ app_headers = uword_to_pointer (app_headers_ptr, u8 *);
+ }
+ else
+ { /* inline mode: headers_len bytes are copied into the buffer kept for
+    * the thread of the session; NOTE(review): assumes that buffer holds
+    * at least headers_len bytes - TODO confirm where it is sized */
+ app_headers = hm->app_header_lists[as->thread_index];
+ rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers);
+ ASSERT (rv == msg->data.headers_len); /* a short dequeue is only caught by this assertion */
+ }
+
+ return app_headers;
+}
+
+u8 *
+http_get_app_target (http_req_t *req, http_msg_t *msg)
+{ /* Take the request target that belongs to msg off the tx fifo of the
+   * application session of req and return a pointer to it. */
+ session_t *as;
+ u8 *target;
+ int rv;
+
+ as = session_get_from_handle (req->hr_pa_session_handle);
+
+ if (msg->data.type == HTTP_MSG_DATA_PTR)
+ { /* pointer mode: the fifo carries the address of the target */
+ uword target_ptr;
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr),
+ (u8 *) &target_ptr);
+ ASSERT (rv == sizeof (target_ptr));
+ target = uword_to_pointer (target_ptr, u8 *);
+ }
+ else
+ { /* inline mode: the bytes are copied into the req->target vector */
+ vec_reset_length (req->target);
+ vec_validate (req->target, msg->data.target_path_len - 1); /* NOTE(review): with target_path_len == 0 the index becomes -1 - confirm callers never send an empty target */
+ rv =
+ svm_fifo_dequeue (as->tx_fifo, msg->data.target_path_len, req->target);
+ ASSERT (rv == msg->data.target_path_len);
+ target = req->target;
+ }
+ return target;
+}
+
+u8 *
+http_get_tx_buf (http_conn_t *hc)
+{ /* Return the tx buffer that http_main keeps for the thread of hc, with
+   * its vector length reset. */
+ http_main_t *hm = &http_main;
+ u8 *buf = hm->tx_bufs[hc->c_thread_index];
+ vec_reset_length (buf);
+ return buf;
+}
+
+u8 *
+http_get_rx_buf (http_conn_t *hc)
+{ /* Return the rx buffer that http_main keeps for the thread of hc, with
+   * its vector length reset. */
+ http_main_t *hm = &http_main;
+ u8 *buf = hm->rx_bufs[hc->c_thread_index];
+ vec_reset_length (buf);
+ return buf;
+}
+
+void
+http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg)
+{ /* Initialise req->tx_buf over the tx fifo of the application session of
+   * req, for a body of msg->data.body_len bytes. */
+ session_t *as = session_get_from_handle (req->hr_pa_session_handle);
+ http_buffer_init (&req->tx_buf, msg_to_buf_type[msg->data.type], as->tx_fifo, /* buffer type is looked up from the message data type */
+ msg->data.body_len);
+}
+
static void
http_conn_invalidate_timer_cb (u32 hs_handle)
{
@@ -230,7 +394,7 @@ http_conn_invalidate_timer_cb (u32 hs_handle)
}
hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
- hc->pending_timer = 1;
+ hc->flags |= HTTP_CONN_F_PENDING_TIMER;
}
static void
@@ -250,24 +414,30 @@ http_conn_timeout_cb (void *hc_handlep)
return;
}
- if (!hc->pending_timer)
+ if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER))
{
HTTP_DBG (1, "timer not pending");
return;
}
- session_transport_closing_notify (&hc->connection);
+ /* in case nothing received on cleartext connection before timeout */
+ if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA))
+ http_vfts[hc->version].transport_close_callback (hc);
http_disconnect_transport (hc);
}
+/*************************/
+/* session VFT callbacks */
+/*************************/
+
int
http_ts_accept_callback (session_t *ts)
{
- session_t *ts_listener, *as, *asl;
- app_worker_t *app_wrk;
+ session_t *ts_listener;
http_conn_t *lhc, *hc;
u32 hc_index, thresh;
- int rv;
+ http_conn_handle_t hc_handle;
+ transport_proto_t tp;
ts_listener = listen_session_get_from_handle (ts->listener_handle);
lhc = http_listener_get (ts_listener->opaque);
@@ -277,61 +447,35 @@ http_ts_accept_callback (session_t *ts)
clib_memcpy_fast (hc, lhc, sizeof (*lhc));
hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
hc->c_thread_index = ts->thread_index;
- hc->h_hc_index = hc_index;
-
- hc->h_tc_session_handle = session_handle (ts);
+ hc->hc_hc_index = hc_index;
+ hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+ hc->hc_tc_session_handle = session_handle (ts);
hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
-
hc->state = HTTP_CONN_STATE_ESTABLISHED;
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD);
ts->session_state = SESSION_STATE_READY;
- ts->opaque = hc_index;
-
- /*
- * Alloc session and initialize
- */
- as = session_alloc (hc->c_thread_index);
- hc->c_s_index = as->session_index;
-
- as->app_wrk_index = hc->h_pa_wrk_index;
- as->connection_index = hc->c_c_index;
- as->session_state = SESSION_STATE_ACCEPTING;
-
- asl = listen_session_get_from_handle (lhc->h_pa_session_handle);
- as->session_type = asl->session_type;
- as->listener_handle = lhc->h_pa_session_handle;
-
- /*
- * Init session fifos and notify app
- */
- if ((rv = app_worker_init_accepted (as)))
+ tp = session_get_transport_proto (ts);
+ if (tp == TRANSPORT_PROTO_TLS)
{
- HTTP_DBG (1, "failed to allocate fifos");
- hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
- session_free (as);
- return rv;
+ /* TODO: set by ALPN result */
+ hc->version = HTTP_VERSION_1;
}
-
- hc->h_pa_session_handle = session_handle (as);
- hc->h_pa_wrk_index = as->app_wrk_index;
- app_wrk = app_worker_get (as->app_wrk_index);
+ else
+ {
+ /* going to decide in http_ts_rx_callback */
+ hc->version = HTTP_VERSION_NA;
+ }
+ hc_handle.version = hc->version;
+ hc_handle.conn_index = hc_index;
+ ts->opaque = hc_handle.as_u32;
HTTP_DBG (1, "Accepted on listener %u new connection [%u]%x",
ts_listener->opaque, vlib_get_thread_index (), hc_index);
- if ((rv = app_worker_accept_notify (app_wrk, as)))
- {
- HTTP_DBG (0, "app accept returned");
- session_free (as);
- return rv;
- }
-
/* Avoid enqueuing small chunks of data on transport tx notifications. If
* the fifo is small (under 16K) we set the threshold to it's size, meaning
* a notification will be given when the fifo empties.
*/
- ts = session_get_from_handle (hc->h_tc_session_handle);
thresh = clib_min (svm_fifo_size (ts->tx_fifo), HTTP_FIFO_THRESH);
svm_fifo_set_deq_thresh (ts->tx_fifo, thresh);
@@ -345,9 +489,9 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
session_error_t err)
{
u32 new_hc_index;
- session_t *as;
http_conn_t *hc, *ho_hc;
app_worker_t *app_wrk;
+ http_conn_handle_t hc_handle;
int rv;
ho_hc = http_ho_conn_get (ho_hc_index);
@@ -357,9 +501,10 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
{
clib_warning ("half-open hc index %d, error: %U", ho_hc_index,
format_session_error, err);
- app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index);
+ ho_hc->flags |= HTTP_CONN_F_HO_DONE;
+ app_wrk = app_worker_get_if_valid (ho_hc->hc_pa_wrk_index);
if (app_wrk)
- app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx);
+ app_worker_connect_notify (app_wrk, 0, err, ho_hc->hc_pa_app_api_ctx);
return 0;
}
@@ -368,46 +513,31 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
clib_memcpy_fast (hc, ho_hc, sizeof (*hc));
+ /* in chain with TLS there is race on half-open cleanup */
+ __atomic_fetch_or (&ho_hc->flags, HTTP_CONN_F_HO_DONE, __ATOMIC_RELEASE);
+
hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
hc->c_thread_index = ts->thread_index;
- hc->h_tc_session_handle = session_handle (ts);
- hc->c_c_index = new_hc_index;
+ hc->hc_tc_session_handle = session_handle (ts);
+ hc->hc_hc_index = new_hc_index;
hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
hc->state = HTTP_CONN_STATE_ESTABLISHED;
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD);
-
ts->session_state = SESSION_STATE_READY;
- ts->opaque = new_hc_index;
-
- /* allocate app session and initialize */
-
- as = session_alloc (hc->c_thread_index);
- hc->c_s_index = as->session_index;
- as->connection_index = hc->c_c_index;
- as->app_wrk_index = hc->h_pa_wrk_index;
- as->session_state = SESSION_STATE_READY;
- as->opaque = hc->h_pa_app_api_ctx;
- as->session_type = session_type_from_proto_and_ip (
- TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type));
+ hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+ /* TODO: TLS set by ALPN result, TCP: prior knowledge (set in ho) */
+ hc_handle.version = hc->version;
+ hc_handle.conn_index = new_hc_index;
+ ts->opaque = hc_handle.as_u32;
HTTP_DBG (1, "half-open hc index %x, hc [%u]%x", ho_hc_index,
ts->thread_index, new_hc_index);
- app_wrk = app_worker_get (hc->h_pa_wrk_index);
- if (!app_wrk)
+ if ((rv = http_vfts[hc->version].transport_connected_callback (hc)))
{
- clib_warning ("no app worker");
- return -1;
- }
-
- if ((rv = app_worker_init_connected (app_wrk, as)))
- {
- HTTP_DBG (1, "failed to allocate fifos");
- session_free (as);
+ clib_warning ("transport_connected_callback failed, rv=%d", rv);
return rv;
}
- app_worker_connect_notify (app_wrk, as, err, hc->h_pa_app_api_ctx);
- hc->h_pa_session_handle = session_handle (as);
+
http_conn_timer_start (hc);
return 0;
@@ -417,1832 +547,154 @@ static void
http_ts_disconnect_callback (session_t *ts)
{
http_conn_t *hc;
+ http_conn_handle_t hc_handle;
+
+ hc_handle.as_u32 = ts->opaque;
- hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index);
+
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
if (hc->state < HTTP_CONN_STATE_TRANSPORT_CLOSED)
hc->state = HTTP_CONN_STATE_TRANSPORT_CLOSED;
- /* Nothing more to rx, propagate to app */
- if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
- session_transport_closing_notify (&hc->connection);
+ /* in case the peer closes a cleartext connection before sending anything */
+ if (PREDICT_FALSE (hc->version == HTTP_VERSION_NA))
+ return;
+
+ http_vfts[hc->version].transport_close_callback (hc);
}
static void
http_ts_reset_callback (session_t *ts)
{
http_conn_t *hc;
+ http_conn_handle_t hc_handle;
- hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
-
- hc->state = HTTP_CONN_STATE_CLOSED;
- http_buffer_free (&hc->req.tx_buf);
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD);
- session_transport_reset_notify (&hc->connection);
-
- http_disconnect_transport (hc);
-}
-
-/**
- * http error boilerplate
- */
-static const char *http_error_template = "HTTP/1.1 %s\r\n"
- "Date: %U GMT\r\n"
- "Connection: close\r\n"
- "Content-Length: 0\r\n\r\n";
-
-/**
- * http response boilerplate
- */
-static const char *http_response_template = "HTTP/1.1 %s\r\n"
- "Date: %U GMT\r\n"
- "Server: %v\r\n";
-
-static const char *content_len_template = "Content-Length: %llu\r\n";
-
-static const char *connection_upgrade_template = "Connection: upgrade\r\n"
- "Upgrade: %s\r\n";
-
-/**
- * http request boilerplate
- */
-static const char *http_get_request_template = "GET %s HTTP/1.1\r\n"
- "Host: %v\r\n"
- "User-Agent: %v\r\n";
-
-static const char *http_post_request_template = "POST %s HTTP/1.1\r\n"
- "Host: %v\r\n"
- "User-Agent: %v\r\n"
- "Content-Length: %llu\r\n";
-
-static u32
-http_send_data (http_conn_t *hc, u8 *data, u32 length)
-{
- const u32 max_burst = 64 << 10;
- session_t *ts;
- u32 to_send;
- int rv;
-
- ts = session_get_from_handle (hc->h_tc_session_handle);
+ hc_handle.as_u32 = ts->opaque;
- to_send = clib_min (length, max_burst);
- rv = svm_fifo_enqueue (ts->tx_fifo, to_send, data);
- if (rv <= 0)
- {
- clib_warning ("svm_fifo_enqueue failed, rv %d", rv);
- return 0;
- }
+ HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index);
- if (svm_fifo_set_event (ts->tx_fifo))
- session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
- return rv;
-}
+ hc->state = HTTP_CONN_STATE_CLOSED;
+ /* in case peer reset cleartext connection before send something */
+ if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA))
+ http_vfts[hc->version].transport_reset_callback (hc);
-static void
-http_send_error (http_conn_t *hc, http_status_code_t ec)
-{
- http_main_t *hm = &http_main;
- u8 *data;
- f64 now;
-
- if (ec >= HTTP_N_STATUS)
- ec = HTTP_STATUS_INTERNAL_ERROR;
-
- now = clib_timebase_now (&hm->timebase);
- data = format (0, http_error_template, http_status_code_str[ec],
- format_clib_timebase_time, now);
- HTTP_DBG (3, "%v", data);
- http_send_data (hc, data, vec_len (data));
- vec_free (data);
+ http_disconnect_transport (hc);
}
static int
-http_read_message (http_conn_t *hc)
+http_ts_rx_callback (session_t *ts)
{
+ http_conn_t *hc;
+ http_conn_handle_t hc_handle;
u32 max_deq;
- session_t *ts;
- int n_read;
-
- ts = session_get_from_handle (hc->h_tc_session_handle);
-
- max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
- if (PREDICT_FALSE (max_deq == 0))
- return -1;
-
- vec_validate (hc->req.rx_buf, max_deq - 1);
- n_read = svm_fifo_peek (ts->rx_fifo, 0, max_deq, hc->req.rx_buf);
- ASSERT (n_read == max_deq);
- HTTP_DBG (1, "read %u bytes from rx_fifo", n_read);
-
- return 0;
-}
-
-static void
-http_read_message_drop (http_conn_t *hc, u32 len)
-{
- session_t *ts;
-
- ts = session_get_from_handle (hc->h_tc_session_handle);
- svm_fifo_dequeue_drop (ts->rx_fifo, len);
- vec_reset_length (hc->req.rx_buf);
+ u8 *rx_buf;
- if (svm_fifo_is_empty (ts->rx_fifo))
- svm_fifo_unset_event (ts->rx_fifo);
-}
-
-static void
-http_read_message_drop_all (http_conn_t *hc)
-{
- session_t *ts;
-
- ts = session_get_from_handle (hc->h_tc_session_handle);
- svm_fifo_dequeue_drop_all (ts->rx_fifo);
- vec_reset_length (hc->req.rx_buf);
-
- if (svm_fifo_is_empty (ts->rx_fifo))
- svm_fifo_unset_event (ts->rx_fifo);
-}
-
-/**
- * @brief Find the first occurrence of the string in the vector.
- *
- * @param vec The vector to be scanned.
- * @param offset Search offset in the vector.
- * @param num Maximum number of characters to be searched if non-zero.
- * @param str The string to be searched.
- *
- * @return @c -1 if the string is not found within the vector; index otherwise.
- */
-static inline int
-v_find_index (u8 *vec, u32 offset, u32 num, char *str)
-{
- int start_index = offset;
- u32 slen = (u32) strnlen_s_inline (str, 16);
- u32 vlen = vec_len (vec);
-
- ASSERT (slen > 0);
-
- if (vlen <= slen)
- return -1;
-
- int end_index = vlen - slen;
- if (num)
- {
- if (num < slen)
- return -1;
- end_index = clib_min (end_index, offset + num - slen);
- }
-
- for (; start_index <= end_index; start_index++)
- {
- if (!memcmp (vec + start_index, str, slen))
- return start_index;
- }
-
- return -1;
-}
-
-static void
-http_identify_optional_query (http_req_t *req)
-{
- int i;
- for (i = req->target_path_offset;
- i < (req->target_path_offset + req->target_path_len); i++)
- {
- if (req->rx_buf[i] == '?')
- {
- req->target_query_offset = i + 1;
- req->target_query_len = req->target_path_offset +
- req->target_path_len -
- req->target_query_offset;
- req->target_path_len =
- req->target_path_len - req->target_query_len - 1;
- break;
- }
- }
-}
-
-static int
-http_parse_target (http_req_t *req)
-{
- int i;
- u8 *p, *end;
-
- /* asterisk-form = "*" */
- if ((req->rx_buf[req->target_path_offset] == '*') &&
- (req->target_path_len == 1))
- {
- req->target_form = HTTP_TARGET_ASTERISK_FORM;
- /* we do not support OPTIONS request */
- return -1;
- }
-
- /* origin-form = 1*( "/" segment ) [ "?" query ] */
- if (req->rx_buf[req->target_path_offset] == '/')
- {
- /* drop leading slash */
- req->target_path_len--;
- req->target_path_offset++;
- req->target_form = HTTP_TARGET_ORIGIN_FORM;
- http_identify_optional_query (req);
- /* can't be CONNECT method */
- return req->method == HTTP_REQ_CONNECT ? -1 : 0;
- }
-
- /* absolute-form =
- * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */
- if (req->target_path_len > 8 &&
- !memcmp (req->rx_buf + req->target_path_offset, "http", 4))
- {
- req->scheme = HTTP_URL_SCHEME_HTTP;
- p = req->rx_buf + req->target_path_offset + 4;
- if (*p == 's')
- {
- p++;
- req->scheme = HTTP_URL_SCHEME_HTTPS;
- }
- if (*p++ == ':')
- {
- expect_char ('/');
- expect_char ('/');
- req->target_form = HTTP_TARGET_ABSOLUTE_FORM;
- req->target_authority_offset = p - req->rx_buf;
- req->target_authority_len = 0;
- end = req->rx_buf + req->target_path_offset + req->target_path_len;
- while (p < end)
- {
- if (*p == '/')
- {
- p++; /* drop leading slash */
- req->target_path_offset = p - req->rx_buf;
- req->target_path_len = end - p;
- break;
- }
- req->target_authority_len++;
- p++;
- }
- if (!req->target_path_len)
- {
- clib_warning ("zero length host");
- return -1;
- }
- http_identify_optional_query (req);
- /* can't be CONNECT method */
- return req->method == HTTP_REQ_CONNECT ? -1 : 0;
- }
- }
-
- /* authority-form = host ":" port */
- for (i = req->target_path_offset;
- i < (req->target_path_offset + req->target_path_len); i++)
- {
- if ((req->rx_buf[i] == ':') && (isdigit (req->rx_buf[i + 1])))
- {
- req->target_authority_len = req->target_path_len;
- req->target_path_len = 0;
- req->target_authority_offset = req->target_path_offset;
- req->target_path_offset = 0;
- req->target_form = HTTP_TARGET_AUTHORITY_FORM;
- /* "authority-form" is only used for CONNECT requests */
- return req->method == HTTP_REQ_CONNECT ? 0 : -1;
- }
- }
-
- return -1;
-}
+ hc_handle.as_u32 = ts->opaque;
-static int
-http_parse_request_line (http_req_t *req, http_status_code_t *ec)
-{
- int i, target_len;
- u32 next_line_offset, method_offset;
+ HTTP_DBG (1, "hc [%u]%x", ts->thread_index, hc_handle.conn_index);
- /* request-line = method SP request-target SP HTTP-version CRLF */
- i = v_find_index (req->rx_buf, 8, 0, "\r\n");
- if (i < 0)
- {
- clib_warning ("request line incomplete");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- HTTP_DBG (2, "request line length: %d", i);
- req->control_data_len = i + 2;
- next_line_offset = req->control_data_len;
-
- /* there should be at least one more CRLF */
- if (vec_len (req->rx_buf) < (next_line_offset + 2))
- {
- clib_warning ("malformed message, too short");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
- /*
- * RFC9112 2.2:
- * In the interest of robustness, a server that is expecting to receive and
- * parse a request-line SHOULD ignore at least one empty line (CRLF)
- * received prior to the request-line.
- */
- method_offset = req->rx_buf[0] == '\r' && req->rx_buf[1] == '\n' ? 2 : 0;
- /* parse method */
- if (!memcmp (req->rx_buf + method_offset, "GET ", 4))
- {
- HTTP_DBG (0, "GET method");
- req->method = HTTP_REQ_GET;
- req->target_path_offset = method_offset + 4;
- }
- else if (!memcmp (req->rx_buf + method_offset, "POST ", 5))
- {
- HTTP_DBG (0, "POST method");
- req->method = HTTP_REQ_POST;
- req->target_path_offset = method_offset + 5;
- }
- else if (!memcmp (req->rx_buf + method_offset, "CONNECT ", 8))
- {
- HTTP_DBG (0, "CONNECT method");
- req->method = HTTP_REQ_CONNECT;
- req->upgrade_proto = HTTP_UPGRADE_PROTO_NA;
- req->target_path_offset = method_offset + 8;
- req->is_tunnel = 1;
- }
- else
- {
- if (req->rx_buf[method_offset] - 'A' <= 'Z' - 'A')
- {
- clib_warning ("method not implemented: %8v", req->rx_buf);
- *ec = HTTP_STATUS_NOT_IMPLEMENTED;
- return -1;
- }
- else
- {
- clib_warning ("not method name: %8v", req->rx_buf);
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- }
-
- /* find version */
- i = v_find_index (req->rx_buf, next_line_offset - 11, 11, " HTTP/");
- if (i < 0)
- {
- clib_warning ("HTTP version not present");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- /* verify major version */
- if (isdigit (req->rx_buf[i + 6]))
- {
- if (req->rx_buf[i + 6] != '1')
- {
- clib_warning ("HTTP major version '%c' not supported",
- req->rx_buf[i + 6]);
- *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
- return -1;
- }
- }
- else
- {
- clib_warning ("HTTP major version '%c' is not digit",
- req->rx_buf[i + 6]);
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
-
- /* parse request-target */
- HTTP_DBG (2, "http at %d", i);
- target_len = i - req->target_path_offset;
- HTTP_DBG (2, "target_len %d", target_len);
- if (target_len < 1)
- {
- clib_warning ("request-target not present");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- req->target_path_len = target_len;
- req->target_query_offset = 0;
- req->target_query_len = 0;
- req->target_authority_len = 0;
- req->target_authority_offset = 0;
- if (http_parse_target (req))
- {
- clib_warning ("invalid target");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- HTTP_DBG (2, "request-target path length: %u", req->target_path_len);
- HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset);
- HTTP_DBG (2, "request-target query length: %u", req->target_query_len);
- HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset);
-
- /* set buffer offset to nex line start */
- req->rx_buf_offset = next_line_offset;
-
- return 0;
-}
-
-#define parse_int(val, mul) \
- do \
- { \
- if (!isdigit (*p)) \
- { \
- clib_warning ("expected digit"); \
- return -1; \
- } \
- val += mul * (*p++ - '0'); \
- } \
- while (0)
-
-static int
-http_parse_status_line (http_req_t *req)
-{
- int i;
- u32 next_line_offset;
- u8 *p, *end;
- u16 status_code = 0;
- http_main_t *hm = &http_main;
-
- i = v_find_index (req->rx_buf, 0, 0, "\r\n");
- /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */
- if (i < 0)
- {
- clib_warning ("status line incomplete");
- return -1;
- }
- HTTP_DBG (2, "status line length: %d", i);
- if (i < 12)
- {
- clib_warning ("status line too short (%d)", i);
- return -1;
- }
- req->control_data_len = i + 2;
- next_line_offset = req->control_data_len;
- p = req->rx_buf;
- end = req->rx_buf + i;
-
- /* there should be at least one more CRLF */
- if (vec_len (req->rx_buf) < (next_line_offset + 2))
- {
- clib_warning ("malformed message, too short");
- return -1;
- }
-
- /* parse version */
- expect_char ('H');
- expect_char ('T');
- expect_char ('T');
- expect_char ('P');
- expect_char ('/');
- expect_char ('1');
- expect_char ('.');
- if (!isdigit (*p++))
- {
- clib_warning ("invalid HTTP minor version");
- return -1;
- }
-
- /* skip space(s) */
- if (*p != ' ')
- {
- clib_warning ("no space after HTTP version");
- return -1;
- }
- do
- {
- p++;
- if (p == end)
- {
- clib_warning ("no status code");
- return -1;
- }
- }
- while (*p == ' ');
-
- /* parse status code */
- if ((end - p) < 3)
- {
- clib_warning ("not enough characters for status code");
- return -1;
- }
- parse_int (status_code, 100);
- parse_int (status_code, 10);
- parse_int (status_code, 1);
- if (status_code < 100 || status_code > 599)
- {
- clib_warning ("invalid status code %d", status_code);
- return -1;
- }
- req->status_code = hm->sc_by_u16[status_code];
- HTTP_DBG (0, "status code: %d", status_code);
-
- /* set buffer offset to nex line start */
- req->rx_buf_offset = next_line_offset;
-
- return 0;
-}
-
-static int
-http_identify_headers (http_req_t *req, http_status_code_t *ec)
-{
- int rv;
- u8 *p, *end, *name_start, *value_start;
- u32 name_len, value_len;
- http_field_line_t *field_line;
- uword header_index;
-
- vec_reset_length (req->headers);
- req->content_len_header_index = ~0;
- req->connection_header_index = ~0;
- req->upgrade_header_index = ~0;
- req->host_header_index = ~0;
- req->headers_offset = req->rx_buf_offset;
-
- /* check if we have any header */
- if ((req->rx_buf[req->rx_buf_offset] == '\r') &&
- (req->rx_buf[req->rx_buf_offset + 1] == '\n'))
- {
- /* just another CRLF -> no headers */
- HTTP_DBG (2, "no headers");
- req->headers_len = 0;
- req->control_data_len += 2;
- return 0;
- }
-
- end = req->rx_buf + vec_len (req->rx_buf);
- p = req->rx_buf + req->rx_buf_offset;
-
- while (1)
- {
- rv = _parse_field_name (&p, end, &name_start, &name_len);
- if (rv != 0)
- {
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- rv = _parse_field_value (&p, end, &value_start, &value_len);
- if (rv != 0 || (end - p) < 2)
- {
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
-
- vec_add2 (req->headers, field_line, 1);
- field_line->name_offset =
- (name_start - req->rx_buf) - req->headers_offset;
- field_line->name_len = name_len;
- field_line->value_offset =
- (value_start - req->rx_buf) - req->headers_offset;
- field_line->value_len = value_len;
- header_index = field_line - req->headers;
-
- /* find headers that will be used later in preprocessing */
- /* names are case-insensitive (RFC9110 section 5.1) */
- if (req->content_len_header_index == ~0 &&
- http_token_is_case (
- (const char *) name_start, name_len,
- http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)))
- req->content_len_header_index = header_index;
- else if (req->connection_header_index == ~0 &&
- http_token_is_case (
- (const char *) name_start, name_len,
- http_header_name_token (HTTP_HEADER_CONNECTION)))
- req->connection_header_index = header_index;
- else if (req->upgrade_header_index == ~0 &&
- http_token_is_case (
- (const char *) name_start, name_len,
- http_header_name_token (HTTP_HEADER_UPGRADE)))
- req->upgrade_header_index = header_index;
- else if (req->host_header_index == ~0 &&
- http_token_is_case ((const char *) name_start, name_len,
- http_header_name_token (HTTP_HEADER_HOST)))
- req->host_header_index = header_index;
-
- /* are we done? */
- if (*p == '\r' && *(p + 1) == '\n')
- break;
- }
-
- req->headers_len = p - (req->rx_buf + req->headers_offset);
- req->control_data_len += (req->headers_len + 2);
- HTTP_DBG (2, "headers length: %u", req->headers_len);
- HTTP_DBG (2, "headers offset: %u", req->headers_offset);
-
- return 0;
-}
-
-static int
-http_identify_message_body (http_req_t *req, http_status_code_t *ec)
-{
- int i;
- u8 *p;
- u64 body_len = 0, digit;
- http_field_line_t *field_line;
-
- req->body_len = 0;
-
- if (req->headers_len == 0)
- {
- HTTP_DBG (2, "no header, no message-body");
- return 0;
- }
- if (req->is_tunnel)
- {
- HTTP_DBG (2, "tunnel, no message-body");
- return 0;
- }
-
- /* TODO check for chunked transfer coding */
-
- if (req->content_len_header_index == ~0)
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
{
- HTTP_DBG (2, "Content-Length header not present, no message-body");
+ HTTP_DBG (1, "conn closed");
+ svm_fifo_dequeue_drop_all (ts->rx_fifo);
return 0;
}
- field_line = vec_elt_at_index (req->headers, req->content_len_header_index);
- p = req->rx_buf + req->headers_offset + field_line->value_offset;
- for (i = 0; i < field_line->value_len; i++)
+ if (hc_handle.version == HTTP_VERSION_NA)
{
- /* check for digit */
- if (!isdigit (*p))
- {
- clib_warning ("expected digit");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- digit = *p - '0';
- u64 new_body_len = body_len * 10 + digit;
- /* check for overflow */
- if (new_body_len < body_len)
+ HTTP_DBG (1, "unknown http version");
+ max_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo);
+ if (max_deq >= http2_conn_preface.len)
{
- clib_warning ("too big number, overflow");
- *ec = HTTP_STATUS_BAD_REQUEST;
- return -1;
- }
- body_len = new_body_len;
- p++;
- }
-
- req->body_len = body_len;
-
- req->body_offset = req->headers_offset + req->headers_len + 2;
- HTTP_DBG (2, "body length: %llu", req->body_len);
- HTTP_DBG (2, "body offset: %u", req->body_offset);
-
- return 0;
-}
-
-static http_sm_result_t
-http_req_state_wait_transport_reply (http_conn_t *hc,
- transport_send_params_t *sp)
-{
- int rv;
- http_msg_t msg = {};
- app_worker_t *app_wrk;
- session_t *as;
- u32 len, max_enq, body_sent;
- http_status_code_t ec;
-
- rv = http_read_message (hc);
-
- /* Nothing yet, wait for data or timer expire */
- if (rv)
- {
- HTTP_DBG (1, "no data to deq");
- return HTTP_SM_STOP;
- }
-
- HTTP_DBG (3, "%v", hc->req.rx_buf);
-
- if (vec_len (hc->req.rx_buf) < 8)
- {
- clib_warning ("response buffer too short");
- goto error;
- }
-
- rv = http_parse_status_line (&hc->req);
- if (rv)
- goto error;
-
- rv = http_identify_headers (&hc->req, &ec);
- if (rv)
- goto error;
-
- rv = http_identify_message_body (&hc->req, &ec);
- if (rv)
- goto error;
-
- /* send at least "control data" which is necessary minimum,
- * if there is some space send also portion of body */
- as = session_get_from_handle (hc->h_pa_session_handle);
- max_enq = svm_fifo_max_enqueue (as->rx_fifo);
- max_enq -= sizeof (msg);
- if (max_enq < hc->req.control_data_len)
- {
- clib_warning ("not enough room for control data in app's rx fifo");
- goto error;
- }
- len = clib_min (max_enq, vec_len (hc->req.rx_buf));
-
- msg.type = HTTP_MSG_REPLY;
- msg.code = hc->req.status_code;
- msg.data.headers_offset = hc->req.headers_offset;
- msg.data.headers_len = hc->req.headers_len;
- msg.data.body_offset = hc->req.body_offset;
- msg.data.body_len = hc->req.body_len;
- msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = len;
- msg.data.headers_ctx = pointer_to_uword (hc->req.headers);
-
- svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
- { hc->req.rx_buf, len } };
-
- rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
- ASSERT (rv == (sizeof (msg) + len));
-
- http_read_message_drop (hc, len);
-
- body_sent = len - hc->req.control_data_len;
- hc->req.to_recv = hc->req.body_len - body_sent;
- if (hc->req.to_recv == 0)
- {
- /* all sent, we are done */
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD);
- }
- else
- {
- /* stream rest of the response body */
- http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA);
- }
-
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
- return HTTP_SM_STOP;
-
-error:
- http_read_message_drop_all (hc);
- session_transport_closing_notify (&hc->connection);
- session_transport_closed_notify (&hc->connection);
- http_disconnect_transport (hc);
- return HTTP_SM_ERROR;
-}
-
-#define http_field_line_value_token(_fl, _req) \
- (const char *) ((_req)->rx_buf + (_req)->headers_offset + \
- (_fl)->value_offset), \
- (_fl)->value_len
-
-static void
-http_check_connection_upgrade (http_req_t *req)
-{
- http_field_line_t *connection, *upgrade;
- u8 skip;
-
- skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) +
- (req->upgrade_header_index == ~0);
- if (skip)
- return;
-
- connection = vec_elt_at_index (req->headers, req->connection_header_index);
- /* connection options are case-insensitive (RFC9110 7.6.1) */
- if (http_token_is_case (http_field_line_value_token (connection, req),
- http_token_lit ("upgrade")))
- {
- upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index);
-
- /* check upgrade protocol, we want to ignore something like upgrade to
- * newer HTTP version, only tunnels are supported */
- if (0)
- ;
-#define _(sym, str) \
- else if (http_token_is_case (http_field_line_value_token (upgrade, req), \
- http_token_lit (str))) req->upgrade_proto = \
- HTTP_UPGRADE_PROTO_##sym;
- foreach_http_upgrade_proto
-#undef _
- else return;
-
- HTTP_DBG (1, "connection upgrade: %U", format_http_bytes,
- req->rx_buf + req->headers_offset + upgrade->value_offset,
- upgrade->value_len);
- req->is_tunnel = 1;
- req->method = HTTP_REQ_CONNECT;
- }
-}
-
-static void
-http_target_fixup (http_conn_t *hc)
-{
- http_field_line_t *host;
-
- if (hc->req.target_form == HTTP_TARGET_ABSOLUTE_FORM)
- return;
-
- /* scheme fixup */
- hc->req.scheme = session_get_transport_proto (session_get_from_handle (
- hc->h_tc_session_handle)) == TRANSPORT_PROTO_TLS ?
- HTTP_URL_SCHEME_HTTPS :
- HTTP_URL_SCHEME_HTTP;
-
- if (hc->req.target_form == HTTP_TARGET_AUTHORITY_FORM ||
- hc->req.connection_header_index == ~0)
- return;
-
- /* authority fixup */
- host = vec_elt_at_index (hc->req.headers, hc->req.connection_header_index);
- hc->req.target_authority_offset = host->value_offset;
- hc->req.target_authority_len = host->value_len;
-}
-
-static http_sm_result_t
-http_req_state_wait_transport_method (http_conn_t *hc,
- transport_send_params_t *sp)
-{
- http_status_code_t ec;
- app_worker_t *app_wrk;
- http_msg_t msg;
- session_t *as;
- int rv;
- u32 len, max_enq, body_sent;
- u64 max_deq;
-
- rv = http_read_message (hc);
-
- /* Nothing yet, wait for data or timer expire */
- if (rv)
- return HTTP_SM_STOP;
-
- HTTP_DBG (3, "%v", hc->req.rx_buf);
-
- if (vec_len (hc->req.rx_buf) < 8)
- {
- ec = HTTP_STATUS_BAD_REQUEST;
- goto error;
- }
-
- rv = http_parse_request_line (&hc->req, &ec);
- if (rv)
- goto error;
-
- rv = http_identify_headers (&hc->req, &ec);
- if (rv)
- goto error;
-
- http_target_fixup (hc);
- http_check_connection_upgrade (&hc->req);
-
- rv = http_identify_message_body (&hc->req, &ec);
- if (rv)
- goto error;
-
- /* send at least "control data" which is necessary minimum,
- * if there is some space send also portion of body */
- as = session_get_from_handle (hc->h_pa_session_handle);
- max_enq = svm_fifo_max_enqueue (as->rx_fifo);
- if (max_enq < hc->req.control_data_len)
- {
- clib_warning ("not enough room for control data in app's rx fifo");
- ec = HTTP_STATUS_INTERNAL_ERROR;
- goto error;
- }
- /* do not dequeue more than one HTTP request, we do not support pipelining */
- max_deq = clib_min (hc->req.control_data_len + hc->req.body_len,
- vec_len (hc->req.rx_buf));
- len = clib_min (max_enq, max_deq);
-
- msg.type = HTTP_MSG_REQUEST;
- msg.method_type = hc->req.method;
- msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = len;
- msg.data.scheme = hc->req.scheme;
- msg.data.target_authority_offset = hc->req.target_authority_offset;
- msg.data.target_authority_len = hc->req.target_authority_len;
- msg.data.target_path_offset = hc->req.target_path_offset;
- msg.data.target_path_len = hc->req.target_path_len;
- msg.data.target_query_offset = hc->req.target_query_offset;
- msg.data.target_query_len = hc->req.target_query_len;
- msg.data.headers_offset = hc->req.headers_offset;
- msg.data.headers_len = hc->req.headers_len;
- msg.data.body_offset = hc->req.body_offset;
- msg.data.body_len = hc->req.body_len;
- msg.data.headers_ctx = pointer_to_uword (hc->req.headers);
- msg.data.upgrade_proto = hc->req.upgrade_proto;
-
- svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
- { hc->req.rx_buf, len } };
-
- rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
- ASSERT (rv == (sizeof (msg) + len));
-
- body_sent = len - hc->req.control_data_len;
- hc->req.to_recv = hc->req.body_len - body_sent;
- if (hc->req.to_recv == 0)
- {
- /* drop everything, we do not support pipelining */
- http_read_message_drop_all (hc);
- /* all sent, we are done */
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_REPLY);
- }
- else
- {
- http_read_message_drop (hc, len);
- /* stream rest of the response body */
- http_req_state_change (hc, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA);
- }
-
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
-
- return HTTP_SM_STOP;
-
-error:
- http_read_message_drop_all (hc);
- http_send_error (hc, ec);
- session_transport_closing_notify (&hc->connection);
- http_disconnect_transport (hc);
-
- return HTTP_SM_ERROR;
-}
-
-static void
-http_write_app_headers (http_conn_t *hc, http_msg_t *msg, u8 **tx_buf)
-{
- http_main_t *hm = &http_main;
- session_t *as;
- u8 *app_headers, *p, *end;
- u32 *tmp;
- int rv;
-
- as = session_get_from_handle (hc->h_pa_session_handle);
-
- /* read app header list */
- if (msg->data.type == HTTP_MSG_DATA_PTR)
- {
- uword app_headers_ptr;
- rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr),
- (u8 *) &app_headers_ptr);
- ASSERT (rv == sizeof (app_headers_ptr));
- app_headers = uword_to_pointer (app_headers_ptr, u8 *);
- }
- else
- {
- app_headers = hm->app_header_lists[hc->c_thread_index];
- rv = svm_fifo_dequeue (as->tx_fifo, msg->data.headers_len, app_headers);
- ASSERT (rv == msg->data.headers_len);
- }
-
- /* serialize app headers to tx_buf */
- end = app_headers + msg->data.headers_len;
- while (app_headers < end)
- {
- /* custom header name? */
- tmp = (u32 *) app_headers;
- if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT))
- {
- http_custom_token_t *name, *value;
- name = (http_custom_token_t *) app_headers;
- u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT;
- app_headers += sizeof (http_custom_token_t) + name_len;
- value = (http_custom_token_t *) app_headers;
- app_headers += sizeof (http_custom_token_t) + value->len;
- vec_add2 (*tx_buf, p, name_len + value->len + 4);
- clib_memcpy (p, name->token, name_len);
- p += name_len;
- *p++ = ':';
- *p++ = ' ';
- clib_memcpy (p, value->token, value->len);
- p += value->len;
- *p++ = '\r';
- *p++ = '\n';
- }
- else
- {
- http_app_header_t *header;
- header = (http_app_header_t *) app_headers;
- app_headers += sizeof (http_app_header_t) + header->value.len;
- http_token_t name = { http_header_name_token (header->name) };
- vec_add2 (*tx_buf, p, name.len + header->value.len + 4);
- clib_memcpy (p, name.base, name.len);
- p += name.len;
- *p++ = ':';
- *p++ = ' ';
- clib_memcpy (p, header->value.token, header->value.len);
- p += header->value.len;
- *p++ = '\r';
- *p++ = '\n';
- }
- }
-}
-
-static http_sm_result_t
-http_req_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
-{
- http_main_t *hm = &http_main;
- u8 *response;
- u32 sent;
- f64 now;
- session_t *as;
- http_status_code_t sc;
- http_msg_t msg;
- int rv;
- http_sm_result_t sm_result = HTTP_SM_ERROR;
- http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD;
-
- as = session_get_from_handle (hc->h_pa_session_handle);
-
- rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
- ASSERT (rv == sizeof (msg));
-
- if (msg.data.type > HTTP_MSG_DATA_PTR)
- {
- clib_warning ("no data");
- sc = HTTP_STATUS_INTERNAL_ERROR;
- goto error;
- }
-
- if (msg.type != HTTP_MSG_REPLY)
- {
- clib_warning ("unexpected message type %d", msg.type);
- sc = HTTP_STATUS_INTERNAL_ERROR;
- goto error;
- }
-
- if (msg.code >= HTTP_N_STATUS)
- {
- clib_warning ("unsupported status code: %d", msg.code);
- return HTTP_SM_ERROR;
- }
-
- response = hm->tx_bufs[hc->c_thread_index];
- vec_reset_length (response);
- /*
- * Add "protocol layer" headers:
- * - current time
- * - server name
- * - data length
- */
- now = clib_timebase_now (&hm->timebase);
- response =
- format (response, http_response_template, http_status_code_str[msg.code],
- /* Date */
- format_clib_timebase_time, now,
- /* Server */
- hc->app_name);
-
- /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a
- * 2xx (Successful) response to CONNECT or with a status code of 101
- * (Switching Protocols). */
- if (hc->req.is_tunnel && (http_status_code_str[msg.code][0] == '2' ||
- msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS))
- {
- ASSERT (msg.data.body_len == 0);
- next_state = HTTP_REQ_STATE_TUNNEL;
- if (hc->req.upgrade_proto > HTTP_UPGRADE_PROTO_NA)
- {
- response = format (response, connection_upgrade_template,
- http_upgrade_proto_str[hc->req.upgrade_proto]);
- if (hc->req.upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP &&
- hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM)
- next_state = HTTP_REQ_STATE_UDP_TUNNEL;
- }
- /* cleanup some stuff we don't need anymore in tunnel mode */
- vec_free (hc->req.rx_buf);
- vec_free (hc->req.headers);
- http_buffer_free (&hc->req.tx_buf);
- hc->req.to_skip = 0;
- }
- else
- response = format (response, content_len_template, msg.data.body_len);
-
- /* Add headers from app (if any) */
- if (msg.data.headers_len)
- {
- HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
- http_write_app_headers (hc, &msg, &response);
- }
- /* Add empty line after headers */
- response = format (response, "\r\n");
- HTTP_DBG (3, "%v", response);
-
- sent = http_send_data (hc, response, vec_len (response));
- if (sent != vec_len (response))
- {
- clib_warning ("sending status-line and headers failed!");
- sc = HTTP_STATUS_INTERNAL_ERROR;
- goto error;
- }
-
- if (msg.data.body_len)
- {
- /* Start sending the actual data */
- http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type],
- as->tx_fifo, msg.data.body_len);
- next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA;
- sm_result = HTTP_SM_CONTINUE;
- }
- else
- {
- /* No response body, we are done */
- sm_result = HTTP_SM_STOP;
- }
-
- http_req_state_change (hc, next_state);
-
- ASSERT (sp->max_burst_size >= sent);
- sp->max_burst_size -= sent;
- return sm_result;
-
-error:
- http_send_error (hc, sc);
- session_transport_closing_notify (&hc->connection);
- http_disconnect_transport (hc);
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
-{
- http_main_t *hm = &http_main;
- http_msg_t msg;
- session_t *as;
- u8 *target_buff = 0, *request = 0, *target;
- u32 sent;
- int rv;
- http_sm_result_t sm_result = HTTP_SM_ERROR;
- http_req_state_t next_state;
-
- as = session_get_from_handle (hc->h_pa_session_handle);
-
- rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
- ASSERT (rv == sizeof (msg));
-
- if (msg.data.type > HTTP_MSG_DATA_PTR)
- {
- clib_warning ("no data");
- goto error;
- }
-
- if (msg.type != HTTP_MSG_REQUEST)
- {
- clib_warning ("unexpected message type %d", msg.type);
- goto error;
- }
-
- /* read request target */
- if (msg.data.type == HTTP_MSG_DATA_PTR)
- {
- uword target_ptr;
- rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr),
- (u8 *) &target_ptr);
- ASSERT (rv == sizeof (target_ptr));
- target = uword_to_pointer (target_ptr, u8 *);
- }
- else
- {
- vec_validate (target_buff, msg.data.target_path_len - 1);
- rv =
- svm_fifo_dequeue (as->tx_fifo, msg.data.target_path_len, target_buff);
- ASSERT (rv == msg.data.target_path_len);
- target = target_buff;
- }
-
- request = hm->tx_bufs[hc->c_thread_index];
- vec_reset_length (request);
- /* currently we support only GET and POST method */
- if (msg.method_type == HTTP_REQ_GET)
- {
- if (msg.data.body_len)
- {
- clib_warning ("GET request shouldn't include data");
- goto error;
- }
- /*
- * Add "protocol layer" headers:
- * - host
- * - user agent
- */
- request = format (request, http_get_request_template,
- /* target */
- target,
- /* Host */
- hc->host,
- /* User-Agent */
- hc->app_name);
-
- next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY;
- sm_result = HTTP_SM_STOP;
- }
- else if (msg.method_type == HTTP_REQ_POST)
- {
- if (!msg.data.body_len)
- {
- clib_warning ("POST request should include data");
- goto error;
- }
- /*
- * Add "protocol layer" headers:
- * - host
- * - user agent
- * - content length
- */
- request = format (request, http_post_request_template,
- /* target */
- target,
- /* Host */
- hc->host,
- /* User-Agent */
- hc->app_name,
- /* Content-Length */
- msg.data.body_len);
-
- http_buffer_init (&hc->req.tx_buf, msg_to_buf_type[msg.data.type],
- as->tx_fifo, msg.data.body_len);
-
- next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA;
- sm_result = HTTP_SM_CONTINUE;
- }
- else
- {
- clib_warning ("unsupported method %d", msg.method_type);
- goto error;
- }
-
- /* Add headers from app (if any) */
- if (msg.data.headers_len)
- {
- HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
- http_write_app_headers (hc, &msg, &request);
- }
- /* Add empty line after headers */
- request = format (request, "\r\n");
- HTTP_DBG (3, "%v", request);
-
- sent = http_send_data (hc, request, vec_len (request));
- if (sent != vec_len (request))
- {
- clib_warning ("sending request-line and headers failed!");
- sm_result = HTTP_SM_ERROR;
- goto error;
- }
-
- http_req_state_change (hc, next_state);
- goto done;
-
-error:
- svm_fifo_dequeue_drop_all (as->tx_fifo);
- session_transport_closing_notify (&hc->connection);
- session_transport_closed_notify (&hc->connection);
- http_disconnect_transport (hc);
-
-done:
- vec_free (target_buff);
- return sm_result;
-}
-
-static http_sm_result_t
-http_req_state_transport_io_more_data (http_conn_t *hc,
- transport_send_params_t *sp)
-{
- session_t *as, *ts;
- app_worker_t *app_wrk;
- svm_fifo_seg_t _seg, *seg = &_seg;
- u32 max_len, max_deq, max_enq, n_segs = 1;
- int rv, len;
-
- as = session_get_from_handle (hc->h_pa_session_handle);
- ts = session_get_from_handle (hc->h_tc_session_handle);
-
- max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
- if (max_deq == 0)
- {
- HTTP_DBG (1, "no data to deq");
- return HTTP_SM_STOP;
- }
-
- max_enq = svm_fifo_max_enqueue (as->rx_fifo);
- if (max_enq == 0)
- {
- HTTP_DBG (1, "app's rx fifo full");
- svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- return HTTP_SM_STOP;
- }
-
- max_len = clib_min (max_enq, max_deq);
- len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len);
- if (len < 0)
- {
- HTTP_DBG (1, "svm_fifo_segments() len %d", len);
- return HTTP_SM_STOP;
- }
-
- rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */);
- if (rv < 0)
- {
- clib_warning ("data enqueue failed, rv: %d", rv);
- return HTTP_SM_ERROR;
- }
-
- svm_fifo_dequeue_drop (ts->rx_fifo, rv);
- if (rv > hc->req.to_recv)
- {
- clib_warning ("http protocol error: received more data than expected");
- session_transport_closing_notify (&hc->connection);
- http_disconnect_transport (hc);
- http_req_state_change (hc, HTTP_REQ_STATE_WAIT_APP_METHOD);
- return HTTP_SM_ERROR;
- }
- hc->req.to_recv -= rv;
- HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->req.to_recv);
-
- /* Finished transaction:
- * server back to HTTP_REQ_STATE_WAIT_APP_REPLY
- * client to HTTP_REQ_STATE_WAIT_APP_METHOD */
- if (hc->req.to_recv == 0)
- http_req_state_change (hc, hc->is_server ? HTTP_REQ_STATE_WAIT_APP_REPLY :
- HTTP_REQ_STATE_WAIT_APP_METHOD);
-
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
-
- if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
- session_enqueue_notify (ts);
-
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
-{
- u32 max_send = 64 << 10, n_segs;
- http_buffer_t *hb = &hc->req.tx_buf;
- svm_fifo_seg_t *seg;
- session_t *ts;
- int sent = 0;
-
- max_send = clib_min (max_send, sp->max_burst_size);
- ts = session_get_from_handle (hc->h_tc_session_handle);
- if ((seg = http_buffer_get_segs (hb, max_send, &n_segs)))
- sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
- 1 /* allow partial */);
-
- if (sent > 0)
- {
- /* Ask scheduler to notify app of deq event if needed */
- sp->bytes_dequeued += http_buffer_drain (hb, sent);
- sp->max_burst_size -= sent;
- }
-
- /* Not finished sending all data */
- if (!http_buffer_is_drained (hb))
- {
- if (sent && svm_fifo_set_event (ts->tx_fifo))
- session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
-
- if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
- {
- /* Deschedule http session and wait for deq notification if
- * underlying ts tx fifo almost full */
- svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- transport_connection_deschedule (&hc->connection);
- sp->flags |= TRANSPORT_SND_F_DESCHED;
- }
- }
- else
- {
- if (sent && svm_fifo_set_event (ts->tx_fifo))
- session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH);
-
- /* Finished transaction:
- * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD
- * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */
- http_req_state_change (hc, hc->is_server ?
- HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD :
- HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY);
- http_buffer_free (hb);
- }
-
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp)
-{
- u32 max_deq, max_enq, max_read, n_segs = 2;
- svm_fifo_seg_t segs[n_segs];
- int n_written = 0;
- session_t *as, *ts;
- app_worker_t *app_wrk;
-
- HTTP_DBG (1, "tunnel received data from client");
-
- as = session_get_from_handle (hc->h_pa_session_handle);
- ts = session_get_from_handle (hc->h_tc_session_handle);
-
- max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
- if (PREDICT_FALSE (max_deq == 0))
- {
- HTTP_DBG (1, "max_deq == 0");
- return HTTP_SM_STOP;
- }
- max_enq = svm_fifo_max_enqueue (as->rx_fifo);
- if (max_enq == 0)
- {
- HTTP_DBG (1, "app's rx fifo full");
- svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- return HTTP_SM_STOP;
- }
- max_read = clib_min (max_enq, max_deq);
- svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read);
- n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0);
- ASSERT (n_written > 0);
- HTTP_DBG (1, "transfered %u bytes", n_written);
- svm_fifo_dequeue_drop (ts->rx_fifo, n_written);
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
- if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
- session_program_rx_io_evt (session_handle (ts));
-
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp)
-{
- u32 max_deq, max_enq, max_read, n_segs = 2;
- svm_fifo_seg_t segs[n_segs];
- session_t *as, *ts;
- int n_written = 0;
-
- HTTP_DBG (1, "tunnel received data from target");
-
- as = session_get_from_handle (hc->h_pa_session_handle);
- ts = session_get_from_handle (hc->h_tc_session_handle);
-
- max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo);
- if (PREDICT_FALSE (max_deq == 0))
- {
- HTTP_DBG (1, "max_deq == 0");
- goto check_fifo;
- }
- max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo);
- if (max_enq == 0)
- {
- HTTP_DBG (1, "ts tx fifo full");
- goto check_fifo;
- }
- max_read = clib_min (max_enq, max_deq);
- max_read = clib_min (max_read, sp->max_burst_size);
- svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read);
- n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0);
- ASSERT (n_written > 0);
- HTTP_DBG (1, "transfered %u bytes", n_written);
- sp->bytes_dequeued += n_written;
- sp->max_burst_size -= n_written;
- svm_fifo_dequeue_drop (as->tx_fifo, n_written);
- if (svm_fifo_set_event (ts->tx_fifo))
- session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
-
-check_fifo:
- /* Deschedule and wait for deq notification if ts fifo is almost full */
- if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
- {
- svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- transport_connection_deschedule (&hc->connection);
- sp->flags |= TRANSPORT_SND_F_DESCHED;
- }
-
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_udp_tunnel_rx (http_conn_t *hc, transport_send_params_t *sp)
-{
- http_main_t *hm = &http_main;
- u32 to_deq, capsule_size, dgram_size, n_written = 0;
- int rv, n_read;
- session_t *as, *ts;
- app_worker_t *app_wrk;
- u8 payload_offset;
- u64 payload_len;
- session_dgram_hdr_t hdr;
- u8 *buf = 0;
-
- HTTP_DBG (1, "udp tunnel received data from client");
-
- as = session_get_from_handle (hc->h_pa_session_handle);
- ts = session_get_from_handle (hc->h_tc_session_handle);
- buf = hm->rx_bufs[hc->c_thread_index];
- to_deq = svm_fifo_max_dequeue_cons (ts->rx_fifo);
-
- while (to_deq > 0)
- {
- /* some bytes remaining to skip? */
- if (PREDICT_FALSE (hc->req.to_skip))
- {
- if (hc->req.to_skip >= to_deq)
+ rx_buf = http_get_rx_buf (hc);
+ svm_fifo_peek (ts->rx_fifo, 0, http2_conn_preface.len, rx_buf);
+ if (memcmp (rx_buf, http2_conn_preface.base,
+ http2_conn_preface.len) == 0)
{
- svm_fifo_dequeue_drop (ts->rx_fifo, to_deq);
- hc->req.to_skip -= to_deq;
- goto done;
- }
- else
- {
- svm_fifo_dequeue_drop (ts->rx_fifo, hc->req.to_skip);
- hc->req.to_skip = 0;
- }
- }
- n_read =
- svm_fifo_peek (ts->rx_fifo, 0, HTTP_CAPSULE_HEADER_MAX_SIZE, buf);
- ASSERT (n_read > 0);
- rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset,
- &payload_len);
- HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv,
- payload_offset, payload_len);
- if (PREDICT_FALSE (rv != 0))
- {
- if (rv < 0)
- {
- /* capsule datagram is invalid (session need to be aborted) */
+#if HTTP_2_ENABLE > 0
+ hc->version = HTTP_VERSION_2;
+ http_vfts[hc->version].conn_accept_callback (hc);
+#else
svm_fifo_dequeue_drop_all (ts->rx_fifo);
- session_transport_closing_notify (&hc->connection);
- session_transport_closed_notify (&hc->connection);
http_disconnect_transport (hc);
- return HTTP_SM_STOP;
+ return 0;
+#endif
}
else
- {
- /* unknown capsule should be skipped */
- if (payload_len <= to_deq)
- {
- svm_fifo_dequeue_drop (ts->rx_fifo, payload_len);
- to_deq -= payload_len;
- continue;
- }
- else
- {
- svm_fifo_dequeue_drop (ts->rx_fifo, to_deq);
- hc->req.to_skip = payload_len - to_deq;
- goto done;
- }
- }
- }
- capsule_size = payload_offset + payload_len;
- /* check if we have the full capsule */
- if (PREDICT_FALSE (to_deq < capsule_size))
- {
- HTTP_DBG (1, "capsule not complete");
- goto done;
+ hc->version = HTTP_VERSION_1;
}
-
- dgram_size = sizeof (hdr) + payload_len;
- if (svm_fifo_max_enqueue_prod (as->rx_fifo) < dgram_size)
- {
- HTTP_DBG (1, "app's rx fifo full");
- svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- goto done;
- }
-
- /* read capsule payload */
- rv = svm_fifo_peek (ts->rx_fifo, payload_offset, payload_len, buf);
- ASSERT (rv == payload_len);
- svm_fifo_dequeue_drop (ts->rx_fifo, capsule_size);
-
- hdr.data_length = payload_len;
- hdr.data_offset = 0;
-
- /* send datagram header and payload */
- svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) },
- { buf, payload_len } };
- rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0);
- ASSERT (rv > 0);
-
- n_written += dgram_size;
- to_deq -= capsule_size;
- }
-
-done:
- HTTP_DBG (1, "written %lu bytes", n_written);
-
- if (n_written)
- {
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
- }
- if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
- session_program_rx_io_evt (session_handle (ts));
-
- return HTTP_SM_STOP;
-}
-
-static http_sm_result_t
-http_req_state_udp_tunnel_tx (http_conn_t *hc, transport_send_params_t *sp)
-{
- http_main_t *hm = &http_main;
- u32 to_deq, capsule_size, dgram_size, n_written = 0;
- session_t *as, *ts;
- int rv;
- session_dgram_pre_hdr_t hdr;
- u8 *buf;
- u8 *payload;
-
- HTTP_DBG (1, "udp tunnel received data from target");
-
- as = session_get_from_handle (hc->h_pa_session_handle);
- ts = session_get_from_handle (hc->h_tc_session_handle);
- buf = hm->tx_bufs[hc->c_thread_index];
- to_deq = svm_fifo_max_dequeue_cons (as->tx_fifo);
-
- while (to_deq > 0)
- {
- /* read datagram header */
- rv = svm_fifo_peek (as->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr);
- ASSERT (rv == sizeof (hdr) &&
- hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN);
- ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN);
- dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN;
-
- if (svm_fifo_max_enqueue_prod (ts->tx_fifo) <
- (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD))
- {
- HTTP_DBG (1, "ts tx fifo full");
- goto done;
- }
-
- /* create capsule header */
- payload = http_encap_udp_payload_datagram (buf, hdr.data_length);
- capsule_size = (payload - buf) + hdr.data_length;
- /* read payload */
- rv = svm_fifo_peek (as->tx_fifo, SESSION_CONN_HDR_LEN, hdr.data_length,
- payload);
- ASSERT (rv == hdr.data_length);
- svm_fifo_dequeue_drop (as->tx_fifo, dgram_size);
- /* send capsule */
- rv = svm_fifo_enqueue (ts->tx_fifo, capsule_size, buf);
- ASSERT (rv == capsule_size);
-
- n_written += capsule_size;
- to_deq -= dgram_size;
- }
-
-done:
- HTTP_DBG (1, "written %lu bytes", n_written);
- if (n_written)
- {
- if (svm_fifo_set_event (ts->tx_fifo))
- session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
- }
-
- /* Deschedule and wait for deq notification if ts fifo is almost full */
- if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
- {
- svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
- transport_connection_deschedule (&hc->connection);
- sp->flags |= TRANSPORT_SND_F_DESCHED;
- }
-
- return HTTP_SM_STOP;
-}
-
-typedef http_sm_result_t (*http_sm_handler) (http_conn_t *,
- transport_send_params_t *sp);
-
-static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = {
- 0, /* idle */
- http_req_state_wait_app_method,
- 0, /* wait transport reply */
- 0, /* transport io more data */
- 0, /* wait transport method */
- http_req_state_wait_app_reply,
- http_req_state_app_io_more_data,
- http_req_state_tunnel_tx,
- http_req_state_udp_tunnel_tx,
-};
-
-static_always_inline int
-http_req_state_is_tx_valid (http_conn_t *hc)
-{
- return tx_state_funcs[hc->req.state] ? 1 : 0;
-}
-
-static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = {
- 0, /* idle */
- 0, /* wait app method */
- http_req_state_wait_transport_reply,
- http_req_state_transport_io_more_data,
- http_req_state_wait_transport_method,
- 0, /* wait app reply */
- 0, /* app io more data */
- http_req_state_tunnel_rx,
- http_req_state_udp_tunnel_rx,
-};
-
-static_always_inline int
-http_req_state_is_rx_valid (http_conn_t *hc)
-{
- return rx_state_funcs[hc->req.state] ? 1 : 0;
-}
-
-static_always_inline void
-http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp,
- u8 is_tx)
-{
- http_sm_result_t res;
-
- do
- {
- if (is_tx)
- res = tx_state_funcs[hc->req.state](hc, sp);
else
- res = rx_state_funcs[hc->req.state](hc, sp);
- if (res == HTTP_SM_ERROR)
- {
- HTTP_DBG (1, "error in state machine %d", res);
- return;
- }
+ hc->version = HTTP_VERSION_1;
+
+ HTTP_DBG (1, "identified HTTP/%u",
+ hc->version == HTTP_VERSION_1 ? 1 : 2);
+ hc_handle.version = hc->version;
+ ts->opaque = hc_handle.as_u32;
}
- while (res == HTTP_SM_CONTINUE);
+ http_vfts[hc_handle.version].transport_rx_callback (hc);
- /* Reset the session expiration timer */
- http_conn_timer_update (hc);
+ if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
+ http_vfts[hc->version].transport_close_callback (hc);
+ return 0;
}
-static int
-http_ts_rx_callback (session_t *ts)
+int
+http_ts_builtin_tx_callback (session_t *ts)
{
http_conn_t *hc;
+ http_conn_handle_t hc_handle;
- HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque);
-
- hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
-
- if (hc->state == HTTP_CONN_STATE_CLOSED)
- {
- HTTP_DBG (1, "conn closed");
- svm_fifo_dequeue_drop_all (ts->rx_fifo);
- return 0;
- }
-
- if (!http_req_state_is_rx_valid (hc))
- {
- clib_warning ("hc [%u]%x invalid rx state: http req state "
- "'%U', session state '%U'",
- ts->thread_index, ts->opaque, format_http_req_state,
- hc->req.state, format_http_conn_state, hc);
- svm_fifo_dequeue_drop_all (ts->rx_fifo);
- return 0;
- }
+ hc_handle.as_u32 = ts->opaque;
- HTTP_DBG (1, "run state machine");
- http_req_run_state_machine (hc, 0, 0);
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
+ HTTP_DBG (1, "transport connection reschedule");
+ http_vfts[hc->version].transport_conn_reschedule_callback (hc);
- if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
- {
- if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
- session_transport_closing_notify (&hc->connection);
- }
return 0;
}
-int
-http_ts_builtin_tx_callback (session_t *ts)
+static void
+http_ts_closed_callback (session_t *ts)
{
+ http_conn_handle_t hc_handle;
http_conn_t *hc;
- hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
- HTTP_DBG (1, "transport connection reschedule");
- transport_connection_reschedule (&hc->connection);
+ hc_handle.as_u32 = ts->opaque;
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
- return 0;
+ http_disconnect_transport (hc);
+ hc->state = HTTP_CONN_STATE_CLOSED;
}
static void
http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf)
{
http_conn_t *hc;
+ http_conn_handle_t hc_handle;
if (ntf == SESSION_CLEANUP_TRANSPORT)
return;
- hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
-
- HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque);
+ hc_handle.as_u32 = ts->opaque;
+ hc = http_conn_get_w_thread (hc_handle.conn_index, ts->thread_index);
- vec_free (hc->req.rx_buf);
- vec_free (hc->req.headers);
+ HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index,
+ hc_handle.conn_index);
- http_buffer_free (&hc->req.tx_buf);
-
- if (hc->pending_timer == 0)
+ if (!(hc->flags & HTTP_CONN_F_PENDING_TIMER))
http_conn_timer_stop (hc);
- session_transport_delete_notify (&hc->connection);
+ /* in case nothing received on cleartext connection */
+ if (PREDICT_FALSE (hc->version != HTTP_VERSION_NA))
+ http_vfts[hc->version].conn_cleanup_callback (hc);
- if (!hc->is_server)
+ if (!(hc->flags & HTTP_CONN_F_IS_SERVER))
{
vec_free (hc->app_name);
vec_free (hc->host);
@@ -2253,11 +705,8 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf)
static void
http_ts_ho_cleanup_callback (session_t *ts)
{
- http_conn_t *ho_hc;
HTTP_DBG (1, "half open: %x", ts->opaque);
- ho_hc = http_ho_conn_get (ts->opaque);
- session_half_open_delete_notify (&ho_hc->connection);
- http_ho_conn_free (ho_hc);
+ http_ho_try_free (ts->opaque);
}
int
@@ -2278,6 +727,7 @@ static session_cb_vft_t http_app_cb_vft = {
.session_disconnect_callback = http_ts_disconnect_callback,
.session_connected_callback = http_ts_connected_callback,
.session_reset_callback = http_ts_reset_callback,
+ .session_transport_closed_callback = http_ts_closed_callback,
.session_cleanup_callback = http_ts_cleanup_callback,
.half_open_cleanup_callback = http_ts_ho_cleanup_callback,
.add_segment_callback = http_add_segment_callback,
@@ -2286,6 +736,10 @@ static session_cb_vft_t http_app_cb_vft = {
.builtin_app_tx_callback = http_ts_builtin_tx_callback,
};
+/*********************************/
+/* transport proto VFT callbacks */
+/*********************************/
+
static clib_error_t *
http_transport_enable (vlib_main_t *vm, u8 is_en)
{
@@ -2295,6 +749,7 @@ http_transport_enable (vlib_main_t *vm, u8 is_en)
u64 options[APP_OPTIONS_N_OPTIONS];
http_main_t *hm = &http_main;
u32 num_threads, i;
+ http_engine_vft_t *http_version;
if (!is_en)
{
@@ -2351,6 +806,12 @@ http_transport_enable (vlib_main_t *vm, u8 is_en)
http_timers_init (vm, http_conn_timeout_cb, http_conn_invalidate_timer_cb);
hm->is_init = 1;
+ vec_foreach (http_version, http_vfts)
+ {
+ if (http_version->enable_callback)
+ http_version->enable_callback ();
+ }
+
return 0;
}
@@ -2377,9 +838,11 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
hc_index = http_ho_conn_alloc ();
hc = http_ho_conn_get (hc_index);
- hc->h_pa_wrk_index = sep->app_wrk_index;
- hc->h_pa_app_api_ctx = sep->opaque;
+ hc->hc_pa_wrk_index = sep->app_wrk_index;
+ hc->hc_pa_app_api_ctx = sep->opaque;
hc->state = HTTP_CONN_STATE_CONNECTING;
+ /* TODO: set to HTTP_VERSION_NA in case of TLS */
+ hc->version = HTTP_VERSION_1;
cargs->api_context = hc_index;
ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP);
@@ -2391,7 +854,12 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
hc->timeout = http_cfg->timeout;
}
- hc->is_server = 0;
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ if (ext_cfg)
+ {
+ HTTP_DBG (1, "app set tls");
+ cargs->sep.transport_proto = TRANSPORT_PROTO_TLS;
+ }
if (vec_len (app->name))
hc->app_name = vec_dup (app->name);
@@ -2416,7 +884,7 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
ho->opaque = sep->opaque;
ho->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_HTTP, sep->is_ip4);
- hc->h_tc_session_handle = cargs->sh;
+ hc->hc_tc_session_handle = cargs->sh;
hc->c_s_index = ho->session_index;
return 0;
@@ -2471,19 +939,19 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
}
/* Grab transport connection listener and link to http listener */
- lhc->h_tc_session_handle = args->handle;
- al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ lhc->hc_tc_session_handle = args->handle;
+ al = app_listener_get_w_handle (lhc->hc_tc_session_handle);
ts_listener = app_listener_get_session (al);
ts_listener->opaque = lhc_index;
/* Grab application listener and link to http listener */
app_listener = listen_session_get (app_listener_index);
- lhc->h_pa_wrk_index = sep->app_wrk_index;
- lhc->h_pa_session_handle = listen_session_get_handle (app_listener);
+ lhc->hc_pa_wrk_index = sep->app_wrk_index;
+ lhc->hc_pa_session_handle = listen_session_get_handle (app_listener);
lhc->c_s_index = app_listener_index;
lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
- lhc->is_server = 1;
+ lhc->flags |= HTTP_CONN_F_IS_SERVER;
if (vec_len (app->name))
lhc->app_name = vec_dup (app->name);
@@ -2502,7 +970,7 @@ http_stop_listen (u32 listener_index)
lhc = http_listener_get (listener_index);
vnet_unlisten_args_t a = {
- .handle = lhc->h_tc_session_handle,
+ .handle = lhc->hc_tc_session_handle,
.app_index = http_main.app_index,
.wrk_map_index = 0 /* default wrk */
};
@@ -2516,16 +984,22 @@ http_stop_listen (u32 listener_index)
}
static void
-http_transport_close (u32 hc_index, u32 thread_index)
+http_transport_close (u32 rh, clib_thread_index_t thread_index)
{
- session_t *as;
http_conn_t *hc;
+ u32 hc_index;
+ http_req_handle_t hr_handle;
+ hr_handle.as_u32 = rh;
+
+ hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index (
+ hr_handle.req_index, thread_index);
HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index);
hc = http_conn_get_w_thread (hc_index, thread_index);
if (hc->state == HTTP_CONN_STATE_CONNECTING)
{
+ HTTP_DBG (1, "in connecting state, close now");
hc->state = HTTP_CONN_STATE_APP_CLOSED;
http_disconnect_transport (hc);
return;
@@ -2535,26 +1009,42 @@ http_transport_close (u32 hc_index, u32 thread_index)
HTTP_DBG (1, "nothing to do, already closed");
return;
}
- as = session_get_from_handle (hc->h_pa_session_handle);
- /* Nothing more to send, confirm close */
- if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
- {
- session_transport_closed_notify (&hc->connection);
- http_disconnect_transport (hc);
- }
- else
+ http_vfts[hc->version].app_close_callback (hc, hr_handle.req_index,
+ thread_index);
+}
+
+static void
+http_transport_reset (u32 rh, clib_thread_index_t thread_index)
+{
+ http_conn_t *hc;
+ u32 hc_index;
+ http_req_handle_t hr_handle;
+
+ hr_handle.as_u32 = rh;
+ hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index (
+ hr_handle.req_index, thread_index);
+ HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index);
+
+ hc = http_conn_get_w_thread (hc_index, thread_index);
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
{
- /* Wait for all data to be written to ts */
- hc->state = HTTP_CONN_STATE_APP_CLOSED;
+ HTTP_DBG (1, "nothing to do, already closed");
+ return;
}
+
+ http_vfts[hc->version].app_reset_callback (hc, hr_handle.req_index,
+ thread_index);
}
static transport_connection_t *
-http_transport_get_connection (u32 hc_index, u32 thread_index)
+http_transport_get_connection (u32 rh, clib_thread_index_t thread_index)
{
- http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
- return &hc->connection;
+ http_req_handle_t hr_handle;
+
+ hr_handle.as_u32 = rh;
+ return http_vfts[hr_handle.version].req_get_connection (hr_handle.req_index,
+ thread_index);
}
static transport_connection_t *
@@ -2568,46 +1058,32 @@ static int
http_app_tx_callback (void *session, transport_send_params_t *sp)
{
session_t *as = (session_t *) session;
- u32 max_burst_sz, sent;
+ u32 max_burst_sz, sent, hc_index;
http_conn_t *hc;
+ http_req_handle_t hr_handle;
+ hr_handle.as_u32 = as->connection_index;
- HTTP_DBG (1, "hc [%u]%x", as->thread_index, as->connection_index);
+ hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index (
+ hr_handle.req_index, as->thread_index);
+ HTTP_DBG (1, "hc [%u]%x", hc_index, as->connection_index);
- hc = http_conn_get_w_thread (as->connection_index, as->thread_index);
+ hc = http_conn_get_w_thread (hc_index, as->thread_index);
- max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
- sp->max_burst_size = max_burst_sz;
-
- if (!http_req_state_is_tx_valid (hc))
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
{
- /* Sometimes the server apps can send the response earlier
- * than expected (e.g when rejecting a bad request)*/
- if (hc->req.state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA &&
- hc->is_server)
- {
- svm_fifo_dequeue_drop_all (as->rx_fifo);
- hc->req.state = HTTP_REQ_STATE_WAIT_APP_REPLY;
- }
- else
- {
- clib_warning ("hc [%u]%x invalid tx state: http req state "
- "'%U', session state '%U'",
- as->thread_index, as->connection_index,
- format_http_req_state, hc->req.state,
- format_http_conn_state, hc);
- svm_fifo_dequeue_drop_all (as->tx_fifo);
- return 0;
- }
+ HTTP_DBG (1, "conn closed");
+ svm_fifo_dequeue_drop_all (as->tx_fifo);
+ return 0;
}
- HTTP_DBG (1, "run state machine");
- http_req_run_state_machine (hc, sp, 1);
+ max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
+ sp->max_burst_size = max_burst_sz;
+
+ http_vfts[hc->version].app_tx_callback (hc, hr_handle.req_index, sp);
if (hc->state == HTTP_CONN_STATE_APP_CLOSED)
- {
- if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
- http_disconnect_transport (hc);
- }
+ http_vfts[hc->version].app_close_callback (hc, hr_handle.req_index,
+ as->thread_index);
sent = max_burst_sz - sp->max_burst_size;
@@ -2617,38 +1093,36 @@ http_app_tx_callback (void *session, transport_send_params_t *sp)
static int
http_app_rx_evt_cb (transport_connection_t *tc)
{
- http_conn_t *hc = (http_conn_t *) tc;
- HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index);
+ http_req_t *req = (http_req_t *) tc;
+ http_conn_t *hc;
+ http_req_handle_t hr_handle;
+
+ HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), req->hr_hc_index);
- if (hc->req.state == HTTP_REQ_STATE_TUNNEL)
- http_req_state_tunnel_rx (hc, 0);
+ hr_handle.as_u32 = req->hr_req_handle;
+ hc = http_conn_get_w_thread (req->hr_hc_index, req->c_thread_index);
+ http_vfts[hr_handle.version].app_rx_evt_callback (hc, hr_handle.req_index,
+ req->c_thread_index);
return 0;
}
static void
-http_transport_get_endpoint (u32 hc_index, u32 thread_index,
+http_transport_get_endpoint (u32 rh, clib_thread_index_t thread_index,
transport_endpoint_t *tep, u8 is_lcl)
{
- http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
- session_t *ts;
-
- ts = session_get_from_handle (hc->h_tc_session_handle);
- session_get_endpoint (ts, tep, is_lcl);
-}
-
-static u8 *
-format_http_connection (u8 *s, va_list *args)
-{
- http_conn_t *hc = va_arg (*args, http_conn_t *);
+ http_conn_t *hc;
session_t *ts;
+ u32 hc_index;
+ http_req_handle_t hr_handle;
- ts = session_get_from_handle (hc->h_tc_session_handle);
- s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", hc->c_thread_index,
- hc->c_s_index, hc->h_pa_wrk_index, ts->thread_index,
- ts->session_index);
+ hr_handle.as_u32 = rh;
+ hc_index = http_vfts[hr_handle.version].hc_index_get_by_req_index (
+ hr_handle.req_index, thread_index);
+ hc = http_conn_get_w_thread (hc_index, thread_index);
- return s;
+ ts = session_get_from_handle (hc->hc_tc_session_handle);
+ session_get_endpoint (ts, tep, is_lcl);
}
static u8 *
@@ -2658,10 +1132,10 @@ format_http_listener (u8 *s, va_list *args)
app_listener_t *al;
session_t *lts;
- al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ al = app_listener_get_w_handle (lhc->hc_tc_session_handle);
lts = app_listener_get_session (al);
s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", lhc->c_thread_index,
- lhc->c_s_index, lhc->h_pa_wrk_index, lts->thread_index,
+ lhc->c_s_index, lhc->hc_pa_wrk_index, lts->thread_index,
lts->session_index);
return s;
@@ -2670,22 +1144,18 @@ format_http_listener (u8 *s, va_list *args)
static u8 *
format_http_transport_connection (u8 *s, va_list *args)
{
- u32 tc_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ http_req_handle_t rh = va_arg (*args, http_req_handle_t);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
+ u32 hc_index;
http_conn_t *hc;
- hc = http_conn_get_w_thread (tc_index, thread_index);
-
- s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_connection, hc);
- if (verbose)
- {
- s =
- format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc);
- if (verbose > 1)
- s = format (s, "\n");
- }
+ hc_index = http_vfts[rh.version].hc_index_get_by_req_index (rh.req_index,
+ thread_index);
+ hc = http_conn_get_w_thread (hc_index, thread_index);
+ s = format (s, "%U", http_vfts[rh.version].format_req, rh.req_index,
+ thread_index, hc, verbose);
return s;
}
@@ -2714,10 +1184,10 @@ format_http_transport_half_open (u8 *s, va_list *args)
session_t *tcp_ho;
ho_hc = http_ho_conn_get (ho_index);
- tcp_ho = session_get_from_handle (ho_hc->h_tc_session_handle);
+ tcp_ho = session_get_from_handle (ho_hc->hc_tc_session_handle);
s = format (s, "[%d:%d][H] half-open app_wrk %u ts %d:%d",
- ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->h_pa_wrk_index,
+ ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->hc_pa_wrk_index,
tcp_ho->thread_index, tcp_ho->session_index);
return s;
}
@@ -2739,7 +1209,13 @@ http_transport_cleanup_ho (u32 ho_hc_index)
HTTP_DBG (1, "half open: %x", ho_hc_index);
ho_hc = http_ho_conn_get (ho_hc_index);
- session_cleanup_half_open (ho_hc->h_tc_session_handle);
+ if (ho_hc->hc_tc_session_handle == SESSION_INVALID_HANDLE)
+ {
+ HTTP_DBG (1, "already pending cleanup");
+ ho_hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+ return;
+ }
+ session_cleanup_half_open (ho_hc->hc_tc_session_handle);
http_ho_conn_free (ho_hc);
}
@@ -2749,6 +1225,7 @@ static const transport_proto_vft_t http_proto = {
.start_listen = http_start_listen,
.stop_listen = http_stop_listen,
.close = http_transport_close,
+ .reset = http_transport_reset,
.cleanup_ho = http_transport_cleanup_ho,
.custom_tx = http_app_tx_callback,
.app_rx_evt = http_app_rx_evt_cb,
@@ -2807,6 +1284,28 @@ http_transport_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (http_transport_init);
+static uword
+unformat_http_version_cfg (unformat_input_t *input, va_list *va)
+{
+ http_engine_vft_t *http_version;
+ unformat_input_t sub_input;
+ int found = 0;
+
+ vec_foreach (http_version, http_vfts)
+ {
+ if (!unformat (input, http_version->name))
+ continue;
+
+ if (http_version->unformat_cfg_callback &&
+ unformat (input, "%U", unformat_vlib_cli_sub_input, &sub_input))
+ {
+ if (http_version->unformat_cfg_callback (&sub_input))
+ found = 1;
+ }
+ }
+ return found;
+}
+
static clib_error_t *
http_config_fn (vlib_main_t *vm, unformat_input_t *input)
{
@@ -2835,6 +1334,8 @@ http_config_fn (vlib_main_t *vm, unformat_input_t *input)
if (hm->fifo_size != mem_sz)
clib_warning ("invalid fifo size %lu", mem_sz);
}
+ else if (unformat (input, "%U", unformat_http_version_cfg))
+ ;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
index d61ac0b08c7..434ff965b6a 100644
--- a/src/plugins/http/http.h
+++ b/src/plugins/http/http.h
@@ -17,15 +17,9 @@
#define SRC_PLUGINS_HTTP_HTTP_H_
#include <ctype.h>
-
#include <vnet/plugin/plugin.h>
-#include <vpp/app/version.h>
-
-#include <vppinfra/time_range.h>
-
-#include <vnet/session/application_interface.h>
-#include <vnet/session/application.h>
-#include <http/http_buffer.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip46_address.h>
#define HTTP_DEBUG 0
@@ -49,20 +43,6 @@ typedef struct transport_endpt_cfg_http
http_udp_tunnel_mode_t udp_tunnel_mode; /**< connect-udp mode */
} transport_endpt_cfg_http_t;
-typedef struct http_conn_id_
-{
- union
- {
- session_handle_t app_session_handle;
- u32 parent_app_api_ctx;
- };
- session_handle_t tc_session_handle;
- u32 parent_app_wrk_index;
-} http_conn_id_t;
-
-STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN,
- "ctx id must be less than TRANSPORT_CONN_ID_LEN");
-
typedef struct
{
char *base;
@@ -71,45 +51,12 @@ typedef struct
#define http_token_lit(s) (s), sizeof (s) - 1
-#define foreach_http_conn_state \
- _ (LISTEN, "listen") \
- _ (CONNECTING, "connecting") \
- _ (ESTABLISHED, "established") \
- _ (TRANSPORT_CLOSED, "transport-closed") \
- _ (APP_CLOSED, "app-closed") \
- _ (CLOSED, "closed")
-
-typedef enum http_conn_state_
-{
-#define _(s, str) HTTP_CONN_STATE_##s,
- foreach_http_conn_state
-#undef _
-} http_conn_state_t;
-
-#define foreach_http_req_state \
- _ (0, IDLE, "idle") \
- _ (1, WAIT_APP_METHOD, "wait app method") \
- _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \
- _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \
- _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \
- _ (5, WAIT_APP_REPLY, "wait app reply") \
- _ (6, APP_IO_MORE_DATA, "app io more data") \
- _ (7, TUNNEL, "tunnel") \
- _ (8, UDP_TUNNEL, "udp tunnel")
-
-typedef enum http_req_state_
-{
-#define _(n, s, str) HTTP_REQ_STATE_##s = n,
- foreach_http_req_state
-#undef _
- HTTP_REQ_N_STATES
-} http_req_state_t;
-
typedef enum http_req_method_
{
HTTP_REQ_GET = 0,
HTTP_REQ_POST,
HTTP_REQ_CONNECT,
+ HTTP_REQ_UNKNOWN, /* for internal use */
} http_req_method_t;
typedef enum http_msg_type_
@@ -118,14 +65,6 @@ typedef enum http_msg_type_
HTTP_MSG_REPLY
} http_msg_type_t;
-typedef enum http_target_form_
-{
- HTTP_TARGET_ORIGIN_FORM,
- HTTP_TARGET_ABSOLUTE_FORM,
- HTTP_TARGET_AUTHORITY_FORM,
- HTTP_TARGET_ASTERISK_FORM
-} http_target_form_t;
-
#define foreach_http_content_type \
_ (APP_7Z, ".7z", "application/x-7z-compressed") \
_ (APP_DOC, ".doc", "application/msword") \
@@ -271,96 +210,108 @@ typedef enum http_status_code_
} http_status_code_t;
#define foreach_http_header_name \
- _ (ACCEPT, "Accept") \
- _ (ACCEPT_CHARSET, "Accept-Charset") \
- _ (ACCEPT_ENCODING, "Accept-Encoding") \
- _ (ACCEPT_LANGUAGE, "Accept-Language") \
- _ (ACCEPT_RANGES, "Accept-Ranges") \
- _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials") \
- _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers") \
- _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods") \
- _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin") \
- _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers") \
- _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age") \
- _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers") \
- _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method") \
- _ (AGE, "Age") \
- _ (ALLOW, "Allow") \
- _ (ALPN, "ALPN") \
- _ (ALT_SVC, "Alt-Svc") \
- _ (ALT_USED, "Alt-Used") \
- _ (ALTERNATES, "Alternates") \
- _ (AUTHENTICATION_CONTROL, "Authentication-Control") \
- _ (AUTHENTICATION_INFO, "Authentication-Info") \
- _ (AUTHORIZATION, "Authorization") \
- _ (CACHE_CONTROL, "Cache-Control") \
- _ (CACHE_STATUS, "Cache-Status") \
- _ (CAPSULE_PROTOCOL, "Capsule-Protocol") \
- _ (CDN_CACHE_CONTROL, "CDN-Cache-Control") \
- _ (CDN_LOOP, "CDN-Loop") \
- _ (CLIENT_CERT, "Client-Cert") \
- _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain") \
- _ (CLOSE, "Close") \
- _ (CONNECTION, "Connection") \
- _ (CONTENT_DIGEST, "Content-Digest") \
- _ (CONTENT_DISPOSITION, "Content-Disposition") \
- _ (CONTENT_ENCODING, "Content-Encoding") \
- _ (CONTENT_LANGUAGE, "Content-Language") \
- _ (CONTENT_LENGTH, "Content-Length") \
- _ (CONTENT_LOCATION, "Content-Location") \
- _ (CONTENT_RANGE, "Content-Range") \
- _ (CONTENT_TYPE, "Content-Type") \
- _ (COOKIE, "Cookie") \
- _ (DATE, "Date") \
- _ (DIGEST, "Digest") \
- _ (DPOP, "DPoP") \
- _ (DPOP_NONCE, "DPoP-Nonce") \
- _ (EARLY_DATA, "Early-Data") \
- _ (ETAG, "ETag") \
- _ (EXPECT, "Expect") \
- _ (EXPIRES, "Expires") \
- _ (FORWARDED, "Forwarded") \
- _ (FROM, "From") \
- _ (HOST, "Host") \
- _ (IF_MATCH, "If-Match") \
- _ (IF_MODIFIED_SINCE, "If-Modified-Since") \
- _ (IF_NONE_MATCH, "If-None-Match") \
- _ (IF_RANGE, "If-Range") \
- _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since") \
- _ (KEEP_ALIVE, "Keep-Alive") \
- _ (LAST_MODIFIED, "Last-Modified") \
- _ (LINK, "Link") \
- _ (LOCATION, "Location") \
- _ (MAX_FORWARDS, "Max-Forwards") \
- _ (ORIGIN, "Origin") \
- _ (PRIORITY, "Priority") \
- _ (PROXY_AUTHENTICATE, "Proxy-Authenticate") \
- _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info") \
- _ (PROXY_AUTHORIZATION, "Proxy-Authorization") \
- _ (PROXY_STATUS, "Proxy-Status") \
- _ (RANGE, "Range") \
- _ (REFERER, "Referer") \
- _ (REPR_DIGEST, "Repr-Digest") \
- _ (SET_COOKIE, "Set-Cookie") \
- _ (SIGNATURE, "Signature") \
- _ (SIGNATURE_INPUT, "Signature-Input") \
- _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security") \
- _ (RETRY_AFTER, "Retry-After") \
- _ (SERVER, "Server") \
- _ (TE, "TE") \
- _ (TRAILER, "Trailer") \
- _ (TRANSFER_ENCODING, "Transfer-Encoding") \
- _ (UPGRADE, "Upgrade") \
- _ (USER_AGENT, "User-Agent") \
- _ (VARY, "Vary") \
- _ (VIA, "Via") \
- _ (WANT_CONTENT_DIGEST, "Want-Content-Digest") \
- _ (WANT_REPR_DIGEST, "Want-Repr-Digest") \
- _ (WWW_AUTHENTICATE, "WWW-Authenticate")
+ _ (ACCEPT_CHARSET, "Accept-Charset", "accept-charset", 15) \
+ _ (ACCEPT_ENCODING, "Accept-Encoding", "accept-encoding", 16) \
+ _ (ACCEPT_LANGUAGE, "Accept-Language", "accept-language", 17) \
+ _ (ACCEPT_RANGES, "Accept-Ranges", "accept-ranges", 18) \
+ _ (ACCEPT, "Accept", "accept", 19) \
+ _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials", \
+ "access-control-allow-credentials", 0) \
+ _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers", \
+ "access-control-allow-headers", 0) \
+ _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods", \
+ "access-control-allow-methods", 0) \
+ _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin", \
+ "access-control-allow-origin", 20) \
+ _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers", \
+ "access-control-expose-headers", 0) \
+ _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age", \
+ "access-control-max-age", 0) \
+ _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers", \
+ "access-control-request-headers", 0) \
+ _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method", \
+ "access-control-request-method", 0) \
+ _ (AGE, "Age", "age", 21) \
+ _ (ALLOW, "Allow", "allow", 22) \
+ _ (ALPN, "ALPN", "alpn", 0) \
+ _ (ALT_SVC, "Alt-Svc", "alt-svc", 0) \
+ _ (ALT_USED, "Alt-Used", "alt-used", 0) \
+ _ (ALTERNATES, "Alternates", "alternates", 0) \
+ _ (AUTHENTICATION_CONTROL, "Authentication-Control", \
+ "authentication-control", 0) \
+ _ (AUTHENTICATION_INFO, "Authentication-Info", "authentication-info", 0) \
+ _ (AUTHORIZATION, "Authorization", "authorization", 23) \
+ _ (CACHE_CONTROL, "Cache-Control", "cache-control", 24) \
+ _ (CACHE_STATUS, "Cache-Status", "cache-status", 0) \
+ _ (CAPSULE_PROTOCOL, "Capsule-Protocol", "capsule-protocol", 0) \
+ _ (CDN_CACHE_CONTROL, "CDN-Cache-Control", "cdn-cache-control", 0) \
+ _ (CDN_LOOP, "CDN-Loop", "cdn-loop", 0) \
+ _ (CLIENT_CERT, "Client-Cert", "client-cert", 0) \
+ _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain", "client-cert-chain", 0) \
+ _ (CLOSE, "Close", "close", 0) \
+ _ (CONNECTION, "Connection", "connection", 0) \
+ _ (CONTENT_DIGEST, "Content-Digest", "content-digest", 0) \
+ _ (CONTENT_DISPOSITION, "Content-Disposition", "content-disposition", 25) \
+ _ (CONTENT_ENCODING, "Content-Encoding", "content-encoding", 26) \
+ _ (CONTENT_LANGUAGE, "Content-Language", "content-language", 27) \
+ _ (CONTENT_LENGTH, "Content-Length", "content-length", 28) \
+ _ (CONTENT_LOCATION, "Content-Location", "content-location", 29) \
+ _ (CONTENT_RANGE, "Content-Range", "content-range", 30) \
+ _ (CONTENT_TYPE, "Content-Type", "content-type", 31) \
+ _ (COOKIE, "Cookie", "cookie", 32) \
+ _ (DATE, "Date", "date", 33) \
+ _ (DIGEST, "Digest", "digest", 0) \
+ _ (DPOP, "DPoP", "dpop", 0) \
+ _ (DPOP_NONCE, "DPoP-Nonce", "dpop-nonce", 0) \
+ _ (EARLY_DATA, "Early-Data", "early-data", 0) \
+ _ (ETAG, "ETag", "etag", 34) \
+ _ (EXPECT, "Expect", "expect", 35) \
+ _ (EXPIRES, "Expires", "expires", 36) \
+ _ (FORWARDED, "Forwarded", "forwarded", 0) \
+ _ (FROM, "From", "from", 37) \
+ _ (HOST, "Host", "host", 38) \
+ _ (IF_MATCH, "If-Match", "if-match", 39) \
+ _ (IF_MODIFIED_SINCE, "If-Modified-Since", "if-modified-since", 40) \
+ _ (IF_NONE_MATCH, "If-None-Match", "if-none-match", 41) \
+ _ (IF_RANGE, "If-Range", "if-range", 42) \
+ _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since", "if-unmodified-since", 43) \
+ _ (KEEP_ALIVE, "Keep-Alive", "keep-alive", 0) \
+ _ (LAST_MODIFIED, "Last-Modified", "last-modified", 44) \
+ _ (LINK, "Link", "link", 45) \
+ _ (LOCATION, "Location", "location", 46) \
+ _ (MAX_FORWARDS, "Max-Forwards", "max-forwards", 47) \
+ _ (ORIGIN, "Origin", "origin", 0) \
+ _ (PRIORITY, "Priority", "priority", 0) \
+ _ (PROXY_AUTHENTICATE, "Proxy-Authenticate", "proxy-authenticate", 48) \
+ _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info", \
+ "proxy-authentication-info", 0) \
+ _ (PROXY_AUTHORIZATION, "Proxy-Authorization", "proxy-authorization", 49) \
+ _ (PROXY_STATUS, "Proxy-Status", "proxy-status", 0) \
+ _ (RANGE, "Range", "range", 50) \
+ _ (REFERER, "Referer", "referer", 51) \
+ _ (REFRESH, "Refresh", "refresh", 52) \
+ _ (REPR_DIGEST, "Repr-Digest", "repr-digest", 0) \
+ _ (RETRY_AFTER, "Retry-After", "retry-after", 53) \
+ _ (SERVER, "Server", "server", 54) \
+ _ (SET_COOKIE, "Set-Cookie", "set-cookie", 55) \
+ _ (SIGNATURE, "Signature", "signature", 0) \
+ _ (SIGNATURE_INPUT, "Signature-Input", "signature-input", 0) \
+ _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security", \
+ "strict-transport-security", 56) \
+ _ (TE, "TE", "te", 0) \
+ _ (TRAILER, "Trailer", "trailer", 0) \
+ _ (TRANSFER_ENCODING, "Transfer-Encoding", "transfer-encoding", 57) \
+ _ (UPGRADE, "Upgrade", "upgrade", 0) \
+ _ (USER_AGENT, "User-Agent", "user-agent", 58) \
+ _ (VARY, "Vary", "vary", 59) \
+ _ (VIA, "Via", "via", 60) \
+ _ (WANT_CONTENT_DIGEST, "Want-Content-Digest", "want-content-digest", 0) \
+ _ (WANT_REPR_DIGEST, "Want-Repr-Digest", "want-repr-digest", 0) \
+ _ (WWW_AUTHENTICATE, "WWW-Authenticate", "www-authenticate", 61)
typedef enum http_header_name_
{
-#define _(sym, str) HTTP_HEADER_##sym,
+#define _(sym, str_canonical, str_lower, hpack_index) HTTP_HEADER_##sym,
foreach_http_header_name
#undef _
} http_header_name_t;
@@ -399,6 +350,7 @@ typedef enum http_url_scheme_
{
HTTP_URL_SCHEME_HTTP,
HTTP_URL_SCHEME_HTTPS,
+ HTTP_URL_SCHEME_UNKNOWN, /* for internal use */
} http_url_scheme_t;
typedef struct http_msg_data_
@@ -432,118 +384,6 @@ typedef struct http_msg_
http_msg_data_t data;
} http_msg_t;
-typedef struct http_req_
-{
- http_req_state_t state; /* state-machine state */
-
- http_buffer_t tx_buf; /* message body from app to be sent */
-
- /*
- * for parsing of incoming message from transport
- */
- u8 *rx_buf; /* this should hold at least control data */
- u32 rx_buf_offset; /* current offset during parsing */
- u32 control_data_len; /* start line + headers + empty line */
-
- union
- {
- u64 to_recv; /* remaining bytes of body to receive from transport */
- u64 to_skip; /* remaining bytes of capsule to skip */
- };
-
- u8 is_tunnel;
-
- /*
- * parsed metadata for app
- */
- union
- {
- http_status_code_t status_code;
- http_req_method_t method;
- };
-
- http_target_form_t target_form;
- http_url_scheme_t scheme;
- u32 target_authority_offset;
- u32 target_authority_len;
- u32 target_path_offset;
- u32 target_path_len;
- u32 target_query_offset;
- u32 target_query_len;
-
- u32 headers_offset;
- u32 headers_len;
-
- u32 body_offset;
- u64 body_len;
-
- http_field_line_t *headers;
- uword content_len_header_index;
- uword connection_header_index;
- uword upgrade_header_index;
- uword host_header_index;
-
- http_upgrade_proto_t upgrade_proto;
-} http_req_t;
-
-typedef struct http_tc_
-{
- union
- {
- transport_connection_t connection;
- http_conn_id_t c_http_conn_id;
- };
-#define h_tc_session_handle c_http_conn_id.tc_session_handle
-#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index
-#define h_pa_session_handle c_http_conn_id.app_session_handle
-#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx
-#define h_hc_index connection.c_index
-
- http_conn_state_t state;
- u32 timer_handle;
- u32 timeout;
- u8 pending_timer;
- u8 *app_name;
- u8 *host;
- u8 is_server;
- http_udp_tunnel_mode_t udp_tunnel_mode;
-
- http_req_t req;
-} http_conn_t;
-
-typedef struct http_worker_
-{
- http_conn_t *conn_pool;
-} http_worker_t;
-
-typedef struct http_main_
-{
- http_worker_t *wrk;
- http_conn_t *listener_pool;
- http_conn_t *ho_conn_pool;
- u32 app_index;
-
- u8 **rx_bufs;
- u8 **tx_bufs;
- u8 **app_header_lists;
-
- clib_timebase_t timebase;
-
- u16 *sc_by_u16;
- /*
- * Runtime config
- */
- u8 debug_level;
- u8 is_init;
-
- /*
- * Config
- */
- u64 first_seg_size;
- u64 add_seg_size;
- u32 fifo_size;
-} http_main_t;
-
always_inline u8 *
format_http_bytes (u8 *s, va_list *va)
{
@@ -669,7 +509,8 @@ http_percent_decode (u8 *src, u32 len)
}
/**
- * Remove dot segments from path (RFC3986 section 5.2.4)
+ * Sanitize HTTP path by squashing repeating slashes and removing
+ * dot segments from path (RFC3986 section 5.2.4)
*
* @param path Path to sanitize.
*
@@ -678,18 +519,18 @@ http_percent_decode (u8 *src, u32 len)
* The caller is always responsible to free the returned vector.
*/
always_inline u8 *
-http_path_remove_dot_segments (u8 *path)
+http_path_sanitize (u8 *path)
{
u32 *segments = 0, *segments_len = 0, segment_len;
u8 *new_path = 0;
int i, ii;
- if (!path)
+ if (!path || vec_len (path) == 0)
return vec_new (u8, 0);
segments = vec_new (u32, 1);
/* first segment */
- segments[0] = 0;
+ segments[0] = (path[0] == '/' ? 1 : 0);
/* find all segments */
for (i = 1; i < (vec_len (path) - 1); i++)
{
@@ -704,7 +545,8 @@ http_path_remove_dot_segments (u8 *path)
for (i = 0; i < vec_len (segments_len); i++)
{
segment_len = segments[i + 1] - segments[i];
- if (segment_len == 2 && path[segments[i]] == '.')
+ /* aside from dots, skip empty segments (double slashes) */
+ if ((segment_len == 2 && path[segments[i]] == '.') || segment_len == 1)
segment_len = 0;
else if (segment_len == 3 && path[segments[i]] == '.' &&
path[segments[i] + 1] == '.')
@@ -736,124 +578,6 @@ http_path_remove_dot_segments (u8 *path)
return new_path;
}
-always_inline int
-_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start,
- u32 *field_name_len)
-{
- u32 name_len = 0;
- u8 *p;
-
- static uword tchar[4] = {
- /* !#$%'*+-.0123456789 */
- 0x03ff6cba00000000,
- /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */
- 0x57ffffffc7fffffe,
- 0x0000000000000000,
- 0x0000000000000000,
- };
-
- p = *pos;
-
- *field_name_start = p;
- while (p != end)
- {
- if (clib_bitmap_get_no_check (tchar, *p))
- {
- name_len++;
- p++;
- }
- else if (*p == ':')
- {
- if (name_len == 0)
- {
- clib_warning ("empty field name");
- return -1;
- }
- *field_name_len = name_len;
- p++;
- *pos = p;
- return 0;
- }
- else
- {
- clib_warning ("invalid character %d", *p);
- return -1;
- }
- }
- clib_warning ("field name end not found");
- return -1;
-}
-
-always_inline int
-_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start,
- u32 *field_value_len)
-{
- u32 value_len = 0;
- u8 *p;
-
- p = *pos;
-
- /* skip leading whitespace */
- while (1)
- {
- if (p == end)
- {
- clib_warning ("field value not found");
- return -1;
- }
- else if (*p != ' ' && *p != '\t')
- {
- break;
- }
- p++;
- }
-
- *field_value_start = p;
- while (p != end)
- {
- if (*p == '\r')
- {
- if ((end - p) < 1)
- {
- clib_warning ("incorrect field line end");
- return -1;
- }
- p++;
- if (*p == '\n')
- {
- if (value_len == 0)
- {
- clib_warning ("empty field value");
- return -1;
- }
- p++;
- *pos = p;
- /* skip trailing whitespace */
- p = *field_value_start + value_len - 1;
- while (*p == ' ' || *p == '\t')
- {
- p--;
- value_len--;
- }
- *field_value_len = value_len;
- return 0;
- }
- clib_warning ("CR without LF");
- return -1;
- }
- if (*p < ' ' && *p != '\t')
- {
- clib_warning ("invalid character %d", *p);
- return -1;
- }
- p++;
- value_len++;
- }
-
- clib_warning ("field value end not found");
- return -1;
-}
-
typedef struct
{
http_token_t name;
@@ -873,6 +597,16 @@ typedef struct
.values = 0, .value_by_name = 0, .buf = 0, .concatenated_values = 0, \
}
+/**
+ * Case-sensitive comparison of two tokens.
+ *
+ * @param actual Pointer to the first token.
+ * @param actual_len Length of the first token.
+ * @param expected Pointer to the second token.
+ * @param expected_len Length of the second token.
+ *
+ * @return @c 1 if tokens are same, @c 0 otherwise.
+ */
always_inline u8
http_token_is (const char *actual, uword actual_len, const char *expected,
uword expected_len)
@@ -903,6 +637,16 @@ http_tolower_word (uword x)
return (x | y);
}
+/**
+ * Case-insensitive comparison of two tokens.
+ *
+ * @param actual Pointer to the first token.
+ * @param actual_len Length of the first token.
+ * @param expected Pointer to the second token.
+ * @param expected_len Length of the second token.
+ *
+ * @return @c 1 if tokens are same, @c 0 otherwise.
+ */
always_inline u8
http_token_is_case (const char *actual, uword actual_len, const char *expected,
uword expected_len)
@@ -934,6 +678,16 @@ http_token_is_case (const char *actual, uword actual_len, const char *expected,
return 1;
}
+/**
+ * Check if there is occurrence of token in another token.
+ *
+ * @param haystack Pointer to the token being searched.
+ * @param haystack_len Length of the token being searched.
+ * @param needle The token to search for.
+ * @param needle_len Length of the token to search for.
+ *
+ * @return @c 1 if the needle is found in the haystack, @c 0 otherwise.
+ */
always_inline u8
http_token_contains (const char *haystack, uword haystack_len,
const char *needle, uword needle_len)
@@ -1158,6 +912,13 @@ typedef struct
/* Use high bit of header name length as custom header name bit. */
#define HTTP_CUSTOM_HEADER_NAME_BIT (1 << 31)
+/**
+ * Initialize headers list context.
+ *
+ * @param ctx Headers list context.
+ * @param buf Buffer, which store headers list, provided by app.
+ * @param len Length of headers list buffer.
+ */
always_inline void
http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len)
{
@@ -1166,30 +927,53 @@ http_init_headers_ctx (http_headers_ctx_t *ctx, u8 *buf, u32 len)
ctx->buf = buf;
}
-always_inline void
+/**
+ * Add header with predefined name to the headers list.
+ *
+ * @param ctx Headers list context.
+ * @param name Header name ID (see @ref http_header_name_t).
+ * @param value Header value pointer.
+ * @param value_len Header value length.
+ *
+ * @return @c 0 on success, @c -1 if the header does not fit in the buffer.
+ */
+always_inline int
http_add_header (http_headers_ctx_t *ctx, http_header_name_t name,
const char *value, uword value_len)
{
http_app_header_t *header;
- ASSERT ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) <
- ctx->len);
+ if ((ctx->tail_offset + sizeof (http_app_header_t) + value_len) > ctx->len)
+ return -1;
header = (http_app_header_t *) (ctx->buf + ctx->tail_offset);
header->name = (u32) name;
header->value.len = (u32) value_len;
clib_memcpy (header->value.token, (u8 *) value, value_len);
ctx->tail_offset += sizeof (http_app_header_t) + value_len;
+ return 0;
}
-always_inline void
+/**
+ * Add header with custom name to the headers list.
+ *
+ * @param ctx Headers list context.
+ * @param name Header name pointer.
+ * @param name_len Header name length.
+ * @param value Header value pointer.
+ * @param value_len Header value length.
+ *
+ * @return @c 0 on success, @c -1 if the header does not fit in the buffer.
+ */
+always_inline int
http_add_custom_header (http_headers_ctx_t *ctx, const char *name,
uword name_len, const char *value, uword value_len)
{
http_custom_token_t *token;
- ASSERT ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len +
- value_len) < ctx->len);
+ if ((ctx->tail_offset + 2 * sizeof (http_custom_token_t) + name_len +
+ value_len) > ctx->len)
+ return -1;
/* name */
token = (http_custom_token_t *) (ctx->buf + ctx->tail_offset);
@@ -1202,6 +986,18 @@ http_add_custom_header (http_headers_ctx_t *ctx, const char *name,
token->len = (u32) value_len;
clib_memcpy (token->token, (u8 *) value, token->len);
ctx->tail_offset += sizeof (http_custom_token_t) + value_len;
+ return 0;
+}
+
+/**
+ * Truncate the headers list.
+ *
+ * Resets the tail offset of the context to zero, so the next header added
+ * through this context is written at the start of the buffer again.
+ *
+ * @param ctx Headers list context.
+ */
+always_inline void
+http_truncate_headers_list (http_headers_ctx_t *ctx)
+{
+ ctx->tail_offset = 0;
}
typedef enum http_uri_host_type_
@@ -1491,6 +1287,15 @@ http_parse_authority (u8 *authority, u32 authority_len,
return token_start == end ? 0 : -1;
}
+/**
+ * Format given authority (RFC3986 section 3.2)
+ *
+ * @param authority Authority to format.
+ *
+ * @return New vector with formatted authority.
+ *
+ * The caller is always responsible to free the returned vector.
+ */
always_inline u8 *
http_serialize_authority (http_uri_authority_t *authority)
{
diff --git a/src/plugins/http/http1.c b/src/plugins/http/http1.c
new file mode 100644
index 00000000000..5ecc1f52300
--- /dev/null
+++ b/src/plugins/http/http1.c
@@ -0,0 +1,1936 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/application.h>
+
+#include <http/http.h>
+#include <http/http_header_names.h>
+#include <http/http_private.h>
+#include <http/http_status_codes.h>
+#include <http/http_timer.h>
+
+/* HTTP/1 engine state */
+typedef struct http1_main_
+{
+ /* request pools, one pool per thread (indexed by thread index) */
+ http_req_t **req_pool;
+} http1_main_t;
+
+/* file-local instance holding the HTTP/1 request pools */
+static http1_main_t http1_main;
+
+/* Upgrade protocol names: index 0 is the empty string, the remaining
+ * entries are generated from foreach_http_upgrade_proto */
+const char *http1_upgrade_proto_str[] = { "",
+#define _(sym, str) str,
+ foreach_http_upgrade_proto
+#undef _
+};
+
+/**
+ * http error boilerplate
+ */
+static const char *error_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Connection: close\r\n"
+ "Content-Length: 0\r\n\r\n";
+
+/**
+ * http response boilerplate
+ */
+static const char *response_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Server: %v\r\n";
+
+/* Content-Length header line; %llu is the header value */
+static const char *content_len_template = "Content-Length: %llu\r\n";
+
+/* Connection/Upgrade header lines; %s is the value of the Upgrade header */
+static const char *connection_upgrade_template = "Connection: upgrade\r\n"
+ "Upgrade: %s\r\n";
+
+/**
+ * http request boilerplate
+ */
+static const char *get_request_template = "GET %s HTTP/1.1\r\n"
+ "Host: %v\r\n"
+ "User-Agent: %v\r\n";
+
+static const char *post_request_template = "POST %s HTTP/1.1\r\n"
+ "Host: %v\r\n"
+ "User-Agent: %v\r\n"
+ "Content-Length: %llu\r\n";
+
+/**
+ * Allocate a request for the given connection.
+ *
+ * The request is taken from the pool of the connection's thread, zeroed,
+ * given an HTTP/1 request handle and linked to the connection: its pool
+ * index is stored in hc->opaque and HTTP_CONN_F_HAS_REQUEST is set.
+ *
+ * @param hc HTTP connection the request belongs to.
+ *
+ * @return Pointer to the newly allocated request.
+ */
+always_inline http_req_t *
+http1_conn_alloc_req (http_conn_t *hc)
+{
+  http1_main_t *h1m = &http1_main;
+  http_req_handle_t handle;
+  http_req_t *new_req;
+  u32 pool_index;
+
+  pool_get_aligned_safe (h1m->req_pool[hc->c_thread_index], new_req,
+                         CLIB_CACHE_LINE_BYTES);
+  pool_index = new_req - h1m->req_pool[hc->c_thread_index];
+  clib_memset (new_req, 0, sizeof (*new_req));
+
+  /* handle identifying this request: engine version + pool index */
+  handle.version = HTTP_VERSION_1;
+  handle.req_index = pool_index;
+
+  /* request side */
+  new_req->hr_pa_session_handle = SESSION_INVALID_HANDLE;
+  new_req->hr_req_handle = handle.as_u32;
+  new_req->hr_hc_index = hc->hc_hc_index;
+  new_req->c_thread_index = hc->c_thread_index;
+  new_req->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+
+  /* connection side */
+  hc->opaque = uword_to_pointer (pool_index, void *);
+  hc->flags |= HTTP_CONN_F_HAS_REQUEST;
+
+  return new_req;
+}
+
+/**
+ * Get a request by its pool index.
+ *
+ * @param req_index    Index of the request in the thread's pool.
+ * @param thread_index Thread owning the pool.
+ *
+ * @return Pointer to the request.
+ */
+always_inline http_req_t *
+http1_req_get (u32 req_index, clib_thread_index_t thread_index)
+{
+  http_req_t *pool = http1_main.req_pool[thread_index];
+
+  return pool_elt_at_index (pool, req_index);
+}
+
+/**
+ * Get a request by its pool index, tolerating stale indices.
+ *
+ * @param req_index    Index of the request in the thread's pool.
+ * @param thread_index Thread owning the pool.
+ *
+ * @return Pointer to the request, or 0 if the index is free in the pool.
+ */
+always_inline http_req_t *
+http1_req_get_if_valid (u32 req_index, clib_thread_index_t thread_index)
+{
+  http_req_t *pool = http1_main.req_pool[thread_index];
+
+  if (!pool_is_free_index (pool, req_index))
+    return pool_elt_at_index (pool, req_index);
+
+  return 0;
+}
+
+/**
+ * Get the request linked to a connection.
+ *
+ * The request index is read back from hc->opaque.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return Pointer to the connection's request.
+ */
+always_inline http_req_t *
+http1_conn_get_req (http_conn_t *hc)
+{
+  http_req_t *pool = http1_main.req_pool[hc->c_thread_index];
+  u32 idx = pointer_to_uword (hc->opaque);
+
+  return pool_elt_at_index (pool, idx);
+}
+
+/**
+ * Free the request linked to a connection.
+ *
+ * Releases the request's headers, target and tx buffer, returns the
+ * request to the pool of the connection's thread and clears
+ * HTTP_CONN_F_HAS_REQUEST on the connection.
+ *
+ * @param hc HTTP connection whose request is released.
+ */
+always_inline void
+http1_conn_free_req (http_conn_t *hc)
+{
+  http1_main_t *h1m = &http1_main;
+  u32 idx = pointer_to_uword (hc->opaque);
+  http_req_t *victim;
+
+  victim = pool_elt_at_index (h1m->req_pool[hc->c_thread_index], idx);
+
+  /* release everything the request owns */
+  vec_free (victim->headers);
+  vec_free (victim->target);
+  http_buffer_free (&victim->tx_buf);
+
+  /* poison the memory in debug images */
+  if (CLIB_DEBUG)
+    memset (victim, 0xba, sizeof (*victim));
+
+  pool_put (h1m->req_pool[hc->c_thread_index], victim);
+  hc->flags &= ~HTTP_CONN_F_HAS_REQUEST;
+}
+
+/**
+ * Deschedule the http session and ask for a dequeue notification when the
+ * underlying transport session tx fifo is almost full.
+ *
+ * @param hc  HTTP connection.
+ * @param req Request to deschedule.
+ * @param sp  Transport send parameters.
+ */
+static_always_inline void
+http1_check_and_deschedule (http_conn_t *hc, http_req_t *req,
+                            transport_send_params_t *sp)
+{
+  /* below the write threshold there is nothing to do */
+  if (!http_io_ts_check_write_thresh (hc))
+    return;
+
+  http_req_deschedule (req, sp);
+  http_io_ts_add_want_deq_ntf (hc);
+}
+
+/**
+ * Send an error response built from error_template to the transport.
+ *
+ * @param hc HTTP connection.
+ * @param ec Status code to send; values >= HTTP_N_STATUS are replaced by
+ *           HTTP_STATUS_INTERNAL_ERROR.
+ * @param sp Transport send parameters.
+ */
+static void
+http1_send_error (http_conn_t *hc, http_status_code_t ec,
+                  transport_send_params_t *sp)
+{
+  http_status_code_t code;
+  u8 *msg;
+
+  /* never index the status string table out of range */
+  code = ec >= HTTP_N_STATUS ? HTTP_STATUS_INTERNAL_ERROR : ec;
+
+  msg = format (0, error_template, http_status_code_str[code],
+                format_http_time_now, hc);
+  HTTP_DBG (3, "%v", msg);
+  http_io_ts_write (hc, msg, vec_len (msg), sp);
+  vec_free (msg);
+  http_io_ts_after_write (hc, 0);
+}
+
+/**
+ * Read everything currently available from the transport into rx_buf.
+ *
+ * @param hc     HTTP connection.
+ * @param rx_buf Vector receiving the data.
+ *
+ * @return 0 on success, -1 if there is nothing to read.
+ */
+static int
+http1_read_message (http_conn_t *hc, u8 *rx_buf)
+{
+  u32 n_avail = http_io_ts_max_read (hc);
+
+  if (PREDICT_FALSE (!n_avail))
+    return -1;
+
+  /*
+   * NOTE(review): rx_buf is passed by value, so if vec_validate has to
+   * grow the vector the caller keeps its old pointer. Presumably the
+   * caller's buffer is pre-sized so this never happens - confirm.
+   */
+  vec_validate (rx_buf, n_avail - 1);
+  http_io_ts_read (hc, rx_buf, n_avail, 1);
+
+  return 0;
+}
+
+/**
+ * Classify the request-target and split it into authority, path and query.
+ *
+ * Expects req->target_path_offset/target_path_len to cover the whole
+ * request-target inside rx_buf and req->method to be already set. The
+ * forms are tried in this order: asterisk, origin, absolute, authority.
+ *
+ * @param req    Request being parsed; target_form and the target_*
+ *               offsets/lengths are updated.
+ * @param rx_buf Buffer holding the received message.
+ *
+ * @return 0 if the target is valid for the request method, -1 otherwise.
+ */
+static int
+http1_parse_target (http_req_t *req, u8 *rx_buf)
+{
+ int i;
+ u8 *p, *end;
+
+ /* asterisk-form = "*" */
+ if ((rx_buf[req->target_path_offset] == '*') && (req->target_path_len == 1))
+ {
+ req->target_form = HTTP_TARGET_ASTERISK_FORM;
+ /* we do not support OPTIONS request */
+ return -1;
+ }
+
+ /* origin-form = 1*( "/" segment ) [ "?" query ] */
+ if (rx_buf[req->target_path_offset] == '/')
+ {
+ /* drop leading slash */
+ req->target_path_len--;
+ req->target_path_offset++;
+ req->target_form = HTTP_TARGET_ORIGIN_FORM;
+ http_identify_optional_query (req, rx_buf);
+ /* can't be CONNECT method */
+ return req->method == HTTP_REQ_CONNECT ? -1 : 0;
+ }
+
+ /* absolute-form =
+ * scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */
+ if (req->target_path_len > 8 &&
+ !memcmp (rx_buf + req->target_path_offset, "http", 4))
+ {
+ req->scheme = HTTP_URL_SCHEME_HTTP;
+ p = rx_buf + req->target_path_offset + 4;
+ if (*p == 's')
+ {
+ p++;
+ req->scheme = HTTP_URL_SCHEME_HTTPS;
+ }
+ if (*p++ == ':')
+ {
+ /* expect_char is defined outside this file; presumably it checks
+ * *p, advances p and returns -1 on mismatch - confirm */
+ expect_char ('/');
+ expect_char ('/');
+ req->target_form = HTTP_TARGET_ABSOLUTE_FORM;
+ req->target_authority_offset = p - rx_buf;
+ req->target_authority_len = 0;
+ end = rx_buf + req->target_path_offset + req->target_path_len;
+ /* authority runs up to the first '/', the rest is the path */
+ while (p < end)
+ {
+ if (*p == '/')
+ {
+ p++; /* drop leading slash */
+ req->target_path_offset = p - rx_buf;
+ req->target_path_len = end - p;
+ break;
+ }
+ req->target_authority_len++;
+ p++;
+ }
+ /* NOTE(review): the warning says "zero length host" but the test is
+ * on target_path_len, so a target ending right after the first '/'
+ * is rejected while an empty authority is not. Also, when no '/'
+ * follows the authority, target_path_offset/len are left covering
+ * the whole target - confirm this is intended */
+ if (!req->target_path_len)
+ {
+ clib_warning ("zero length host");
+ return -1;
+ }
+ http_identify_optional_query (req, rx_buf);
+ /* can't be CONNECT method */
+ return req->method == HTTP_REQ_CONNECT ? -1 : 0;
+ }
+ }
+
+ /* authority-form = host ":" port */
+ for (i = req->target_path_offset;
+ i < (req->target_path_offset + req->target_path_len); i++)
+ {
+ /* a ':' directly followed by a digit is taken as the port separator */
+ if ((rx_buf[i] == ':') && (isdigit (rx_buf[i + 1])))
+ {
+ req->target_authority_len = req->target_path_len;
+ req->target_path_len = 0;
+ req->target_authority_offset = req->target_path_offset;
+ req->target_path_offset = 0;
+ req->target_form = HTTP_TARGET_AUTHORITY_FORM;
+ /* "authority-form" is only used for CONNECT requests */
+ return req->method == HTTP_REQ_CONNECT ? 0 : -1;
+ }
+ }
+
+ /* none of the supported forms matched */
+ return -1;
+}
+
+static int
+http1_parse_request_line (http_req_t *req, u8 *rx_buf, http_status_code_t *ec)
+{
+ int i, target_len;
+ u32 next_line_offset, method_offset;
+
+ /* request-line = method SP request-target SP HTTP-version CRLF */
+ i = http_v_find_index (rx_buf, 8, 0, "\r\n");
+ if (i < 0)
+ {
+ clib_warning ("request line incomplete");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ HTTP_DBG (2, "request line length: %d", i);
+ req->control_data_len = i + 2;
+ next_line_offset = req->control_data_len;
+
+ /* there should be at least one more CRLF */
+ if (vec_len (rx_buf) < (next_line_offset + 2))
+ {
+ clib_warning ("malformed message, too short");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+
+ /*
+ * RFC9112 2.2:
+ * In the interest of robustness, a server that is expecting to receive and
+ * parse a request-line SHOULD ignore at least one empty line (CRLF)
+ * received prior to the request-line.
+ */
+ method_offset = rx_buf[0] == '\r' && rx_buf[1] == '\n' ? 2 : 0;
+ /* parse method */
+ if (!memcmp (rx_buf + method_offset, "GET ", 4))
+ {
+ HTTP_DBG (0, "GET method");
+ req->method = HTTP_REQ_GET;
+ req->target_path_offset = method_offset + 4;
+ }
+ else if (!memcmp (rx_buf + method_offset, "POST ", 5))
+ {
+ HTTP_DBG (0, "POST method");
+ req->method = HTTP_REQ_POST;
+ req->target_path_offset = method_offset + 5;
+ }
+ else if (!memcmp (rx_buf + method_offset, "CONNECT ", 8))
+ {
+ HTTP_DBG (0, "CONNECT method");
+ req->method = HTTP_REQ_CONNECT;
+ req->upgrade_proto = HTTP_UPGRADE_PROTO_NA;
+ req->target_path_offset = method_offset + 8;
+ req->is_tunnel = 1;
+ }
+ else
+ {
+ if (rx_buf[method_offset] - 'A' <= 'Z' - 'A')
+ {
+ *ec = HTTP_STATUS_NOT_IMPLEMENTED;
+ return -1;
+ }
+ else
+ {
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ }
+
+ /* find version */
+ i = http_v_find_index (rx_buf, next_line_offset - 11, 11, " HTTP/");
+ if (i < 0)
+ {
+ clib_warning ("HTTP version not present");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ /* verify major version */
+ if (isdigit (rx_buf[i + 6]))
+ {
+ if (rx_buf[i + 6] != '1')
+ {
+ clib_warning ("HTTP major version '%c' not supported",
+ rx_buf[i + 6]);
+ *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
+ return -1;
+ }
+ }
+ else
+ {
+ clib_warning ("HTTP major version '%c' is not digit", rx_buf[i + 6]);
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+
+ /* parse request-target */
+ HTTP_DBG (2, "http at %d", i);
+ target_len = i - req->target_path_offset;
+ HTTP_DBG (2, "target_len %d", target_len);
+ if (target_len < 1)
+ {
+ clib_warning ("request-target not present");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ req->target_path_len = target_len;
+ req->target_query_offset = 0;
+ req->target_query_len = 0;
+ req->target_authority_len = 0;
+ req->target_authority_offset = 0;
+ if (http1_parse_target (req, rx_buf))
+ {
+ clib_warning ("invalid target");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ HTTP_DBG (2, "request-target path length: %u", req->target_path_len);
+ HTTP_DBG (2, "request-target path offset: %u", req->target_path_offset);
+ HTTP_DBG (2, "request-target query length: %u", req->target_query_len);
+ HTTP_DBG (2, "request-target query offset: %u", req->target_query_offset);
+
+ /* set buffer offset to next line start */
+ req->rx_buf_offset = next_line_offset;
+
+ return 0;
+}
+
+/*
+ * Parse the status-line of an HTTP/1.x response held in rx_buf.
+ *
+ * On success stores the parsed status code in req->status_code, sets
+ * req->control_data_len and req->rx_buf_offset to the offset just past the
+ * status-line terminator and returns 0. Returns -1, after logging a
+ * warning, when the line is incomplete, too short or malformed.
+ *
+ * NOTE(review): expect_char and parse_int are macros defined outside this
+ * chunk; presumably they consume input through the local p/end pointers
+ * and return -1 on mismatch - confirm against their definitions.
+ */
+static int
+http1_parse_status_line (http_req_t *req, u8 *rx_buf)
+{
+ int i;
+ u32 next_line_offset;
+ u8 *p, *end;
+ u16 status_code = 0;
+
+ i = http_v_find_index (rx_buf, 0, 0, "\r\n");
+ /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */
+ if (i < 0)
+ {
+ clib_warning ("status line incomplete");
+ return -1;
+ }
+ HTTP_DBG (2, "status line length: %d", i);
+ /* shortest accepted line is "HTTP/1.x NNN", i.e. 12 characters */
+ if (i < 12)
+ {
+ clib_warning ("status line too short (%d)", i);
+ return -1;
+ }
+ /* control data so far: the status line plus its CRLF */
+ req->control_data_len = i + 2;
+ next_line_offset = req->control_data_len;
+ p = rx_buf;
+ end = rx_buf + i;
+
+ /* there should be at least one more CRLF */
+ if (vec_len (rx_buf) < (next_line_offset + 2))
+ {
+ clib_warning ("malformed message, too short");
+ return -1;
+ }
+
+ /* parse version */
+ expect_char ('H');
+ expect_char ('T');
+ expect_char ('T');
+ expect_char ('P');
+ expect_char ('/');
+ expect_char ('1');
+ expect_char ('.');
+ if (!isdigit (*p++))
+ {
+ clib_warning ("invalid HTTP minor version");
+ return -1;
+ }
+
+ /* skip space(s) */
+ if (*p != ' ')
+ {
+ clib_warning ("no space after HTTP version");
+ return -1;
+ }
+ do
+ {
+ p++;
+ if (p == end)
+ {
+ clib_warning ("no status code");
+ return -1;
+ }
+ }
+ while (*p == ' ');
+
+ /* parse status code */
+ if ((end - p) < 3)
+ {
+ clib_warning ("not enough characters for status code");
+ return -1;
+ }
+ /* three decimal digits: hundreds, tens, ones */
+ parse_int (status_code, 100);
+ parse_int (status_code, 10);
+ parse_int (status_code, 1);
+ if (status_code < 100 || status_code > 599)
+ {
+ clib_warning ("invalid status code %d", status_code);
+ return -1;
+ }
+ req->status_code = http_sc_by_u16 (status_code);
+ HTTP_DBG (0, "status code: %d", status_code);
+
+ /* set buffer offset to next line start */
+ req->rx_buf_offset = next_line_offset;
+
+ return 0;
+}
+
+/*
+ * Scan a header field name starting at *pos and ending at the first ':'.
+ *
+ * Every byte before the colon must be set in the tchar bitmap below.
+ * *field_name_start is always set to the initial *pos. On success
+ * *field_name_len receives the number of name bytes, *pos is moved just
+ * past the ':' and 0 is returned. On an empty name, an invalid byte or a
+ * missing ':' before end, a warning is logged, -1 is returned and *pos is
+ * left untouched.
+ */
+always_inline int
+http1_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start,
+                        u32 *field_name_len)
+{
+  static uword tchar[4] = {
+    /* !#$%'*+-.0123456789 */
+    0x03ff6cba00000000,
+    /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */
+    0x57ffffffc7fffffe,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+  u8 *start = *pos;
+  u8 *cur;
+
+  *field_name_start = start;
+
+  for (cur = start; cur != end; cur++)
+    {
+      /* token character, keep scanning */
+      if (clib_bitmap_get_no_check (tchar, *cur))
+        continue;
+
+      if (*cur != ':')
+        {
+          clib_warning ("invalid character %d", *cur);
+          return -1;
+        }
+
+      /* colon found, the name is everything scanned so far */
+      if (cur == start)
+        {
+          clib_warning ("empty field name");
+          return -1;
+        }
+      *field_name_len = (u32) (cur - start);
+      *pos = cur + 1;
+      return 0;
+    }
+
+  clib_warning ("field name end not found");
+  return -1;
+}
+
+/*
+ * Parse one header field value, starting at *pos (right after the ':' of
+ * the field name) up to and including the terminating CRLF.
+ *
+ * Leading spaces/tabs are skipped and trailing spaces/tabs are excluded
+ * from the reported length. On success *field_value_start points at the
+ * first value byte, *field_value_len holds the trimmed length, *pos is
+ * moved just past the CRLF and 0 is returned. Returns -1, after logging a
+ * warning, when the value is empty, contains a control character other
+ * than tab, has a CR not followed by LF, or when end is reached before the
+ * CRLF.
+ */
+always_inline int
+http1_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start,
+                         u32 *field_value_len)
+{
+  u32 value_len = 0;
+  u8 *p;
+
+  p = *pos;
+
+  /* skip leading whitespace */
+  while (1)
+    {
+      if (p == end)
+        {
+          clib_warning ("field value not found");
+          return -1;
+        }
+      else if (*p != ' ' && *p != '\t')
+        {
+          break;
+        }
+      p++;
+    }
+
+  *field_value_start = p;
+  while (p != end)
+    {
+      if (*p == '\r')
+        {
+          /* both CR and LF must lie inside [p, end) before LF is read;
+           * with only one byte left the read below would go past end */
+          if ((end - p) < 2)
+            {
+              clib_warning ("incorrect field line end");
+              return -1;
+            }
+          p++;
+          if (*p == '\n')
+            {
+              if (value_len == 0)
+                {
+                  clib_warning ("empty field value");
+                  return -1;
+                }
+              p++;
+              *pos = p;
+              /* skip trailing whitespace; the first value byte is never
+               * whitespace, so this loop stops inside the value */
+              p = *field_value_start + value_len - 1;
+              while (*p == ' ' || *p == '\t')
+                {
+                  p--;
+                  value_len--;
+                }
+              *field_value_len = value_len;
+              return 0;
+            }
+          clib_warning ("CR without LF");
+          return -1;
+        }
+      /* control characters other than horizontal tab are not allowed */
+      if (*p < ' ' && *p != '\t')
+        {
+          clib_warning ("invalid character %d", *p);
+          return -1;
+        }
+      p++;
+      value_len++;
+    }
+
+  clib_warning ("field value end not found");
+  return -1;
+}
+
+/*
+ * Walk the header section that starts at req->rx_buf_offset and record
+ * every field line in req->headers.
+ *
+ * Name/value offsets are stored relative to req->headers_offset. The index
+ * of the first Content-Length, Connection, Upgrade and Host field line is
+ * remembered in the corresponding req->*_header_index (~0 when absent).
+ * req->headers_len and req->control_data_len are updated; the latter also
+ * accounts for the empty line (CRLF) that ends the header section.
+ *
+ * Returns 0 on success. On a malformed field line sets *ec to
+ * HTTP_STATUS_BAD_REQUEST and returns -1.
+ */
+static int
+http1_identify_headers (http_req_t *req, u8 *rx_buf, http_status_code_t *ec)
+{
+ int rv;
+ u8 *p, *end, *name_start, *value_start;
+ u32 name_len, value_len;
+ http_field_line_t *field_line;
+ uword header_index;
+
+ vec_reset_length (req->headers);
+ req->content_len_header_index = ~0;
+ req->connection_header_index = ~0;
+ req->upgrade_header_index = ~0;
+ req->host_header_index = ~0;
+ req->headers_offset = req->rx_buf_offset;
+
+ /* check if we have any header */
+ if ((rx_buf[req->rx_buf_offset] == '\r') &&
+ (rx_buf[req->rx_buf_offset + 1] == '\n'))
+ {
+ /* just another CRLF -> no headers */
+ HTTP_DBG (2, "no headers");
+ req->headers_len = 0;
+ req->control_data_len += 2;
+ return 0;
+ }
+
+ end = vec_end (rx_buf);
+ p = rx_buf + req->rx_buf_offset;
+
+ while (1)
+ {
+ rv = http1_parse_field_name (&p, end, &name_start, &name_len);
+ if (rv != 0)
+ {
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ rv = http1_parse_field_value (&p, end, &value_start, &value_len);
+ /* at least two more bytes must follow so the CRLF check at the bottom
+ * of the loop stays inside the buffer */
+ if (rv != 0 || (end - p) < 2)
+ {
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+
+ vec_add2 (req->headers, field_line, 1);
+ field_line->name_offset = (name_start - rx_buf) - req->headers_offset;
+ field_line->name_len = name_len;
+ field_line->value_offset = (value_start - rx_buf) - req->headers_offset;
+ field_line->value_len = value_len;
+ header_index = field_line - req->headers;
+
+ /* find headers that will be used later in preprocessing */
+ /* names are case-insensitive (RFC9110 section 5.1) */
+ /* only the first occurrence of each header is remembered */
+ if (req->content_len_header_index == ~0 &&
+ http_token_is_case (
+ (const char *) name_start, name_len,
+ http_header_name_token (HTTP_HEADER_CONTENT_LENGTH)))
+ req->content_len_header_index = header_index;
+ else if (req->connection_header_index == ~0 &&
+ http_token_is_case (
+ (const char *) name_start, name_len,
+ http_header_name_token (HTTP_HEADER_CONNECTION)))
+ req->connection_header_index = header_index;
+ else if (req->upgrade_header_index == ~0 &&
+ http_token_is_case (
+ (const char *) name_start, name_len,
+ http_header_name_token (HTTP_HEADER_UPGRADE)))
+ req->upgrade_header_index = header_index;
+ else if (req->host_header_index == ~0 &&
+ http_token_is_case ((const char *) name_start, name_len,
+ http_header_name_token (HTTP_HEADER_HOST)))
+ req->host_header_index = header_index;
+
+ /* are we done? */
+ if (*p == '\r' && *(p + 1) == '\n')
+ break;
+ }
+
+ /* headers_len covers all field lines, the final empty line adds 2 more */
+ req->headers_len = p - (rx_buf + req->headers_offset);
+ req->control_data_len += (req->headers_len + 2);
+ HTTP_DBG (2, "headers length: %u", req->headers_len);
+ HTTP_DBG (2, "headers offset: %u", req->headers_offset);
+
+ return 0;
+}
+
+/*
+ * Work out whether the message carries a body and, if so, where it starts.
+ *
+ * req->body_len is reset to 0 first. No body is assumed when there are no
+ * headers, when the request is a tunnel, or when no Content-Length header
+ * was seen. Otherwise the length is obtained via
+ * http_parse_content_length () and req->body_offset is set right behind
+ * the empty line that ends the headers.
+ *
+ * Returns 0 on success; when Content-Length parsing fails, sets *ec to
+ * HTTP_STATUS_BAD_REQUEST and returns that parser's error code.
+ */
+static int
+http1_identify_message_body (http_req_t *req, u8 *rx_buf,
+                             http_status_code_t *ec)
+{
+  int err;
+
+  req->body_len = 0;
+
+  if (req->headers_len == 0)
+    {
+      HTTP_DBG (2, "no header, no message-body");
+      return 0;
+    }
+  else if (req->is_tunnel)
+    {
+      HTTP_DBG (2, "tunnel, no message-body");
+      return 0;
+    }
+  /* TODO check for chunked transfer coding */
+  else if (req->content_len_header_index == ~0)
+    {
+      HTTP_DBG (2, "Content-Length header not present, no message-body");
+      return 0;
+    }
+
+  err = http_parse_content_length (req, rx_buf);
+  if (err != 0)
+    {
+      *ec = HTTP_STATUS_BAD_REQUEST;
+      return err;
+    }
+
+  /* body follows the headers and the terminating CRLF */
+  req->body_offset = req->headers_offset + req->headers_len + 2;
+  HTTP_DBG (2, "body length: %llu", req->body_len);
+  HTTP_DBG (2, "body offset: %u", req->body_offset);
+
+  return 0;
+}
+
+/*
+ * Detect a GET request that asks for a protocol upgrade.
+ *
+ * Does nothing unless the method is GET and both Connection and Upgrade
+ * headers were recorded. When the Connection value is "upgrade"
+ * (case-insensitive) and the Upgrade value matches one of the protocols
+ * listed by foreach_http_upgrade_proto, req->upgrade_proto is set, the
+ * request is flagged as a tunnel and its method is rewritten to
+ * HTTP_REQ_CONNECT. Any other Upgrade value leaves the request untouched.
+ */
+static void
+http1_check_connection_upgrade (http_req_t *req, u8 *rx_buf)
+{
+ http_field_line_t *connection, *upgrade;
+ u8 skip;
+
+ /* non-zero when any of the three preconditions is not met */
+ skip = (req->method != HTTP_REQ_GET) + (req->connection_header_index == ~0) +
+ (req->upgrade_header_index == ~0);
+ if (skip)
+ return;
+
+ connection = vec_elt_at_index (req->headers, req->connection_header_index);
+ /* connection options are case-insensitive (RFC9110 7.6.1) */
+ if (http_token_is_case (
+ http_field_line_value_token (connection, req, rx_buf),
+ http_token_lit ("upgrade")))
+ {
+ upgrade = vec_elt_at_index (req->headers, req->upgrade_header_index);
+
+ /* check upgrade protocol, we want to ignore something like upgrade to
+ * newer HTTP version, only tunnels are supported */
+ /* the macro below expands to one "else if" per known upgrade protocol,
+ * the leading "if (0)" only anchors that chain */
+ if (0)
+ ;
+#define _(sym, str) \
+ else if (http_token_is_case ( \
+ http_field_line_value_token (upgrade, req, rx_buf), \
+ http_token_lit (str))) req->upgrade_proto = \
+ HTTP_UPGRADE_PROTO_##sym;
+ foreach_http_upgrade_proto
+#undef _
+ else return;
+
+ req->is_tunnel = 1;
+ req->method = HTTP_REQ_CONNECT;
+ }
+}
+
+/*
+ * Fill in target components that an HTTP/1.x request-target does not
+ * carry itself.
+ *
+ * Nothing is done for absolute-form targets. Otherwise the scheme is
+ * derived from the transport (https over TLS, http otherwise). Unless the
+ * target is in authority-form, the authority is then taken from the Host
+ * header, when one was recorded by http1_identify_headers ().
+ */
+static void
+http1_target_fixup (http_conn_t *hc, http_req_t *req)
+{
+  http_field_line_t *host;
+
+  if (req->target_form == HTTP_TARGET_ABSOLUTE_FORM)
+    return;
+
+  /* scheme fixup */
+  req->scheme = http_get_transport_proto (hc) == TRANSPORT_PROTO_TLS ?
+                  HTTP_URL_SCHEME_HTTPS :
+                  HTTP_URL_SCHEME_HTTP;
+
+  /* the authority comes from the Host header, not from Connection */
+  if (req->target_form == HTTP_TARGET_AUTHORITY_FORM ||
+      req->host_header_index == ~0)
+    return;
+
+  /* authority fixup */
+  host = vec_elt_at_index (req->headers, req->host_header_index);
+  req->target_authority_offset = host->value_offset;
+  req->target_authority_len = host->value_len;
+}
+
+/*
+ * Append the application supplied headers to *tx_buf as HTTP/1.x field
+ * lines ("name: value\r\n").
+ *
+ * The header list obtained from http_get_app_header_list () is walked for
+ * msg->data.headers_len bytes. An entry whose first u32 has
+ * HTTP_CUSTOM_HEADER_NAME_BIT set carries its own name token followed by a
+ * value token; any other entry is an http_app_header_t whose name is
+ * looked up with http_header_name_token ().
+ */
+static void
+http1_write_app_headers (http_req_t *req, http_msg_t *msg, u8 **tx_buf)
+{
+ u8 *app_headers, *p, *end;
+ u32 *tmp;
+
+ /* read app header list */
+ app_headers = http_get_app_header_list (req, msg);
+
+ /* serialize app headers to tx_buf */
+ end = app_headers + msg->data.headers_len;
+ while (app_headers < end)
+ {
+ /* custom header name? */
+ tmp = (u32 *) app_headers;
+ if (PREDICT_FALSE (*tmp & HTTP_CUSTOM_HEADER_NAME_BIT))
+ {
+ http_custom_token_t *name, *value;
+ name = (http_custom_token_t *) app_headers;
+ /* the flag bit is stored in the length field, mask it out */
+ u32 name_len = name->len & ~HTTP_CUSTOM_HEADER_NAME_BIT;
+ app_headers += sizeof (http_custom_token_t) + name_len;
+ value = (http_custom_token_t *) app_headers;
+ app_headers += sizeof (http_custom_token_t) + value->len;
+ /* +4 for ": " and "\r\n" */
+ vec_add2 (*tx_buf, p, name_len + value->len + 4);
+ clib_memcpy (p, name->token, name_len);
+ p += name_len;
+ *p++ = ':';
+ *p++ = ' ';
+ clib_memcpy (p, value->token, value->len);
+ p += value->len;
+ *p++ = '\r';
+ *p++ = '\n';
+ }
+ else
+ {
+ http_app_header_t *header;
+ header = (http_app_header_t *) app_headers;
+ app_headers += sizeof (http_app_header_t) + header->value.len;
+ http_token_t name = { http_header_name_token (header->name) };
+ /* +4 for ": " and "\r\n" */
+ vec_add2 (*tx_buf, p, name.len + header->value.len + 4);
+ clib_memcpy (p, name.base, name.len);
+ p += name.len;
+ *p++ = ':';
+ *p++ = ' ';
+ clib_memcpy (p, header->value.token, header->value.len);
+ p += header->value.len;
+ *p++ = '\r';
+ *p++ = '\n';
+ }
+ }
+}
+
+/*************************************/
+/* request state machine handlers RX */
+/*************************************/
+
+/*
+ * RX handler for the state in which a response is awaited from the
+ * transport.
+ *
+ * Reads the message into the connection rx buffer, parses the status line,
+ * headers and body length, then hands an http_msg_t followed by as much of
+ * the raw message as fits to the application. Moves the request to
+ * HTTP_REQ_STATE_WAIT_APP_METHOD when the whole body was delivered, or to
+ * HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA when body bytes remain.
+ *
+ * Returns HTTP_SM_STOP when there is nothing to read yet or after a
+ * successful hand-off. On any parse or space error the transport data is
+ * drained, the session is notified as closing and closed, the transport is
+ * disconnected and HTTP_SM_ERROR is returned.
+ */
+static http_sm_result_t
+http1_req_state_wait_transport_reply (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ int rv;
+ http_msg_t msg = {};
+ u32 len, max_enq, body_sent;
+ http_status_code_t ec;
+ u8 *rx_buf;
+
+ rx_buf = http_get_rx_buf (hc);
+ rv = http1_read_message (hc, rx_buf);
+
+ /* Nothing yet, wait for data or timer expire */
+ if (rv)
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
+
+ HTTP_DBG (3, "%v", rx_buf);
+
+ if (vec_len (rx_buf) < 8)
+ {
+ clib_warning ("response buffer too short");
+ goto error;
+ }
+
+ rv = http1_parse_status_line (req, rx_buf);
+ if (rv)
+ goto error;
+
+ /* ec is filled in by the helpers but not used on the client side */
+ rv = http1_identify_headers (req, rx_buf, &ec);
+ if (rv)
+ goto error;
+
+ rv = http1_identify_message_body (req, rx_buf, &ec);
+ if (rv)
+ goto error;
+
+ /* send at least "control data" which is necessary minimum,
+ * if there is some space send also portion of body */
+ max_enq = http_io_as_max_write (req);
+ max_enq -= sizeof (msg);
+ if (max_enq < req->control_data_len)
+ {
+ clib_warning ("not enough room for control data in app's rx fifo");
+ goto error;
+ }
+ len = clib_min (max_enq, vec_len (rx_buf));
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = req->status_code;
+ msg.data.headers_offset = req->headers_offset;
+ msg.data.headers_len = req->headers_len;
+ msg.data.body_offset = req->body_offset;
+ msg.data.body_len = req->body_len;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = len;
+ msg.data.headers_ctx = pointer_to_uword (req->headers);
+
+ /* message descriptor first, raw message bytes right behind it */
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } };
+
+ http_io_as_write_segs (req, segs, 2);
+
+ /* whatever was sent beyond the control data is body */
+ body_sent = len - req->control_data_len;
+ req->to_recv = req->body_len - body_sent;
+ if (req->to_recv == 0)
+ {
+ /* all sent, we are done */
+ http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD);
+ }
+ else
+ {
+ /* stream rest of the response body */
+ http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA);
+ }
+
+ http_io_ts_drain (hc, len);
+ http_io_ts_after_read (hc, 1);
+ http_app_worker_rx_notify (req);
+ return HTTP_SM_STOP;
+
+error:
+ http_io_ts_drain_all (hc);
+ http_io_ts_after_read (hc, 1);
+ session_transport_closing_notify (&req->connection);
+ session_transport_closed_notify (&req->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_ERROR;
+}
+
+/*
+ * RX handler for the state in which a request is awaited from the
+ * transport.
+ *
+ * Reads the message into the connection rx buffer, parses the request
+ * line and headers, applies the target fixup and upgrade detection,
+ * determines the body length and hands an http_msg_t followed by the raw
+ * request bytes (at most one request) to the application. Moves the
+ * request to HTTP_REQ_STATE_WAIT_APP_REPLY when the whole body was
+ * delivered, or to HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA otherwise.
+ *
+ * Returns HTTP_SM_STOP when there is nothing to read yet or after a
+ * successful hand-off. On error the transport data is drained, an error
+ * response with status ec is sent, the session is notified as closing, the
+ * transport is disconnected and HTTP_SM_ERROR is returned.
+ */
+static http_sm_result_t
+http1_req_state_wait_transport_method (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ http_status_code_t ec;
+ http_msg_t msg;
+ int rv;
+ u32 len, max_enq, body_sent;
+ u64 max_deq;
+ u8 *rx_buf;
+
+ rx_buf = http_get_rx_buf (hc);
+ rv = http1_read_message (hc, rx_buf);
+
+ /* Nothing yet, wait for data or timer expire */
+ if (rv)
+ return HTTP_SM_STOP;
+
+ HTTP_DBG (3, "%v", rx_buf);
+
+ if (vec_len (rx_buf) < 8)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ rv = http1_parse_request_line (req, rx_buf, &ec);
+ if (rv)
+ goto error;
+
+ rv = http1_identify_headers (req, rx_buf, &ec);
+ if (rv)
+ goto error;
+
+ /* both helpers depend on the header indexes recorded just above */
+ http1_target_fixup (hc, req);
+ http1_check_connection_upgrade (req, rx_buf);
+
+ rv = http1_identify_message_body (req, rx_buf, &ec);
+ if (rv)
+ goto error;
+
+ /* send at least "control data" which is necessary minimum,
+ * if there is some space send also portion of body */
+ max_enq = http_io_as_max_write (req);
+ max_enq -= sizeof (msg);
+ if (max_enq < req->control_data_len)
+ {
+ clib_warning ("not enough room for control data in app's rx fifo");
+ ec = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+ /* do not dequeue more than one HTTP request, we do not support pipelining */
+ max_deq = clib_min (req->control_data_len + req->body_len, vec_len (rx_buf));
+ len = clib_min (max_enq, max_deq);
+
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = req->method;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = len;
+ msg.data.scheme = req->scheme;
+ msg.data.target_authority_offset = req->target_authority_offset;
+ msg.data.target_authority_len = req->target_authority_len;
+ msg.data.target_path_offset = req->target_path_offset;
+ msg.data.target_path_len = req->target_path_len;
+ msg.data.target_query_offset = req->target_query_offset;
+ msg.data.target_query_len = req->target_query_len;
+ msg.data.headers_offset = req->headers_offset;
+ msg.data.headers_len = req->headers_len;
+ msg.data.body_offset = req->body_offset;
+ msg.data.body_len = req->body_len;
+ msg.data.headers_ctx = pointer_to_uword (req->headers);
+ msg.data.upgrade_proto = req->upgrade_proto;
+
+ /* message descriptor first, raw request bytes right behind it */
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { rx_buf, len } };
+
+ http_io_as_write_segs (req, segs, 2);
+
+ /* whatever was sent beyond the control data is body */
+ body_sent = len - req->control_data_len;
+ req->to_recv = req->body_len - body_sent;
+ if (req->to_recv == 0)
+ {
+ /* drop everything, we do not support pipelining */
+ http_io_ts_drain_all (hc);
+ /* all sent, we are done */
+ http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY);
+ }
+ else
+ {
+ http_io_ts_drain (hc, len);
+ /* stream rest of the request body */
+ http_req_state_change (req, HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA);
+ }
+
+ http_app_worker_rx_notify (req);
+ http_io_ts_after_read (hc, 1);
+
+ return HTTP_SM_STOP;
+
+error:
+ http_io_ts_drain_all (hc);
+ http_io_ts_after_read (hc, 1);
+ http1_send_error (hc, ec, 0);
+ session_transport_closing_notify (&req->connection);
+ http_disconnect_transport (hc);
+
+ return HTTP_SM_ERROR;
+}
+
+/*
+ * RX handler that streams the remaining message body from the transport
+ * to the application.
+ *
+ * Forwards as many bytes as both sides allow and decrements req->to_recv
+ * by the amount written. When nothing remains the request moves to
+ * HTTP_REQ_STATE_WAIT_APP_REPLY (server) or
+ * HTTP_REQ_STATE_WAIT_APP_METHOD (client).
+ *
+ * Returns HTTP_SM_STOP in the normal case. If more bytes were written than
+ * were still expected, the transport is disconnected and HTTP_SM_ERROR is
+ * returned.
+ */
+static http_sm_result_t
+http1_req_state_transport_io_more_data (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 max_len, max_deq, max_enq, n_segs = 2;
+ svm_fifo_seg_t segs[n_segs];
+ int n_written;
+
+ max_deq = http_io_ts_max_read (hc);
+ if (max_deq == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
+
+ max_enq = http_io_as_max_write (req);
+ if (max_enq == 0)
+ {
+ HTTP_DBG (1, "app's rx fifo full");
+ /* ask to be notified once the app dequeues something */
+ http_io_as_add_want_deq_ntf (req);
+ return HTTP_SM_STOP;
+ }
+
+ max_len = clib_min (max_enq, max_deq);
+ http_io_ts_read_segs (hc, segs, &n_segs, max_len);
+
+ n_written = http_io_as_write_segs (req, segs, n_segs);
+
+ if (n_written > req->to_recv)
+ {
+ clib_warning ("http protocol error: received more data than expected");
+ session_transport_closing_notify (&req->connection);
+ http_disconnect_transport (hc);
+ http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_METHOD);
+ return HTTP_SM_ERROR;
+ }
+ req->to_recv -= n_written;
+ http_io_ts_drain (hc, n_written);
+ HTTP_DBG (1, "drained %d from ts; remains %lu", n_written, req->to_recv);
+
+ /* Finished transaction:
+ * server back to HTTP_REQ_STATE_WAIT_APP_REPLY
+ * client to HTTP_REQ_STATE_WAIT_APP_METHOD */
+ if (req->to_recv == 0)
+ http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ?
+ HTTP_REQ_STATE_WAIT_APP_REPLY :
+ HTTP_REQ_STATE_WAIT_APP_METHOD);
+
+ http_app_worker_rx_notify (req);
+
+ http_io_ts_after_read (hc, 0);
+
+ return HTTP_SM_STOP;
+}
+
+/*
+ * RX handler for tunnel mode: forwards raw bytes from the transport to the
+ * application without any HTTP parsing.
+ *
+ * Transfers at most the minimum of what can be read from the transport
+ * and what can be written to the application, drains the amount actually
+ * written and notifies the application. Always returns HTTP_SM_STOP.
+ */
+static http_sm_result_t
+http1_req_state_tunnel_rx (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 max_deq, max_enq, max_read, n_segs = 2;
+ svm_fifo_seg_t segs[n_segs];
+ int n_written = 0;
+
+ HTTP_DBG (1, "tunnel received data from client");
+
+ max_deq = http_io_ts_max_read (hc);
+ if (PREDICT_FALSE (max_deq == 0))
+ {
+ HTTP_DBG (1, "max_deq == 0");
+ return HTTP_SM_STOP;
+ }
+ max_enq = http_io_as_max_write (req);
+ if (max_enq == 0)
+ {
+ HTTP_DBG (1, "app's rx fifo full");
+ /* ask to be notified once the app dequeues something */
+ http_io_as_add_want_deq_ntf (req);
+ return HTTP_SM_STOP;
+ }
+ max_read = clib_min (max_enq, max_deq);
+ http_io_ts_read_segs (hc, segs, &n_segs, max_read);
+ n_written = http_io_as_write_segs (req, segs, n_segs);
+ /* only remove from the transport what the app actually accepted */
+ http_io_ts_drain (hc, n_written);
+ HTTP_DBG (1, "transfered %u bytes", n_written);
+ http_app_worker_rx_notify (req);
+ http_io_ts_after_read (hc, 0);
+
+ return HTTP_SM_STOP;
+}
+
+/*
+ * RX handler for UDP tunnel (datagram) mode: converts capsules received
+ * from the transport into datagrams for the application.
+ *
+ * For every complete capsule available, the capsule header is decoded with
+ * http_decap_udp_payload_datagram (), the payload is read and written to
+ * the application preceded by a session_dgram_hdr_t. Capsules reported as
+ * unknown (positive return value) are skipped, using req->to_skip to carry
+ * the remainder across calls. An invalid capsule (negative return value)
+ * aborts the session. Processing stops early when a capsule is incomplete
+ * or the application has no room for the datagram.
+ *
+ * Always returns HTTP_SM_STOP.
+ */
+static http_sm_result_t
+http1_req_state_udp_tunnel_rx (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 to_deq, capsule_size, dgram_size, n_read, n_written = 0;
+ int rv;
+ u8 payload_offset = 0;
+ u64 payload_len = 0;
+ session_dgram_hdr_t hdr;
+ u8 *buf = 0;
+
+ HTTP_DBG (1, "udp tunnel received data from client");
+
+ buf = http_get_rx_buf (hc);
+ to_deq = http_io_ts_max_read (hc);
+
+ while (to_deq > 0)
+ {
+ /* some bytes remaining to skip? */
+ if (PREDICT_FALSE (req->to_skip))
+ {
+ if (req->to_skip >= to_deq)
+ {
+ http_io_ts_drain (hc, to_deq);
+ req->to_skip -= to_deq;
+ goto done;
+ }
+ else
+ {
+ http_io_ts_drain (hc, req->to_skip);
+ req->to_skip = 0;
+ }
+ }
+ /* NOTE(review): last argument is presumably a "peek" flag, so the
+ * capsule header stays in the fifo until drained below - confirm */
+ n_read = http_io_ts_read (hc, buf, HTTP_CAPSULE_HEADER_MAX_SIZE, 1);
+ rv = http_decap_udp_payload_datagram (buf, n_read, &payload_offset,
+ &payload_len);
+ HTTP_DBG (1, "rv=%d, payload_offset=%u, payload_len=%llu", rv,
+ payload_offset, payload_len);
+ if (PREDICT_FALSE (rv != 0))
+ {
+ if (rv < 0)
+ {
+ /* capsule datagram is invalid (session need to be aborted) */
+ http_io_ts_drain_all (hc);
+ session_transport_closing_notify (&req->connection);
+ session_transport_closed_notify (&req->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_STOP;
+ }
+ else
+ {
+ /* unknown capsule should be skipped */
+ if (payload_len <= to_deq)
+ {
+ http_io_ts_drain (hc, payload_len);
+ to_deq -= payload_len;
+ continue;
+ }
+ else
+ {
+ /* drop what is available now, remember the rest */
+ http_io_ts_drain (hc, to_deq);
+ req->to_skip = payload_len - to_deq;
+ goto done;
+ }
+ }
+ }
+ capsule_size = payload_offset + payload_len;
+ /* check if we have the full capsule */
+ if (PREDICT_FALSE (to_deq < capsule_size))
+ {
+ HTTP_DBG (1, "capsule not complete");
+ goto done;
+ }
+
+ dgram_size = sizeof (hdr) + payload_len;
+ if (http_io_as_max_write (req) < dgram_size)
+ {
+ HTTP_DBG (1, "app's rx fifo full");
+ http_io_as_add_want_deq_ntf (req);
+ goto done;
+ }
+
+ /* drop the capsule header, only the payload is forwarded */
+ http_io_ts_drain (hc, payload_offset);
+
+ /* read capsule payload */
+ http_io_ts_read (hc, buf, payload_len, 0);
+
+ hdr.data_length = payload_len;
+ hdr.data_offset = 0;
+
+ /* send datagram header and payload */
+ svm_fifo_seg_t segs[2] = { { (u8 *) &hdr, sizeof (hdr) },
+ { buf, payload_len } };
+ http_io_as_write_segs (req, segs, 2);
+
+ n_written += dgram_size;
+ to_deq -= capsule_size;
+ }
+
+done:
+ HTTP_DBG (1, "written %lu bytes", n_written);
+
+ /* notify the app only when at least one datagram was delivered */
+ if (n_written)
+ http_app_worker_rx_notify (req);
+
+ http_io_ts_after_read (hc, 0);
+
+ return HTTP_SM_STOP;
+}
+
+/*************************************/
+/* request state machine handlers TX */
+/*************************************/
+
+/*
+ * TX handler for the state in which the server waits for the
+ * application's response.
+ *
+ * Validates the application message, formats the status line and the
+ * protocol-layer headers into the connection tx buffer, appends the
+ * application headers and writes the result to the transport. For a
+ * successful tunnel response (2xx or 101 on a tunnel request) no
+ * Content-Length is added and the request moves to a tunnel state;
+ * otherwise Content-Length is added and the request moves to
+ * HTTP_REQ_STATE_APP_IO_MORE_DATA when there is a body, or back to
+ * HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD when there is none.
+ *
+ * Returns HTTP_SM_CONTINUE when a body has to be sent, HTTP_SM_STOP when
+ * the response is complete or after the error path (error response sent,
+ * transport disconnected), and HTTP_SM_ERROR for an unsupported status
+ * code.
+ */
+static http_sm_result_t
+http1_req_state_wait_app_reply (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u8 *response;
+ u32 max_enq;
+ http_status_code_t sc;
+ http_msg_t msg;
+ http_sm_result_t sm_result = HTTP_SM_ERROR;
+ http_req_state_t next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD;
+
+ http_get_app_msg (req, &msg);
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REPLY)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ /* unlike the checks above, this one returns without sending an error
+ * response or disconnecting */
+ if (msg.code >= HTTP_N_STATUS)
+ {
+ clib_warning ("unsupported status code: %d", msg.code);
+ return HTTP_SM_ERROR;
+ }
+
+ response = http_get_tx_buf (hc);
+ /*
+ * Add "protocol layer" headers:
+ * - current time
+ * - server name
+ * - data length
+ */
+ response =
+ format (response, response_template, http_status_code_str[msg.code],
+ /* Date */
+ format_http_time_now, hc,
+ /* Server */
+ hc->app_name);
+
+ /* RFC9110 8.6: A server MUST NOT send Content-Length header field in a
+ * 2xx (Successful) response to CONNECT or with a status code of 101
+ * (Switching Protocols). */
+ if (req->is_tunnel && (http_status_code_str[msg.code][0] == '2' ||
+ msg.code == HTTP_STATUS_SWITCHING_PROTOCOLS))
+ {
+ ASSERT (msg.data.body_len == 0);
+ next_state = HTTP_REQ_STATE_TUNNEL;
+ if (req->upgrade_proto > HTTP_UPGRADE_PROTO_NA)
+ {
+ response = format (response, connection_upgrade_template,
+ http1_upgrade_proto_str[req->upgrade_proto]);
+ /* connect-udp in datagram mode uses the capsule aware handlers */
+ if (req->upgrade_proto == HTTP_UPGRADE_PROTO_CONNECT_UDP &&
+ hc->udp_tunnel_mode == HTTP_UDP_TUNNEL_DGRAM)
+ next_state = HTTP_REQ_STATE_UDP_TUNNEL;
+ }
+ /* cleanup some stuff we don't need anymore in tunnel mode */
+ vec_free (req->headers);
+ http_buffer_free (&req->tx_buf);
+ req->to_skip = 0;
+ }
+ else
+ response = format (response, content_len_template, msg.data.body_len);
+
+ /* Add headers from app (if any) */
+ if (msg.data.headers_len)
+ {
+ HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
+ http1_write_app_headers (req, &msg, &response);
+ }
+ /* Add empty line after headers */
+ response = format (response, "\r\n");
+ HTTP_DBG (3, "%v", response);
+
+ /* status line and headers are written in one piece or not at all */
+ max_enq = http_io_ts_max_write (hc, sp);
+ if (max_enq < vec_len (response))
+ {
+ clib_warning ("sending status-line and headers failed!");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+ http_io_ts_write (hc, response, vec_len (response), sp);
+
+ if (msg.data.body_len)
+ {
+ /* Start sending the actual data */
+ http_req_tx_buffer_init (req, &msg);
+ next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA;
+ sm_result = HTTP_SM_CONTINUE;
+ }
+ else
+ {
+ /* No response body, we are done */
+ sm_result = HTTP_SM_STOP;
+ }
+
+ http_req_state_change (req, next_state);
+
+ http_io_ts_after_write (hc, 0);
+ return sm_result;
+
+error:
+ http1_send_error (hc, sc, sp);
+ session_transport_closing_notify (&req->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_STOP;
+}
+
+/*
+ * TX handler for the state in which the client waits for the
+ * application's request.
+ *
+ * Validates the application message, formats the request line and the
+ * protocol-layer headers for GET or POST into the connection tx buffer,
+ * appends the application headers and writes the result to the transport.
+ * GET moves the request to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY; POST
+ * initializes the tx buffer for the body and moves it to
+ * HTTP_REQ_STATE_APP_IO_MORE_DATA.
+ *
+ * Returns HTTP_SM_STOP for GET and HTTP_SM_CONTINUE for POST. On a message
+ * that fails validation, or when the transport cannot take the request
+ * head, the application data is drained, the session is notified as
+ * closing and closed, the transport is disconnected and HTTP_SM_ERROR is
+ * returned.
+ */
+static http_sm_result_t
+http1_req_state_wait_app_method (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ http_msg_t msg;
+ u8 *request = 0, *target;
+ u32 max_enq;
+ http_sm_result_t sm_result = HTTP_SM_ERROR;
+ http_req_state_t next_state;
+
+ http_get_app_msg (req, &msg);
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REQUEST)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ goto error;
+ }
+
+ /* read request target */
+ target = http_get_app_target (req, &msg);
+
+ request = http_get_tx_buf (hc);
+ /* currently we support only GET and POST method */
+ if (msg.method_type == HTTP_REQ_GET)
+ {
+ if (msg.data.body_len)
+ {
+ clib_warning ("GET request shouldn't include data");
+ goto error;
+ }
+ /*
+ * Add "protocol layer" headers:
+ * - host
+ * - user agent
+ */
+ request = format (request, get_request_template,
+ /* target */
+ target,
+ /* Host */
+ hc->host,
+ /* User-Agent */
+ hc->app_name);
+
+ next_state = HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY;
+ sm_result = HTTP_SM_STOP;
+ }
+ else if (msg.method_type == HTTP_REQ_POST)
+ {
+ if (!msg.data.body_len)
+ {
+ clib_warning ("POST request should include data");
+ goto error;
+ }
+ /*
+ * Add "protocol layer" headers:
+ * - host
+ * - user agent
+ * - content length
+ */
+ request = format (request, post_request_template,
+ /* target */
+ target,
+ /* Host */
+ hc->host,
+ /* User-Agent */
+ hc->app_name,
+ /* Content-Length */
+ msg.data.body_len);
+
+ http_req_tx_buffer_init (req, &msg);
+
+ next_state = HTTP_REQ_STATE_APP_IO_MORE_DATA;
+ sm_result = HTTP_SM_CONTINUE;
+ }
+ else
+ {
+ clib_warning ("unsupported method %d", msg.method_type);
+ goto error;
+ }
+
+ /* Add headers from app (if any) */
+ if (msg.data.headers_len)
+ {
+ HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
+ http1_write_app_headers (req, &msg, &request);
+ }
+ /* Add empty line after headers */
+ request = format (request, "\r\n");
+ HTTP_DBG (3, "%v", request);
+
+ /* request line and headers are written in one piece or not at all */
+ max_enq = http_io_ts_max_write (hc, sp);
+ if (max_enq < vec_len (request))
+ {
+ clib_warning ("sending request-line and headers failed!");
+ /* sm_result was already set by the method branch, reset it */
+ sm_result = HTTP_SM_ERROR;
+ goto error;
+ }
+ http_io_ts_write (hc, request, vec_len (request), sp);
+
+ http_req_state_change (req, next_state);
+
+ http_io_ts_after_write (hc, 0);
+ goto done;
+
+error:
+ http_io_as_drain_all (req);
+ session_transport_closing_notify (&req->connection);
+ session_transport_closed_notify (&req->connection);
+ http_disconnect_transport (hc);
+
+done:
+ return sm_result;
+}
+
+/*
+ * TX handler that streams the message body from req->tx_buf to the
+ * transport.
+ *
+ * Writes as much buffered data as the transport accepts and drains the
+ * written amount from the buffer. Once the buffer is empty it is freed and
+ * the request moves to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD (server) or
+ * HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY (client).
+ *
+ * Always returns HTTP_SM_STOP, after calling
+ * http1_check_and_deschedule ().
+ */
+static http_sm_result_t
+http1_req_state_app_io_more_data (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 max_write, n_read, n_segs, n_written = 0;
+ http_buffer_t *hb = &req->tx_buf;
+ svm_fifo_seg_t *seg;
+ u8 finished = 0;
+
+ ASSERT (http_buffer_bytes_left (hb) > 0);
+ max_write = http_io_ts_max_write (hc, sp);
+ if (max_write == 0)
+ {
+ HTTP_DBG (1, "ts tx fifo full");
+ goto check_fifo;
+ }
+
+ n_read = http_buffer_get_segs (hb, max_write, &seg, &n_segs);
+ if (n_read == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ goto check_fifo;
+ }
+
+ n_written = http_io_ts_write_segs (hc, seg, n_segs, sp);
+
+ /* only consume what the transport actually accepted */
+ http_buffer_drain (hb, n_written);
+ finished = http_buffer_bytes_left (hb) == 0;
+
+ if (finished)
+ {
+ /* Finished transaction:
+ * server back to HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD
+ * client to HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY */
+ http_req_state_change (req, (hc->flags & HTTP_CONN_F_IS_SERVER) ?
+ HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD :
+ HTTP_REQ_STATE_WAIT_TRANSPORT_REPLY);
+ http_buffer_free (hb);
+ }
+ http_io_ts_after_write (hc, finished);
+
+check_fifo:
+ http1_check_and_deschedule (hc, req, sp);
+ return HTTP_SM_STOP;
+}
+
+/*
+ * TX handler for tunnel mode: forwards raw bytes from the application to
+ * the transport without any HTTP framing.
+ *
+ * Transfers at most the minimum of what can be read from the application
+ * and what can be written to the transport, then drains the amount
+ * actually written. Always returns HTTP_SM_STOP, after calling
+ * http1_check_and_deschedule ().
+ */
+static http_sm_result_t
+http1_req_state_tunnel_tx (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 max_deq, max_enq, max_read, n_segs = 2;
+ svm_fifo_seg_t segs[n_segs];
+ int n_written = 0;
+
+ HTTP_DBG (1, "tunnel received data from target");
+
+ max_deq = http_io_as_max_read (req);
+ if (PREDICT_FALSE (max_deq == 0))
+ {
+ HTTP_DBG (1, "max_deq == 0");
+ goto check_fifo;
+ }
+ max_enq = http_io_ts_max_write (hc, sp);
+ if (max_enq == 0)
+ {
+ HTTP_DBG (1, "ts tx fifo full");
+ goto check_fifo;
+ }
+ max_read = clib_min (max_enq, max_deq);
+ http_io_as_read_segs (req, segs, &n_segs, max_read);
+ n_written = http_io_ts_write_segs (hc, segs, n_segs, sp);
+ /* only remove from the app fifo what the transport accepted */
+ http_io_as_drain (req, n_written);
+ http_io_ts_after_write (hc, 0);
+
+check_fifo:
+ http1_check_and_deschedule (hc, req, sp);
+ return HTTP_SM_STOP;
+}
+
+/*
+ * TX handler for UDP tunnel (datagram) mode: wraps datagrams from the
+ * application into capsules and writes them to the transport.
+ *
+ * For each datagram the session_dgram_hdr_t is read, a capsule header is
+ * built in the connection tx buffer with
+ * http_encap_udp_payload_datagram (), the payload is read behind it and
+ * the capsule is written out. Processing stops when the transport cannot
+ * take the next capsule.
+ *
+ * Always returns HTTP_SM_STOP, after calling
+ * http1_check_and_deschedule ().
+ */
+static http_sm_result_t
+http1_req_state_udp_tunnel_tx (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp)
+{
+ u32 to_deq, capsule_size, dgram_size;
+ u8 written = 0;
+ session_dgram_hdr_t hdr;
+ u8 *buf;
+ u8 *payload;
+
+ HTTP_DBG (1, "udp tunnel received data from target");
+
+ buf = http_get_tx_buf (hc);
+ to_deq = http_io_as_max_read (req);
+
+ while (to_deq > 0)
+ {
+ /* read datagram header */
+ /* NOTE(review): last argument is presumably a "peek" flag; how the
+ * payload read below ends up behind the header depends on
+ * http_io_as_read (), which is not visible here - confirm */
+ http_io_as_read (req, (u8 *) &hdr, sizeof (hdr), 1);
+ ASSERT (hdr.data_length <= HTTP_UDP_PAYLOAD_MAX_LEN);
+ dgram_size = hdr.data_length + SESSION_CONN_HDR_LEN;
+ ASSERT (to_deq >= dgram_size);
+
+ if (http_io_ts_max_write (hc, sp) <
+ (hdr.data_length + HTTP_UDP_PROXY_DATAGRAM_CAPSULE_OVERHEAD))
+ {
+ HTTP_DBG (1, "ts tx fifo full");
+ goto done;
+ }
+
+ /* create capsule header */
+ payload = http_encap_udp_payload_datagram (buf, hdr.data_length);
+ capsule_size = (payload - buf) + hdr.data_length;
+ /* read payload */
+ http_io_as_read (req, payload, hdr.data_length, 1);
+ http_io_as_drain (req, dgram_size);
+ /* send capsule */
+ http_io_ts_write (hc, buf, capsule_size, sp);
+
+ written = 1;
+ to_deq -= dgram_size;
+ }
+
+done:
+ /* signal the transport only when at least one capsule was written */
+ if (written)
+ http_io_ts_after_write (hc, 0);
+ http1_check_and_deschedule (hc, req, sp);
+ return HTTP_SM_STOP;
+}
+
+/*************************/
+/* request state machine */
+/*************************/
+
+/*
+ * TX (application -> transport) handlers, indexed by request state.
+ * A zero entry means the state has no TX handler; see
+ * http1_req_state_is_tx_valid ().
+ */
+static http_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = {
+ 0, /* idle */
+ http1_req_state_wait_app_method,
+ 0, /* wait transport reply */
+ 0, /* transport io more data */
+ 0, /* wait transport method */
+ http1_req_state_wait_app_reply,
+ http1_req_state_app_io_more_data,
+ http1_req_state_tunnel_tx,
+ http1_req_state_udp_tunnel_tx,
+};
+
+/*
+ * RX (transport -> application) handlers, indexed by request state.
+ * A zero entry means the state has no RX handler; see
+ * http1_req_state_is_rx_valid ().
+ */
+static http_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = {
+ 0, /* idle */
+ 0, /* wait app method */
+ http1_req_state_wait_transport_reply,
+ http1_req_state_transport_io_more_data,
+ http1_req_state_wait_transport_method,
+ 0, /* wait app reply */
+ 0, /* app io more data */
+ http1_req_state_tunnel_rx,
+ http1_req_state_udp_tunnel_rx,
+};
+
+/* Return 1 when the request's current state has a TX handler, else 0. */
+static_always_inline int
+http1_req_state_is_tx_valid (http_req_t *req)
+{
+  return tx_state_funcs[req->state] != 0;
+}
+
+/* Return 1 when the request's current state has an RX handler, else 0. */
+static_always_inline int
+http1_req_state_is_rx_valid (http_req_t *req)
+{
+  return rx_state_funcs[req->state] != 0;
+}
+
+/*
+ * Run the request state machine in one direction.
+ *
+ * Keeps invoking the handler registered for the request's current state
+ * (TX table when is_tx is set, RX table otherwise) for as long as handlers
+ * return HTTP_SM_CONTINUE. RX handlers are called without send params.
+ * The connection timer is refreshed afterwards, except when a handler
+ * reported HTTP_SM_ERROR.
+ */
+static_always_inline void
+http1_req_run_state_machine (http_conn_t *hc, http_req_t *req,
+                             transport_send_params_t *sp, u8 is_tx)
+{
+  http_sm_handler *handlers = is_tx ? tx_state_funcs : rx_state_funcs;
+  transport_send_params_t *params = is_tx ? sp : 0;
+  http_sm_result_t res;
+
+  for (;;)
+    {
+      res = handlers[req->state](hc, req, params);
+      if (res == HTTP_SM_ERROR)
+        {
+          HTTP_DBG (1, "error in state machine %d", res);
+          return;
+        }
+      if (res != HTTP_SM_CONTINUE)
+        break;
+    }
+
+  /* Reset the session expiration timer */
+  http_conn_timer_update (hc);
+}
+
+/*****************/
+/* http core VFT */
+/*****************/
+
+/* Look up the request and return the index of the connection it uses. */
+static u32
+http1_hc_index_get_by_req_index (u32 req_index,
+                                 clib_thread_index_t thread_index)
+{
+  return http1_req_get (req_index, thread_index)->hr_hc_index;
+}
+
+/* Look up the request and return a pointer to its embedded connection */
+static transport_connection_t *
+http1_req_get_connection (u32 req_index, clib_thread_index_t thread_index)
+{
+  http_req_t *req = http1_req_get (req_index, thread_index);
+
+  return &req->connection;
+}
+
+/* format() helper; expects (http_req_t *, http_conn_t *) in that order.
+ * Prints the request ids, app worker index, connection index and the ids of
+ * the transport session the connection refers to. */
+static u8 *
+format_http1_req (u8 *s, va_list *args)
+{
+  http_req_t *req = va_arg (*args, http_req_t *);
+  http_conn_t *hc = va_arg (*args, http_conn_t *);
+  session_t *ts = session_get_from_handle (hc->hc_tc_session_handle);
+
+  return format (s, "[%d:%d][H1] app_wrk %u hc_index %u ts %d:%d",
+                 req->c_thread_index, req->c_s_index, req->hr_pa_wrk_index,
+                 req->hr_hc_index, ts->thread_index, ts->session_index);
+}
+
+/* format() helper; expects (u32 req_index, thread index, http_conn_t *,
+ * u32 verbose). Always prints the request id column; with verbose set it
+ * appends the connection state, and with verbose > 1 a trailing newline. */
+static u8 *
+http1_format_req (u8 *s, va_list *args)
+{
+  u32 req_index = va_arg (*args, u32);
+  clib_thread_index_t thread_index = va_arg (*args, u32);
+  http_conn_t *hc = va_arg (*args, http_conn_t *);
+  u32 verbose = va_arg (*args, u32);
+  http_req_t *req = http1_req_get (req_index, thread_index);
+
+  s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http1_req, req, hc);
+  if (!verbose)
+    return s;
+
+  s = format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc);
+  if (verbose > 1)
+    s = format (s, "\n");
+
+  return s;
+}
+
+/* App TX event: run the TX state machine for the request.
+ *
+ * When the current state has no TX handler there are two outcomes:
+ *  - server side, still in TRANSPORT_IO_MORE_DATA: the app replied early,
+ *    so the transport input is drained and the request is moved to
+ *    WAIT_APP_REPLY before the state machine runs;
+ *  - anything else: a warning is logged, the app data is drained and the
+ *    callback returns without running the state machine. */
+static void
+http1_app_tx_callback (http_conn_t *hc, u32 req_index,
+                       transport_send_params_t *sp)
+{
+  http_req_t *req = http1_req_get (req_index, hc->c_thread_index);
+
+  if (!http1_req_state_is_tx_valid (req))
+    {
+      /* Sometimes the server apps can send the response earlier
+       * than expected (e.g when rejecting a bad request) */
+      int early_reply = req->state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA &&
+                        (hc->flags & HTTP_CONN_F_IS_SERVER);
+
+      if (!early_reply)
+        {
+          clib_warning ("hc [%u]%x invalid tx state: http req state "
+                        "'%U', session state '%U'",
+                        hc->c_thread_index, hc->hc_hc_index,
+                        format_http_req_state, req->state,
+                        format_http_conn_state, hc);
+          http_io_as_drain_all (req);
+          return;
+        }
+
+      http_io_ts_drain_all (hc);
+      http_req_state_change (req, HTTP_REQ_STATE_WAIT_APP_REPLY);
+    }
+
+  HTTP_DBG (1, "run state machine");
+  http1_req_run_state_machine (hc, req, sp, 1);
+}
+
+/* App RX event: only acted upon while the request is in the TUNNEL state,
+ * in which case the tunnel RX handler is invoked directly. */
+static void
+http1_app_rx_evt_callback (http_conn_t *hc, u32 req_index,
+                           clib_thread_index_t thread_index)
+{
+  http_req_t *req = http1_req_get (req_index, thread_index);
+
+  if (req->state != HTTP_REQ_STATE_TUNNEL)
+    return;
+
+  http1_req_state_tunnel_rx (hc, req, 0);
+}
+
+/* App close: if the request no longer exists do nothing. If the app still
+ * has unread data queued and the connection is not yet closed, only mark
+ * the connection APP_CLOSED and wait; otherwise confirm the close to the
+ * session layer and disconnect the transport. */
+static void
+http1_app_close_callback (http_conn_t *hc, u32 req_index,
+                          clib_thread_index_t thread_index)
+{
+  http_req_t *req = http1_req_get_if_valid (req_index, thread_index);
+
+  if (!req)
+    {
+      HTTP_DBG (1, "req already deleted");
+      return;
+    }
+
+  if (http_io_as_max_read (req) && hc->state != HTTP_CONN_STATE_CLOSED)
+    {
+      /* Wait for all data to be written to ts */
+      hc->state = HTTP_CONN_STATE_APP_CLOSED;
+      return;
+    }
+
+  /* Nothing more to send, confirm close */
+  HTTP_DBG (1, "nothing more to send, confirm close");
+  session_transport_closed_notify (&req->connection);
+  http_disconnect_transport (hc);
+}
+
+/* App reset: notify the session layer that the request's connection is
+ * closed, then disconnect the transport. */
+static void
+http1_app_reset_callback (http_conn_t *hc, u32 req_index,
+                          clib_thread_index_t thread_index)
+{
+  http_req_t *req = http1_req_get (req_index, thread_index);
+
+  session_transport_closed_notify (&req->connection);
+  http_disconnect_transport (hc);
+}
+
+/* Transport connected: allocate the request context, put it in
+ * WAIT_APP_METHOD and hand it to http_conn_established, whose result is
+ * returned. The connection must still carry HTTP_CONN_F_NO_APP_SESSION. */
+static int
+http1_transport_connected_callback (http_conn_t *hc)
+{
+  http_req_t *new_req;
+
+  ASSERT (hc->flags & HTTP_CONN_F_NO_APP_SESSION);
+
+  new_req = http1_conn_alloc_req (hc);
+  http_req_state_change (new_req, HTTP_REQ_STATE_WAIT_APP_METHOD);
+
+  return http_conn_established (hc, new_req);
+}
+
+/* Transport RX event.
+ *
+ * If the connection has no request yet (asserted to be the server side) a
+ * request context is allocated, accepted, put in WAIT_TRANSPORT_METHOD and
+ * the NO_APP_SESSION flag is cleared. The RX state machine is then run,
+ * unless the current state has no RX handler: in that case the transport
+ * input is drained, with a warning if there actually was data to read. */
+static void
+http1_transport_rx_callback (http_conn_t *hc)
+{
+  http_req_t *req;
+
+  if (hc->flags & HTTP_CONN_F_HAS_REQUEST)
+    {
+      req = http1_conn_get_req (hc);
+    }
+  else
+    {
+      ASSERT (hc->flags & HTTP_CONN_F_IS_SERVER);
+      /* first request - create request ctx and notify app about new conn */
+      req = http1_conn_alloc_req (hc);
+      http_conn_accept_request (hc, req);
+      http_req_state_change (req, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD);
+      hc->flags &= ~HTTP_CONN_F_NO_APP_SESSION;
+    }
+
+  if (http1_req_state_is_rx_valid (req))
+    {
+      HTTP_DBG (1, "run state machine");
+      http1_req_run_state_machine (hc, req, 0, 0);
+      return;
+    }
+
+  if (http_io_ts_max_read (hc))
+    clib_warning ("hc [%u]%x invalid rx state: http req state "
+                  "'%U', session state '%U'",
+                  hc->c_thread_index, hc->hc_hc_index,
+                  format_http_req_state, req->state,
+                  format_http_conn_state, hc);
+  http_io_ts_drain_all (hc);
+}
+
+/* Transport closing: when a request exists and nothing is left to read from
+ * the transport, propagate the closing notification to the app. */
+static void
+http1_transport_close_callback (http_conn_t *hc)
+{
+  http_req_t *req;
+
+  if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST))
+    return;
+
+  /* Still data to rx, nothing to propagate yet */
+  if (http_io_ts_max_read (hc))
+    return;
+
+  req = http1_conn_get_req (hc);
+  session_transport_closing_notify (&req->connection);
+}
+
+/* Transport reset: forward the reset to the app if a request exists */
+static void
+http1_transport_reset_callback (http_conn_t *hc)
+{
+  http_req_t *req;
+
+  if (hc->flags & HTTP_CONN_F_HAS_REQUEST)
+    {
+      req = http1_conn_get_req (hc);
+      session_transport_reset_notify (&req->connection);
+    }
+}
+
+/* Reschedule the request's connection; a request is asserted to exist */
+static void
+http1_transport_conn_reschedule_callback (http_conn_t *hc)
+{
+  http_req_t *req;
+
+  ASSERT (hc->flags & HTTP_CONN_F_HAS_REQUEST);
+
+  req = http1_conn_get_req (hc);
+  transport_connection_reschedule (&req->connection);
+}
+
+/* Connection cleanup: if a request exists, send the delete notification for
+ * its connection and free the request. */
+static void
+http1_conn_cleanup_callback (http_conn_t *hc)
+{
+  http_req_t *req;
+
+  if (hc->flags & HTTP_CONN_F_HAS_REQUEST)
+    {
+      req = http1_conn_get_req (hc);
+      session_transport_delete_notify (&req->connection);
+      http1_conn_free_req (hc);
+    }
+}
+
+/* Size the request pool vector so that it has one slot per thread
+ * (main thread plus vtm->n_threads). */
+static void
+http1_enable_callback (void)
+{
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  http1_main_t *h1m = &http1_main;
+  u32 n_slots = 1 /* main thread */ + vtm->n_threads;
+
+  vec_validate (h1m->req_pool, n_slots - 1);
+}
+
+/* Callback table registered for HTTP_VERSION_1 by http1_init; every member
+ * points at one of the static http1_* functions defined in this file. */
+const static http_engine_vft_t http1_engine = {
+ .name = "http1",
+ .hc_index_get_by_req_index = http1_hc_index_get_by_req_index,
+ .req_get_connection = http1_req_get_connection,
+ .format_req = http1_format_req,
+ .app_tx_callback = http1_app_tx_callback,
+ .app_rx_evt_callback = http1_app_rx_evt_callback,
+ .app_close_callback = http1_app_close_callback,
+ .app_reset_callback = http1_app_reset_callback,
+ .transport_connected_callback = http1_transport_connected_callback,
+ .transport_rx_callback = http1_transport_rx_callback,
+ .transport_close_callback = http1_transport_close_callback,
+ .transport_conn_reschedule_callback =
+ http1_transport_conn_reschedule_callback,
+ .transport_reset_callback = http1_transport_reset_callback,
+ .conn_cleanup_callback = http1_conn_cleanup_callback,
+ .enable_callback = http1_enable_callback,
+};
+
+/* Init function: registers the http1 engine table for HTTP_VERSION_1.
+ * Always returns 0 (no error). */
+static clib_error_t *
+http1_init (vlib_main_t *vm)
+{
+ http_register_engine (&http1_engine, HTTP_VERSION_1);
+ return 0;
+}
+
+/* Ordered to run after "http_transport_init" */
+VLIB_INIT_FUNCTION (http1_init) = {
+ .runs_after = VLIB_INITS ("http_transport_init"),
+};
diff --git a/src/plugins/http/http2/frame.c b/src/plugins/http/http2/frame.c
new file mode 100644
index 00000000000..c9c1931f02a
--- /dev/null
+++ b/src/plugins/http/http2/frame.c
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/string.h>
+#include <http/http2/frame.h>
+
+#define MAX_U24 0xFFFFFF
+
+/* Decode a 3-byte big-endian integer from src into *value.
+ * Returns the pointer just past the consumed bytes. */
+static_always_inline u8 *
+http2_decode_u24 (u8 *src, u32 *value)
+{
+  u32 hi = src[0], mid = src[1], lo = src[2];
+
+  *value = (hi << 16) | (mid << 8) | lo;
+  return src + 3;
+}
+
+/* Encode value (asserted to fit in 24 bits) as 3 big-endian bytes at dst.
+ * Returns the pointer just past the written bytes. */
+static_always_inline u8 *
+http2_encode_u24 (u8 *dst, u32 value)
+{
+  ASSERT (value <= MAX_U24);
+
+  dst[0] = (value >> 16) & 0xFF;
+  dst[1] = (value >> 8) & 0xFF;
+  dst[2] = value & 0xFF;
+
+  return dst + 3;
+}
+
+/*
+ * RFC9113 section 4.1
+ *
+ * HTTP Frame {
+ * Length (24),
+ * Type (8),
+ * Flags (8),
+ * Reserved (1),
+ * Stream Identifier (31),
+ * Frame Payload (..),
+ * }
+ */
+
+/* Parse the 9-byte frame header at src into fh: 24-bit length, type, flags
+ * and the stream identifier with the reserved top bit masked off. */
+__clib_export void
+http2_frame_header_read (u8 *src, http2_frame_header_t *fh)
+{
+  u32 stream_id;
+
+  src = http2_decode_u24 (src, &fh->length);
+  fh->type = *src++;
+  fh->flags = *src++;
+  /* The stream id sits at byte offset 5 of the header, so copy it out
+   * instead of dereferencing a u32 pointer into the byte buffer. */
+  clib_memcpy_fast (&stream_id, src, sizeof (stream_id));
+  fh->stream_id = clib_net_to_host_u32 (stream_id) & 0x7FFFFFFF;
+}
+
+/* Serialize fh into the 9 bytes at dst: 24-bit length, type, flags and the
+ * stream identifier in network byte order. */
+static void
+http2_frame_header_write (http2_frame_header_t *fh, u8 *dst)
+{
+  u32 stream_id_net = clib_host_to_net_u32 (fh->stream_id);
+  u8 *p = http2_encode_u24 (dst, fh->length);
+
+  p[0] = fh->type;
+  p[1] = fh->flags;
+  clib_memcpy_fast (p + 2, &stream_id_net, sizeof (stream_id_net));
+}
+
+/* Walk the SETTINGS payload one entry at a time and store each recognized
+ * value into the matching member of *settings.
+ *
+ * The switch cases are generated from foreach_http2_settings: each one
+ * converts the value from network byte order, returns the per-setting
+ * err_code if it lies outside [min, max], and otherwise stores it.
+ * Unknown identifiers are skipped. Trailing bytes that do not form a whole
+ * entry yield HTTP2_ERROR_FRAME_SIZE_ERROR. */
+__clib_export http2_error_t
+http2_frame_read_settings (http2_conn_settings_t *settings, u8 *payload,
+ u32 payload_len)
+{
+ http2_settings_entry_t *entry;
+ u32 value;
+
+ while (payload_len >= sizeof (*entry))
+ {
+ entry = (http2_settings_entry_t *) payload;
+ switch (clib_net_to_host_u16 (entry->identifier))
+ {
+#define _(v, label, member, min, max, default_value, err_code) \
+ case HTTP2_SETTINGS_##label: \
+ value = clib_net_to_host_u32 (entry->value); \
+ if (!(value >= min && value <= max)) \
+ return err_code; \
+ settings->member = value; \
+ break;
+ foreach_http2_settings
+#undef _
+ /* ignore unknown or unsupported identifier */
+ default : break;
+ }
+ payload_len -= sizeof (*entry);
+ payload += sizeof (*entry);
+ }
+
+ /* leftover bytes: payload length was not a multiple of the entry size */
+ if (payload_len != 0)
+ return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+ return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Append a SETTINGS frame header with the ACK flag (zero length, stream 0)
+ * to the vector *dst. */
+__clib_export void
+http2_frame_write_settings_ack (u8 **dst)
+{
+  http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_SETTINGS,
+                              .flags = HTTP2_FRAME_FLAG_ACK };
+
+  http2_frame_header_write (&fh, http2_frame_header_alloc (dst));
+}
+
+/* Append a SETTINGS frame to the vector *dst: the frame header followed by
+ * one entry per element of the (non-empty) settings vector, with identifier
+ * and value converted to network byte order. */
+__clib_export void
+http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst)
+{
+  http2_settings_entry_t *entry, wire;
+  http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_SETTINGS };
+  u32 payload_len;
+  u8 *wr;
+
+  ASSERT (settings);
+  ASSERT (vec_len (settings) > 0);
+
+  payload_len = vec_len (settings) * sizeof (*entry);
+  fh.length = payload_len;
+
+  wr = http2_frame_header_alloc (dst);
+  http2_frame_header_write (&fh, wr);
+
+  vec_add2 (*dst, wr, payload_len);
+  vec_foreach (entry, settings)
+    {
+      wire.identifier = clib_host_to_net_u16 (entry->identifier);
+      wire.value = clib_host_to_net_u32 (entry->value);
+      clib_memcpy_fast (wr, &wire, sizeof (wire));
+      wr += sizeof (wire);
+    }
+}
+
+#define WINDOW_UPDATE_LENGTH 4
+
+/* Parse a WINDOW_UPDATE payload.
+ *
+ * @param increment   receives the 31-bit window size increment
+ * @param payload     frame payload
+ * @param payload_len payload length, must be exactly 4
+ *
+ * @return HTTP2_ERROR_FRAME_SIZE_ERROR on wrong length,
+ *         HTTP2_ERROR_PROTOCOL_ERROR when the increment is zero,
+ *         HTTP2_ERROR_NO_ERROR otherwise
+ */
+__clib_export http2_error_t
+http2_frame_read_window_update (u32 *increment, u8 *payload, u32 payload_len)
+{
+  u32 value;
+
+  if (payload_len != WINDOW_UPDATE_LENGTH)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  /* copy out of the byte buffer, payload has no alignment guarantee */
+  clib_memcpy_fast (&value, payload, WINDOW_UPDATE_LENGTH);
+  /* top bit is reserved */
+  value = clib_net_to_host_u32 (value) & 0x7FFFFFFF;
+  *increment = value;
+
+  /* the check must look at the decoded increment, not at a pointer */
+  if (value == 0)
+    return HTTP2_ERROR_PROTOCOL_ERROR;
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Append a WINDOW_UPDATE frame for stream_id to the vector *dst.
+ * increment is asserted to be in the range 1..0x7FFFFFFF. */
+__clib_export void
+http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst)
+{
+  http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_WINDOW_UPDATE,
+                              .length = WINDOW_UPDATE_LENGTH,
+                              .stream_id = stream_id };
+  u32 increment_net;
+  u8 *wr;
+
+  ASSERT (increment > 0 && increment <= 0x7FFFFFFF);
+
+  wr = http2_frame_header_alloc (dst);
+  http2_frame_header_write (&fh, wr);
+
+  vec_add2 (*dst, wr, WINDOW_UPDATE_LENGTH);
+  increment_net = clib_host_to_net_u32 (increment);
+  clib_memcpy_fast (wr, &increment_net, WINDOW_UPDATE_LENGTH);
+}
+
+#define RST_STREAM_LENGTH 4
+
+/* Parse a RST_STREAM payload.
+ *
+ * @param error_code  receives the 32-bit error code in host byte order
+ * @param payload     frame payload
+ * @param payload_len payload length, must be exactly 4
+ *
+ * @return HTTP2_ERROR_FRAME_SIZE_ERROR on wrong length,
+ *         HTTP2_ERROR_NO_ERROR otherwise
+ */
+__clib_export http2_error_t
+http2_frame_read_rst_stream (u32 *error_code, u8 *payload, u32 payload_len)
+{
+  u32 value;
+
+  if (payload_len != RST_STREAM_LENGTH)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  /* copy out of the byte buffer, payload has no alignment guarantee */
+  clib_memcpy_fast (&value, payload, RST_STREAM_LENGTH);
+  *error_code = clib_net_to_host_u32 (value);
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Append a RST_STREAM frame carrying error_code to the vector *dst.
+ * stream_id is asserted to be in the range 1..0x7FFFFFFF. */
+__clib_export void
+http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id,
+                              u8 **dst)
+{
+  http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_RST_STREAM,
+                              .length = RST_STREAM_LENGTH,
+                              .stream_id = stream_id };
+  u32 error_code_net;
+  u8 *wr;
+
+  ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF);
+
+  wr = http2_frame_header_alloc (dst);
+  http2_frame_header_write (&fh, wr);
+
+  vec_add2 (*dst, wr, RST_STREAM_LENGTH);
+  error_code_net = clib_host_to_net_u32 ((u32) error_code);
+  clib_memcpy_fast (wr, &error_code_net, RST_STREAM_LENGTH);
+}
+
+#define GOAWAY_MIN_SIZE 8
+
+/* Parse a GOAWAY payload.
+ *
+ * @param error_code     receives the 32-bit error code (second payload word)
+ * @param last_stream_id receives the last stream id (first payload word,
+ *                       reserved top bit masked off)
+ * @param payload        frame payload
+ * @param payload_len    payload length, at least 8
+ *
+ * @return HTTP2_ERROR_FRAME_SIZE_ERROR when the payload is shorter than 8
+ *         bytes, HTTP2_ERROR_NO_ERROR otherwise
+ */
+__clib_export http2_error_t
+http2_frame_read_goaway (u32 *error_code, u32 *last_stream_id, u8 *payload,
+                         u32 payload_len)
+{
+  u32 value;
+
+  if (payload_len < GOAWAY_MIN_SIZE)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  /* copy out of the byte buffer, payload has no alignment guarantee */
+  clib_memcpy_fast (&value, payload, sizeof (value));
+  *last_stream_id = clib_net_to_host_u32 (value) & 0x7FFFFFFF;
+  payload += 4;
+
+  clib_memcpy_fast (&value, payload, sizeof (value));
+  *error_code = clib_net_to_host_u32 (value);
+
+  /* TODO: Additional Debug Data */
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+__clib_export void
+http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id,
+ u8 **dst)
+{
+ u8 *p;
+ u32 value;
+
+ ASSERT (last_stream_id <= 0x7FFFFFFF);
+
+ http2_frame_header_t fh = { .type = HTTP2_FRAME_TYPE_GOAWAY,
+ .length = GOAWAY_MIN_SIZE };
+ p = http2_frame_header_alloc (dst);
+ http2_frame_header_write (&fh, p);
+
+ vec_add2 (*dst, p, GOAWAY_MIN_SIZE);
+ value = clib_host_to_net_u32 (last_stream_id);
+ clib_memcpy_fast (p, &value, 4);
+ p += 4;
+ value = clib_host_to_net_u32 ((u32) error_code);
+ clib_memcpy_fast (p, &value, 4);
+ /* TODO: Additional Debug Data */
+}
+
+/* Append a PING frame to the vector *dst. The ACK flag is set when is_resp
+ * is non-zero; HTTP2_PING_PAYLOAD_LEN bytes are copied from payload. */
+void
+http2_frame_write_ping (u8 is_resp, u8 *payload, u8 **dst)
+{
+  http2_frame_header_t fh = {
+    .type = HTTP2_FRAME_TYPE_PING,
+    .length = HTTP2_PING_PAYLOAD_LEN,
+    .flags = is_resp ? HTTP2_FRAME_FLAG_ACK : 0,
+  };
+  u8 *wr = http2_frame_header_alloc (dst);
+
+  http2_frame_header_write (&fh, wr);
+
+  vec_add2 (*dst, wr, HTTP2_PING_PAYLOAD_LEN);
+  clib_memcpy_fast (wr, payload, HTTP2_PING_PAYLOAD_LEN);
+}
+
+#define PRIORITY_DATA_LEN 5
+
+/* Locate the header block fragment inside a HEADERS frame payload.
+ *
+ * @param headers     receives pointer to the header block fragment
+ * @param headers_len receives fragment length (payload length minus the
+ *                    Pad Length octet, padding and priority data)
+ * @param payload     frame payload
+ * @param payload_len payload length
+ * @param flags       flags field of the frame header
+ *
+ * @return HTTP2_ERROR_PROTOCOL_ERROR when the PADED flag is set and the
+ *         payload is empty or the padding does not fit in it,
+ *         HTTP2_ERROR_FRAME_SIZE_ERROR when the PRIORITY flag is set and
+ *         nothing would be left after the priority data,
+ *         HTTP2_ERROR_NO_ERROR otherwise
+ */
+__clib_export http2_error_t
+http2_frame_read_headers (u8 **headers, u32 *headers_len, u8 *payload,
+                          u32 payload_len, u8 flags)
+{
+  *headers_len = payload_len;
+
+  if (flags & HTTP2_FRAME_FLAG_PADED)
+    {
+      /* an empty payload has no Pad Length octet to read */
+      if (payload_len == 0)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      u8 pad_len = *payload++;
+      if ((u32) pad_len >= payload_len)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      /* drop the padding and the Pad Length octet itself */
+      *headers_len -= (pad_len + 1);
+    }
+
+  if (flags & HTTP2_FRAME_FLAG_PRIORITY)
+    {
+      if (*headers_len <= PRIORITY_DATA_LEN)
+        return HTTP2_ERROR_FRAME_SIZE_ERROR;
+      /* just skip, priority scheme defined in RFC7540 is deprecated */
+      *headers_len -= PRIORITY_DATA_LEN;
+      payload += PRIORITY_DATA_LEN;
+    }
+
+  *headers = payload;
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Write a HEADERS frame header (length headers_len, given flags) at dst.
+ * stream_id is asserted to be in the range 1..0x7FFFFFFF. */
+__clib_export void
+http2_frame_write_headers_header (u32 headers_len, u32 stream_id, u8 flags,
+                                  u8 *dst)
+{
+  http2_frame_header_t fh = { 0 };
+
+  ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF);
+
+  fh.type = HTTP2_FRAME_TYPE_HEADERS;
+  fh.length = headers_len;
+  fh.flags = flags;
+  fh.stream_id = stream_id;
+
+  http2_frame_header_write (&fh, dst);
+}
+
+/* Locate the data inside a DATA frame payload.
+ *
+ * @param data        receives pointer to the data
+ * @param data_len    receives data length (payload length minus the Pad
+ *                    Length octet and padding)
+ * @param payload     frame payload
+ * @param payload_len payload length
+ * @param flags       flags field of the frame header
+ *
+ * @return HTTP2_ERROR_PROTOCOL_ERROR when the PADED flag is set and the
+ *         payload is empty or the padding does not fit in it,
+ *         HTTP2_ERROR_NO_ERROR otherwise
+ */
+__clib_export http2_error_t
+http2_frame_read_data (u8 **data, u32 *data_len, u8 *payload, u32 payload_len,
+                       u8 flags)
+{
+  *data_len = payload_len;
+
+  if (flags & HTTP2_FRAME_FLAG_PADED)
+    {
+      /* an empty payload has no Pad Length octet to read */
+      if (payload_len == 0)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      u8 pad_len = *payload++;
+      if ((u32) pad_len >= payload_len)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      /* drop the padding and the Pad Length octet itself */
+      *data_len -= (pad_len + 1);
+    }
+
+  *data = payload;
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Write a DATA frame header (length data_len, given flags) at dst.
+ * stream_id is asserted to be in the range 1..0x7FFFFFFF. */
+__clib_export void
+http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags, u8 *dst)
+{
+  http2_frame_header_t fh = { 0 };
+
+  ASSERT (stream_id > 0 && stream_id <= 0x7FFFFFFF);
+
+  fh.type = HTTP2_FRAME_TYPE_DATA;
+  fh.length = data_len;
+  fh.flags = flags;
+  fh.stream_id = stream_id;
+
+  http2_frame_header_write (&fh, dst);
+}
diff --git a/src/plugins/http/http2/frame.h b/src/plugins/http/http2/frame.h
new file mode 100644
index 00000000000..53a37c1aa0a
--- /dev/null
+++ b/src/plugins/http/http2/frame.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP2_FRAME_H_
+#define SRC_PLUGINS_HTTP_HTTP2_FRAME_H_
+
+#include <vppinfra/error.h>
+#include <vppinfra/types.h>
+#include <http/http2/http2.h>
+
+#define HTTP2_FRAME_HEADER_SIZE 9
+#define HTTP2_PING_PAYLOAD_LEN 8
+
+/* X-macro list of frame types: (wire value, enum suffix, display string) */
+#define foreach_http2_frame_type \
+ _ (0x00, DATA, "DATA") \
+ _ (0x01, HEADERS, "HEADERS") \
+ _ (0x02, PRIORITY, "PRIORITY") \
+ _ (0x03, RST_STREAM, "RST_STREAM") \
+ _ (0x04, SETTINGS, "SETTINGS") \
+ _ (0x05, PUSH_PROMISE, "PUSH_PROMISE") \
+ _ (0x06, PING, "PING") \
+ _ (0x07, GOAWAY, "GOAWAY") \
+ _ (0x08, WINDOW_UPDATE, "WINDOW_UPDATE") \
+ _ (0x09, CONTINUATION, "CONTINUATION")
+
+/* HTTP2_FRAME_TYPE_<suffix> = wire value; packed so that it occupies a
+ * single byte (checked by the static assert below) */
+typedef enum
+{
+#define _(v, n, s) HTTP2_FRAME_TYPE_##n = v,
+ foreach_http2_frame_type
+#undef _
+} __clib_packed http2_frame_type_t;
+
+STATIC_ASSERT_SIZEOF (http2_frame_type_t, 1);
+
+/* X-macro list of frame flags: (bit value, enum suffix).
+ * END_STREAM and ACK deliberately share the value 1 in this list; which one
+ * applies depends on the frame type the flag is used with. */
+#define foreach_http2_frame_flag \
+ _ (0, NONE) \
+ _ (1, END_STREAM) \
+ _ (1, ACK) \
+ _ (1 << 2, END_HEADERS) \
+ _ (1 << 3, PADED) \
+ _ (1 << 5, PRIORITY)
+
+/* HTTP2_FRAME_FLAG_<suffix> = bit value; packed so that it occupies a
+ * single byte (checked by the static assert below) */
+typedef enum
+{
+#define _(v, n) HTTP2_FRAME_FLAG_##n = v,
+ foreach_http2_frame_flag
+#undef _
+} __clib_packed http2_frame_flag_t;
+
+STATIC_ASSERT_SIZEOF (http2_frame_flag_t, 1);
+
+/* Parsed (host-order) frame header; this is not the 9-byte wire layout */
+typedef struct
+{
+ u32 length; /* payload length, a 24-bit field on the wire */
+ http2_frame_type_t type;
+ u8 flags;
+ u32 stream_id; /* http2_frame_header_read masks off the reserved top bit */
+} http2_frame_header_t;
+
+/* One SETTINGS entry. Packed because http2_frame_read_settings overlays it
+ * directly on the frame payload and http2_frame_write_settings copies it
+ * out byte for byte (fields in network byte order on the wire). */
+typedef struct
+{
+ u16 identifier;
+ u32 value;
+} __clib_packed http2_settings_entry_t;
+
+/**
+ * Parse frame header
+ *
+ * @param src Pointer to the beginning of the frame
+ * @param fh Parsed frame header
+ */
+void http2_frame_header_read (u8 *src, http2_frame_header_t *fh);
+
+/**
+ * Add 9 bytes (frame header size) to the end of given vector
+ *
+ * @param dst Pointer to vector
+ *
+ * @return Pointer to the frame header beginning
+ */
+static_always_inline u8 *
+http2_frame_header_alloc (u8 **dst)
+{
+ u8 *p;
+
+ vec_add2 (*dst, p, HTTP2_FRAME_HEADER_SIZE);
+ return p;
+}
+
+/**
+ * Parse SETTINGS frame payload
+ *
+ * @param settings Vector of HTTP/2 settings
+ * @param payload Payload to parse
+ * @param payload_len Payload length
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ */
+http2_error_t http2_frame_read_settings (http2_conn_settings_t *settings,
+ u8 *payload, u32 payload_len);
+
+/**
+ * Write SETTINGS ACK frame to the end of given vector
+ *
+ * @param dst Vector where SETTINGS ACK frame will be written
+ */
+void http2_frame_write_settings_ack (u8 **dst);
+
+/**
+ * Write SETTINGS frame to the end of given vector
+ *
+ * @param settings Vector of HTTP/2 settings
+ * @param dst Vector where SETTINGS frame will be written
+ */
+void http2_frame_write_settings (http2_settings_entry_t *settings, u8 **dst);
+
+/**
+ * Parse WINDOW_UPDATE frame payload
+ *
+ * @param increment Parsed window increment value
+ * @param payload Payload to parse
+ * @param payload_len Payload length
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ */
+http2_error_t http2_frame_read_window_update (u32 *increment, u8 *payload,
+ u32 payload_len);
+
+/**
+ * Write WINDOW_UPDATE frame to the end of given vector
+ *
+ * @param increment Window increment value
+ * @param stream_id Stream ID
+ * @param dst Vector where WINDOW_UPDATE frame will be written
+ */
+void http2_frame_write_window_update (u32 increment, u32 stream_id, u8 **dst);
+
+/**
+ * Parse RST_STREAM frame payload
+ *
+ * @param error_code Parsed error code
+ * @param payload Payload to parse
+ * @param payload_len Payload length
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ */
+http2_error_t http2_frame_read_rst_stream (u32 *error_code, u8 *payload,
+ u32 payload_len);
+
+/**
+ * Write RST_STREAM frame to the end of given vector
+ *
+ * @param error_code Error code
+ * @param stream_id Stream ID, except 0
+ * @param dst Vector where RST_STREAM frame will be written
+ */
+void http2_frame_write_rst_stream (http2_error_t error_code, u32 stream_id,
+ u8 **dst);
+
+/**
+ * Parse GOAWAY frame payload
+ *
+ * @param error_code     Parsed error code
+ * @param last_stream_id Parsed last stream ID
+ * @param payload        Payload to parse
+ * @param payload_len    Payload length
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ *
+ * @note Parameter order matches the definition in frame.c: the error code
+ *       comes first, the last stream ID second.
+ */
+http2_error_t http2_frame_read_goaway (u32 *error_code, u32 *last_stream_id,
+                                       u8 *payload, u32 payload_len);
+
+/**
+ * Write GOAWAY frame to the end of given vector
+ *
+ * @param error_code Error code
+ * @param last_stream_id Last stream ID
+ * @param dst Vector where GOAWAY frame will be written
+ */
+void http2_frame_write_goaway (http2_error_t error_code, u32 last_stream_id,
+ u8 **dst);
+
+/**
+ * Write PING frame to the end of given vector
+ *
+ * @param is_resp Indicate that this is PING response (ACK flag is set)
+ * @param payload Opaque data to send, @c HTTP2_PING_PAYLOAD_LEN bytes
+ * @param dst Vector where PING frame will be written
+ */
+void http2_frame_write_ping (u8 is_resp, u8 *payload, u8 **dst);
+
+/**
+ * Parse HEADERS frame payload
+ *
+ * @param headers Pointer to header block fragment
+ * @param headers_len Header block fragment length
+ * @param payload Payload to parse
+ * @param payload_len Payload length
+ * @param flags Flag field of frame header
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ */
+http2_error_t http2_frame_read_headers (u8 **headers, u32 *headers_len,
+ u8 *payload, u32 payload_len,
+ u8 flags);
+
+/**
+ * Write HEADERS frame header
+ *
+ * @param headers_len Header block fragment length
+ * @param stream_id Stream ID, except 0
+ * @param flags Frame header flags
+ * @param dst Pointer where frame header will be written
+ *
+ * @note Use @c http2_frame_header_alloc before
+ */
+void http2_frame_write_headers_header (u32 headers_len, u32 stream_id,
+ u8 flags, u8 *dst);
+
+/**
+ * Parse DATA frame payload
+ *
+ * @param data Pointer to data
+ * @param data_len Data length
+ * @param payload Payload to parse
+ * @param payload_len Payload length
+ * @param flags Flag field of frame header
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, error otherwise
+ */
+http2_error_t http2_frame_read_data (u8 **data, u32 *data_len, u8 *payload,
+ u32 payload_len, u8 flags);
+
+/**
+ * Write DATA frame header
+ *
+ * @param data_len Data length
+ * @param stream_id Stream ID, except 0
+ * @param flags Frame header flags
+ * @param dst Pointer where frame header will be written
+ */
+void http2_frame_write_data_header (u32 data_len, u32 stream_id, u8 flags,
+ u8 *dst);
+
+#endif /* SRC_PLUGINS_HTTP_HTTP2_FRAME_H_ */
diff --git a/src/plugins/http/http2/hpack.c b/src/plugins/http/http2/hpack.c
new file mode 100644
index 00000000000..76021ae14a6
--- /dev/null
+++ b/src/plugins/http/http2/hpack.c
@@ -0,0 +1,1173 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/error.h>
+#include <vppinfra/ring.h>
+#include <http/http2/hpack.h>
+#include <http/http2/huffman_table.h>
+#include <http/http_status_codes.h>
+
+#define HPACK_STATIC_TABLE_SIZE 61
+
+/* One static table row: header name and value as string literals together
+ * with their lengths (terminating NUL excluded, see name_val_token_lit) */
+typedef struct
+{
+ char *name;
+ uword name_len;
+ char *value;
+ uword value_len;
+} hpack_static_table_entry_t;
+
+#define name_val_token_lit(name, value) \
+ (name), sizeof (name) - 1, (value), sizeof (value) - 1
+
+/* HPACK static table (RFC 7541 Appendix A). Rows are listed in index order;
+ * array slot 0 holds the entry with HPACK index 1. */
+static hpack_static_table_entry_t
+  hpack_static_table[HPACK_STATIC_TABLE_SIZE] = {
+    { name_val_token_lit (":authority", "") },
+    { name_val_token_lit (":method", "GET") },
+    { name_val_token_lit (":method", "POST") },
+    { name_val_token_lit (":path", "/") },
+    { name_val_token_lit (":path", "/index.html") },
+    { name_val_token_lit (":scheme", "http") },
+    { name_val_token_lit (":scheme", "https") },
+    { name_val_token_lit (":status", "200") },
+    { name_val_token_lit (":status", "204") },
+    { name_val_token_lit (":status", "206") },
+    { name_val_token_lit (":status", "304") },
+    { name_val_token_lit (":status", "400") },
+    { name_val_token_lit (":status", "404") },
+    { name_val_token_lit (":status", "500") },
+    { name_val_token_lit ("accept-charset", "") },
+    { name_val_token_lit ("accept-encoding", "gzip, deflate") },
+    { name_val_token_lit ("accept-language", "") },
+    { name_val_token_lit ("accept-ranges", "") },
+    { name_val_token_lit ("accept", "") },
+    { name_val_token_lit ("access-control-allow-origin", "") },
+    { name_val_token_lit ("age", "") },
+    { name_val_token_lit ("allow", "") },
+    { name_val_token_lit ("authorization", "") },
+    { name_val_token_lit ("cache-control", "") },
+    { name_val_token_lit ("content-disposition", "") },
+    { name_val_token_lit ("content-encoding", "") },
+    { name_val_token_lit ("content-language", "") },
+    { name_val_token_lit ("content-length", "") },
+    { name_val_token_lit ("content-location", "") },
+    { name_val_token_lit ("content-range", "") },
+    { name_val_token_lit ("content-type", "") },
+    { name_val_token_lit ("cookie", "") },
+    { name_val_token_lit ("date", "") },
+    { name_val_token_lit ("etag", "") },
+    /* index 35 is "expect", not a second "etag" */
+    { name_val_token_lit ("expect", "") },
+    { name_val_token_lit ("expires", "") },
+    { name_val_token_lit ("from", "") },
+    { name_val_token_lit ("host", "") },
+    { name_val_token_lit ("if-match", "") },
+    { name_val_token_lit ("if-modified-since", "") },
+    { name_val_token_lit ("if-none-match", "") },
+    { name_val_token_lit ("if-range", "") },
+    { name_val_token_lit ("if-unmodified-since", "") },
+    { name_val_token_lit ("last-modified", "") },
+    { name_val_token_lit ("link", "") },
+    { name_val_token_lit ("location", "") },
+    { name_val_token_lit ("max-forwards", "") },
+    { name_val_token_lit ("proxy-authenticate", "") },
+    { name_val_token_lit ("proxy-authorization", "") },
+    { name_val_token_lit ("range", "") },
+    { name_val_token_lit ("referer", "") },
+    { name_val_token_lit ("refresh", "") },
+    { name_val_token_lit ("retry-after", "") },
+    { name_val_token_lit ("server", "") },
+    { name_val_token_lit ("set-cookie", "") },
+    { name_val_token_lit ("strict-transport-security", "") },
+    { name_val_token_lit ("transfer-encoding", "") },
+    { name_val_token_lit ("user-agent", "") },
+    { name_val_token_lit ("vary", "") },
+    { name_val_token_lit ("via", "") },
+    { name_val_token_lit ("www-authenticate", "") },
+  };
+
+/* Header name token paired with a static table index */
+typedef struct
+{
+ char *base;
+ uword len;
+ u8 static_table_index;
+} hpack_token_t;
+
+/* One row per entry of foreach_http_header_name, built from the lower-case
+ * name string and the hpack_index column of that list */
+static hpack_token_t hpack_headers[] = {
+#define _(sym, str_canonical, str_lower, hpack_index) \
+ { http_token_lit (str_lower), hpack_index },
+ foreach_http_header_name
+#undef _
+};
+
+/* Decode an HPACK prefix-coded integer (RFC 7541 section 5.1).
+ *
+ * @param src        in/out read position; advanced past the integer only
+ *                   on success (asserted to be before end on entry)
+ * @param end        end of input
+ * @param prefix_len number of prefix bits in the first octet (1..8)
+ *
+ * @return decoded value, or HPACK_INVALID_INT when the input ends before
+ *         the integer is complete or the value does not fit in uword
+ */
+__clib_export uword
+hpack_decode_int (u8 **src, u8 *end, u8 prefix_len)
+{
+  const u8 value_bits = sizeof (uword) * 8;
+  uword value, chunk;
+  u8 *p, shift = 0, byte;
+  u16 prefix_max;
+
+  ASSERT (*src < end);
+  ASSERT (prefix_len >= 1 && prefix_len <= 8);
+
+  p = *src;
+  prefix_max = (1 << prefix_len) - 1;
+  value = *p & (u8) prefix_max;
+  p++;
+  /* if integer value is less than 2^prefix_len-1 it's encoded within prefix */
+  if (value != prefix_max)
+    {
+      *src = p;
+      return value;
+    }
+
+  while (p != end)
+    {
+      byte = *p;
+      p++;
+      chunk = byte & 0x7F;
+      if (chunk)
+        {
+          /* shifting by the word width or more is undefined, and bits
+           * shifted out of the word would be lost silently */
+          if (shift >= value_bits || ((chunk << shift) >> shift) != chunk)
+            return HPACK_INVALID_INT;
+          chunk <<= shift;
+          /* check for overflow of the sum */
+          if (value + chunk < value)
+            return HPACK_INVALID_INT;
+          value += chunk;
+        }
+      /* MSB of the last byte is zero */
+      if ((byte & 0x80) == 0)
+        {
+          *src = p;
+          return value;
+        }
+      /* stop growing once past the word width so the u8 cannot wrap */
+      if (shift < value_bits)
+        shift += 7;
+    }
+
+  return HPACK_INVALID_INT;
+}
+
+/* Decode a Huffman-coded string occupying [*src, end).
+ *
+ * Decoded symbols are written through *buf, which is advanced, while
+ * *buf_len is decremented. *src itself is not advanced by this function.
+ *
+ * @return HTTP2_ERROR_NO_ERROR on success (including empty input),
+ *         HTTP2_ERROR_INTERNAL_ERROR when the output buffer is exhausted,
+ *         HTTP2_ERROR_COMPRESSION_ERROR on malformed or truncated input
+ */
+http2_error_t
+hpack_decode_huffman (u8 **src, u8 *end, u8 **buf, uword *buf_len)
+{
+  u64 accumulator = 0;
+  u8 accumulator_len = 0;
+  u8 *p;
+  hpack_huffman_code_t *code;
+
+  p = *src;
+  /* nothing to decode; the table lookup below needs at least 8 bits */
+  if (p == end)
+    return HTTP2_ERROR_NO_ERROR;
+
+  while (1)
+    {
+      /* out of space? */
+      if (*buf_len == 0)
+        return HTTP2_ERROR_INTERNAL_ERROR;
+      /* refill */
+      while (p < end && accumulator_len <= 56)
+        {
+          accumulator <<= 8;
+          accumulator_len += 8;
+          accumulator |= (u64) *p++;
+        }
+      /* first try short codes (5 - 8 bits) */
+      code =
+        &huff_code_table_fast[(u8) (accumulator >> (accumulator_len - 8))];
+      /* zero code length mean no luck */
+      if (PREDICT_TRUE (code->code_len))
+        {
+          **buf = code->symbol;
+          (*buf)++;
+          (*buf_len)--;
+          accumulator_len -= code->code_len;
+        }
+      else
+        {
+          /* slow path / long codes (10 - 30 bits) */
+          u32 tmp;
+          /* group boundaries are aligned to 32 bits */
+          if (accumulator_len < 32)
+            tmp = accumulator << (32 - accumulator_len);
+          else
+            tmp = accumulator >> (accumulator_len - 32);
+          /* figure out which interval code falls into, this is possible
+           * because HPACK use canonical Huffman codes
+           * see Schwartz, E. and B. Kallick, "Generating a canonical prefix
+           * encoding"
+           */
+          hpack_huffman_group_t *hg = hpack_huffman_get_group (tmp);
+          /* input ends in the middle of a long code */
+          if (hg->code_len > accumulator_len)
+            return HTTP2_ERROR_COMPRESSION_ERROR;
+          /* trim code to correct length */
+          u32 long_code = (accumulator >> (accumulator_len - hg->code_len)) &
+                          ((1 << hg->code_len) - 1);
+          if (!long_code)
+            return HTTP2_ERROR_COMPRESSION_ERROR;
+          /* find symbol in the list */
+          **buf = hg->symbols[long_code - hg->first_code];
+          (*buf)++;
+          (*buf_len)--;
+          accumulator_len -= hg->code_len;
+        }
+      /* all done */
+      if (p == end && accumulator_len < 8)
+        {
+          /* there might be one more symbol encoded with short code */
+          if (accumulator_len >= 5)
+            {
+              /* first check EOF case */
+              if (((1 << accumulator_len) - 1) ==
+                  (accumulator & ((1 << accumulator_len) - 1)))
+                break;
+
+              /* out of space? */
+              if (*buf_len == 0)
+                return HTTP2_ERROR_INTERNAL_ERROR;
+
+              code = &huff_code_table_fast[(u8) (accumulator
+                                                 << (8 - accumulator_len))];
+              /* no short code, or one that needs more bits than remain */
+              if (code->code_len == 0 || code->code_len > accumulator_len)
+                return HTTP2_ERROR_COMPRESSION_ERROR;
+              **buf = code->symbol;
+              (*buf)++;
+              (*buf_len)--;
+              accumulator_len -= code->code_len;
+              /* end at byte boundary? */
+              if (accumulator_len == 0)
+                break;
+            }
+          /* we must end with EOF here */
+          if (((1 << accumulator_len) - 1) !=
+              (accumulator & ((1 << accumulator_len) - 1)))
+            return HTTP2_ERROR_COMPRESSION_ERROR;
+          break;
+        }
+    }
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/* Decode one HPACK string literal (H flag + 7-bit-prefix length + data).
+ *
+ * Output goes through *buf / *buf_len. *src is advanced past the string,
+ * except on the early error returns below where it is left untouched.
+ *
+ * @return HTTP2_ERROR_COMPRESSION_ERROR on empty, malformed or truncated
+ *         input, HTTP2_ERROR_INTERNAL_ERROR when a raw string does not fit
+ *         in the output buffer, otherwise HTTP2_ERROR_NO_ERROR or, for
+ *         Huffman-coded strings, the result of hpack_decode_huffman
+ */
+__clib_export http2_error_t
+hpack_decode_string (u8 **src, u8 *end, u8 **buf, uword *buf_len)
+{
+  u8 *cur, huffman_coded;
+  uword str_len;
+
+  if (*src == end)
+    return HTTP2_ERROR_COMPRESSION_ERROR;
+
+  cur = *src;
+  /* H flag in first bit */
+  huffman_coded = *cur & 0x80;
+
+  /* length is integer with 7 bit prefix */
+  str_len = hpack_decode_int (&cur, end, 7);
+  if (PREDICT_FALSE (str_len == HPACK_INVALID_INT))
+    return HTTP2_ERROR_COMPRESSION_ERROR;
+
+  /* do we have everything? */
+  if (str_len > (end - cur))
+    return HTTP2_ERROR_COMPRESSION_ERROR;
+
+  if (!huffman_coded)
+    {
+      /* enough space? */
+      if (str_len > *buf_len)
+        return HTTP2_ERROR_INTERNAL_ERROR;
+
+      clib_memcpy (*buf, cur, str_len);
+      *buf_len -= str_len;
+      *buf += str_len;
+      *src = cur + str_len;
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  *src = cur + str_len;
+  return hpack_decode_huffman (&cur, cur + str_len, buf, buf_len);
+}
+
+/* Encode value as an HPACK prefix-coded integer starting at dst.
+ *
+ * The first octet is OR-ed into *dst, so whatever bits the caller already
+ * stored there are kept. Returns the pointer past the last octet written.
+ */
+__clib_export u8 *
+hpack_encode_int (u8 *dst, uword value, u8 prefix_len)
+{
+  u16 prefix_max;
+
+  ASSERT (prefix_len >= 1 && prefix_len <= 8);
+
+  prefix_max = (1 << prefix_len) - 1;
+
+  if (value >= prefix_max)
+    {
+      /* all bits of the prefix are set to 1 */
+      *dst++ |= (u8) prefix_max;
+      /* and the value is decreased by 2^prefix_len-1 */
+      value -= prefix_max;
+      /* MSB of each byte is used as continuation flag */
+      while (value >= 0x80)
+        {
+          *dst++ = 0x80 | (value & 0x7F);
+          value >>= 7;
+        }
+      /* except for the last byte */
+      *dst++ = (u8) value;
+      return dst;
+    }
+
+  /* value fits within the prefix */
+  *dst++ |= (u8) value;
+  return dst;
+}
+
+/* Return the number of bytes needed to Huffman-encode value_len bytes at
+ * value: the sum of the per-symbol code lengths, rounded up to whole
+ * bytes. */
+uword
+hpack_huffman_encoded_len (const u8 *value, uword value_len)
+{
+  uword n_bits = 0;
+  uword i;
+
+  for (i = 0; i < value_len; i++)
+    n_bits += huff_sym_table[value[i]].code_len;
+
+  /* round up to byte boundary */
+  return (n_bits + 7) / 8;
+}
+
+/* Huffman-encode value_len bytes from value into dst and return the pointer
+ * past the last byte written.
+ *
+ * The accumulator is used as a 40-bit window: new codes are placed below the
+ * bits already pending, and accumulator_len counts the bits still free in
+ * that window (40 means empty). WRITE_BYTE emits the top byte of the window
+ * (bits 39..32) and frees 8 bits. */
+u8 *
+hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len)
+{
+ u8 *end;
+ hpack_huffman_symbol_t *sym;
+ u8 accumulator_len = 40; /* leftover (1 byte) + max code_len (4 bytes) */
+ u64 accumulator = 0; /* to fit leftover and current code */
+
+ end = (u8 *) value + value_len;
+
+ while (value != end)
+ {
+ sym = &huff_sym_table[*value++];
+ /* add current code to leftover of previous one */
+ accumulator |= (u64) sym->code << (accumulator_len - sym->code_len);
+ accumulator_len -= sym->code_len;
+ /* write only fully occupied bytes (max 4) */
+ /* the cases fall through on purpose: entering at "1 ... 8" emits four
+ * bytes, at "25 ... 32" one byte */
+ switch (accumulator_len)
+ {
+ case 1 ... 8:
+#define WRITE_BYTE() \
+ *dst = (u8) (accumulator >> 32); \
+ accumulator_len += 8; \
+ accumulator <<= 8; \
+ dst++;
+ WRITE_BYTE ();
+ /* fall through */
+ case 9 ... 16:
+ WRITE_BYTE ();
+ /* fall through */
+ case 17 ... 24:
+ WRITE_BYTE ();
+ /* fall through */
+ case 25 ... 32:
+ WRITE_BYTE ();
+ /* fall through */
+ default:
+ break;
+ }
+ }
+
+ /* padding (0-7 bits)*/
+ ASSERT (accumulator_len > 32 && accumulator_len <= 40);
+ if (accumulator_len != 40)
+ {
+ /* fill the unused low bits of the last byte with ones */
+ accumulator |= (u64) 0x7F << (accumulator_len - 7);
+ *dst = (u8) (accumulator >> 32);
+ dst++;
+ }
+ return dst;
+}
+
+/*
+ * Encode a string literal (RFC 7541 section 5.2): a 7-bit prefixed length
+ * with the H flag in the top bit, followed by the string data. Huffman
+ * coding is used only when it is strictly shorter than the raw bytes.
+ * Returns the pointer just past the last byte written.
+ */
+__clib_export u8 *
+hpack_encode_string (u8 *dst, const u8 *value, uword value_len)
+{
+  uword encoded_len = hpack_huffman_encoded_len (value, value_len);
+
+  if (encoded_len < value_len)
+    {
+      /* H flag set, length is the size of the Huffman-coded data */
+      dst[0] = 0x80;
+      dst = hpack_encode_int (dst, encoded_len, 7);
+      return hpack_encode_huffman (dst, value, value_len);
+    }
+
+  /* Huffman would not save anything: H flag cleared, raw octets follow */
+  dst[0] = 0;
+  dst = hpack_encode_int (dst, value_len, 7);
+  clib_memcpy (dst, value, value_len);
+  return dst + value_len;
+}
+
+/*
+ * Initialize an empty dynamic table. Both the negotiated maximum and the
+ * current size start at max_size; the ring gets one slot per
+ * HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD bytes of max_size.
+ */
+__clib_export void
+hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size)
+{
+  u32 n_slots = max_size / HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD;
+
+  table->used = 0;
+  table->size = max_size;
+  table->max_size = max_size;
+  clib_ring_new (table->entries, n_slots);
+}
+
+/*
+ * Release a dynamic table: free the buffer of every entry still enqueued,
+ * then free the ring itself.
+ */
+__clib_export void
+hpack_dynamic_table_free (hpack_dynamic_table_t *table)
+{
+  hpack_dynamic_table_entry_t *entry;
+
+  for (entry = clib_ring_deq (table->entries); entry != 0;
+       entry = clib_ring_deq (table->entries))
+    vec_free (entry->buf);
+
+  clib_ring_free (table->entries);
+}
+
+/* An entry stores name and value back to back in a single vector (buf):
+ * the first name_len bytes are the name, the remainder is the value. */
+#define hpack_dynamic_table_entry_value_base(e) \
+ ((char *) ((e)->buf + (e)->name_len))
+#define hpack_dynamic_table_entry_value_len(e) \
+ (vec_len ((e)->buf) - (e)->name_len)
+
+/*
+ * Look up a dynamic table entry.
+ *
+ * index is zero-based and counts from the most recently added entry
+ * (0 == newest), i.e. the HPACK index with the static table part already
+ * subtracted. Returns 0 when index does not refer to an enqueued entry,
+ * which includes every index on an empty table.
+ */
+always_inline hpack_dynamic_table_entry_t *
+hpack_dynamic_table_get (hpack_dynamic_table_t *table, uword index)
+{
+  u32 n_entries = clib_ring_n_enq (table->entries);
+
+  /* valid indices are 0 .. n_entries - 1; accepting index == n_entries
+   * would underflow the offset computed below */
+  if (index >= n_entries)
+    return 0;
+
+  hpack_dynamic_table_entry_t *first = clib_ring_get_first (table->entries);
+  u32 first_index = first - table->entries;
+  /* newest entry sits n_entries - 1 slots after the oldest one, wrap
+   * around the end of the ring storage */
+  u32 entry_index =
+    (first_index + (n_entries - 1 - (u32) index)) % vec_len (table->entries);
+  return table->entries + entry_index;
+}
+
+/*
+ * format() helper printing all dynamic table entries, newest first.
+ * Expects a single hpack_dynamic_table_t pointer in args.
+ */
+__clib_export u8 *
+format_hpack_dynamic_table (u8 *s, va_list *args)
+{
+  hpack_dynamic_table_t *table = va_arg (*args, hpack_dynamic_table_t *);
+  hpack_dynamic_table_entry_t *entry;
+  u32 pos = 0;
+
+  s = format (s, "HPACK dynamic table:\n");
+  while (pos < clib_ring_n_enq (table->entries))
+    {
+      entry = hpack_dynamic_table_get (table, pos);
+      s = format (s, "\t[%u] %U: %U\n", pos, format_http_bytes, entry->buf,
+                  entry->name_len, format_http_bytes,
+                  hpack_dynamic_table_entry_value_base (entry),
+                  hpack_dynamic_table_entry_value_len (entry));
+      pos++;
+    }
+  return s;
+}
+
+/*
+ * Evict the oldest entry: dequeue it from the ring, give its accounted size
+ * (buffer length plus per-entry overhead) back to the table and reset the
+ * buffer length. The buffer memory itself is kept with the slot.
+ * The table must not be empty.
+ */
+static inline void
+hpack_dynamic_table_evict_one (hpack_dynamic_table_t *table)
+{
+  hpack_dynamic_table_entry_t *victim;
+  u32 freed;
+
+  victim = clib_ring_deq (table->entries);
+  ASSERT (victim);
+  HTTP_DBG (2, "%U: %U", format_http_bytes, victim->buf, victim->name_len,
+            format_http_bytes, hpack_dynamic_table_entry_value_base (victim),
+            hpack_dynamic_table_entry_value_len (victim));
+  freed = vec_len (victim->buf) + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD;
+  vec_reset_length (victim->buf);
+  table->used -= freed;
+}
+
+/*
+ * Insert a new name/value pair into the dynamic table.
+ *
+ * Oldest entries are evicted until the new entry fits or the table is
+ * empty. An entry that is larger than the whole table is silently dropped
+ * (after the table has been emptied), which is not an error.
+ */
+static void
+hpack_dynamic_table_add (hpack_dynamic_table_t *table, http_token_t *name,
+                         http_token_t *value)
+{
+  hpack_dynamic_table_entry_t *slot;
+  u32 cost;
+
+  /* accounted size: name + value + fixed per-entry overhead */
+  cost = name->len + value->len + HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD;
+
+  /* make space or evict all */
+  for (;;)
+    {
+      if (!clib_ring_n_enq (table->entries))
+        break;
+      if (table->used + cost <= table->size)
+        break;
+      hpack_dynamic_table_evict_one (table);
+    }
+
+  /* does not fit even into an empty table, nothing gets added */
+  if (cost > table->size)
+    return;
+
+  slot = clib_ring_enq (table->entries);
+  ASSERT (slot);
+  /* name and value are stored back to back in one vector */
+  vec_validate (slot->buf, name->len + value->len - 1);
+  clib_memcpy (slot->buf, name->base, name->len);
+  clib_memcpy (slot->buf + name->len, value->base, value->len);
+  slot->name_len = name->len;
+  table->used += cost;
+
+  HTTP_DBG (2, "%U: %U", format_http_bytes, slot->buf, slot->name_len,
+            format_http_bytes, hpack_dynamic_table_entry_value_base (slot),
+            hpack_dynamic_table_entry_value_len (slot));
+}
+
+/*
+ * Resolve an HPACK index (1-based, static table first, dynamic table after
+ * it) into name and value tokens that point into the table storage.
+ *
+ * For static entries the value is filled in only when value_is_indexed is
+ * set; for dynamic entries it is always filled in. index must not be 0.
+ * Returns HTTP2_ERROR_COMPRESSION_ERROR if the index is not in the dynamic
+ * table.
+ */
+static http2_error_t
+hpack_get_table_entry (uword index, http_token_t *name, http_token_t *value,
+                       u8 value_is_indexed, hpack_dynamic_table_t *dt)
+{
+  hpack_static_table_entry_t *se;
+  hpack_dynamic_table_entry_t *de;
+
+  if (index > HPACK_STATIC_TABLE_SIZE)
+    {
+      /* dynamic table indices continue right after the static ones */
+      de = hpack_dynamic_table_get (dt, index - HPACK_STATIC_TABLE_SIZE - 1);
+      if (PREDICT_FALSE (!de))
+        {
+          HTTP_DBG (1, "index %llu not in dynamic table", index);
+          return HTTP2_ERROR_COMPRESSION_ERROR;
+        }
+      name->base = (char *) de->buf;
+      name->len = de->name_len;
+      value->base = hpack_dynamic_table_entry_value_base (de);
+      value->len = hpack_dynamic_table_entry_value_len (de);
+      HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, name->base,
+                name->len, format_http_bytes, value->base, value->len);
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  se = &hpack_static_table[index - 1];
+  name->base = se->name;
+  name->len = se->name_len;
+  if (value_is_indexed)
+    {
+      value->base = se->value;
+      value->len = se->value_len;
+    }
+  HTTP_DBG (2, "[%llu] %U: %U", index, format_http_bytes, se->name,
+            se->name_len, format_http_bytes, se->value, se->value_len);
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Decode one header field representation (RFC 7541 section 6), preceded by
+ * any number of dynamic table size updates.
+ *
+ * src        in/out, current position in the header block; advanced past
+ *            the decoded field only on success
+ * end        end of the header block, *src must be below it
+ * buf        in/out, output position; name bytes are written first,
+ *            immediately followed by value bytes
+ * buf_len    in/out, space left in the output buffer
+ * name_len   out, length of the decoded name
+ * value_len  out, length of the decoded value
+ * dt         decoder dynamic table, updated for size updates and for
+ *            literals with incremental indexing
+ *
+ * Returns HTTP2_ERROR_NO_ERROR on success. On failure *buf / *buf_len may
+ * already have been advanced by a partially decoded field.
+ */
+__clib_export http2_error_t
+hpack_decode_header (u8 **src, u8 *end, u8 **buf, uword *buf_len,
+                     u32 *name_len, u32 *value_len, hpack_dynamic_table_t *dt)
+{
+  u8 *p;
+  u8 value_is_indexed = 0, add_new_entry = 0;
+  uword old_len, new_max, index = 0;
+  http_token_t name, value;
+  http2_error_t rv;
+
+  ASSERT (*src < end);
+  p = *src;
+
+  /* dynamic table size update */
+  while ((*p & 0xE0) == 0x20)
+    {
+      new_max = hpack_decode_int (&p, end, 5);
+      /* a size update must be followed by a field and must not exceed the
+       * limit we advertised (this also catches HPACK_INVALID_INT) */
+      if (p == end || new_max > (uword) dt->max_size)
+        {
+          HTTP_DBG (1, "invalid dynamic table size update");
+          return HTTP2_ERROR_COMPRESSION_ERROR;
+        }
+      /* table is shrinking: evict oldest entries until usage fits into the
+       * new size (RFC 7541 section 4.3) */
+      while (clib_ring_n_enq (dt->entries) && dt->used > new_max)
+        hpack_dynamic_table_evict_one (dt);
+      dt->size = (u32) new_max;
+    }
+
+  if (*p & 0x80) /* indexed header field */
+    {
+      index = hpack_decode_int (&p, end, 7);
+      /* index value of 0 is not used */
+      if (index == 0 || index == HPACK_INVALID_INT)
+        {
+          HTTP_DBG (1, "invalid index");
+          return HTTP2_ERROR_COMPRESSION_ERROR;
+        }
+      value_is_indexed = 1;
+    }
+  else if (*p > 0x40) /* incremental indexing - indexed name */
+    {
+      index = hpack_decode_int (&p, end, 6);
+      /* index value of 0 is not used */
+      if (index == 0 || index == HPACK_INVALID_INT)
+        {
+          HTTP_DBG (1, "invalid index");
+          return HTTP2_ERROR_COMPRESSION_ERROR;
+        }
+      add_new_entry = 1;
+    }
+  else if (*p == 0x40) /* incremental indexing - new name */
+    {
+      add_new_entry = 1;
+      p++;
+    }
+  else /* without indexing / never indexed */
+    {
+      if ((*p & 0x0F) == 0) /* new name */
+        p++;
+      else /* indexed name */
+        {
+          index = hpack_decode_int (&p, end, 4);
+          /* index value of 0 is not used */
+          if (index == 0 || index == HPACK_INVALID_INT)
+            {
+              HTTP_DBG (1, "invalid index");
+              return HTTP2_ERROR_COMPRESSION_ERROR;
+            }
+        }
+    }
+
+  if (index)
+    {
+      /* name (and possibly value) comes from the static/dynamic table */
+      rv = hpack_get_table_entry (index, &name, &value, value_is_indexed, dt);
+      if (rv != HTTP2_ERROR_NO_ERROR)
+        {
+          HTTP_DBG (1, "entry index %llu error", index);
+          return rv;
+        }
+      if (name.len > *buf_len)
+        {
+          HTTP_DBG (1, "not enough space");
+          return HTTP2_ERROR_INTERNAL_ERROR;
+        }
+      clib_memcpy (*buf, name.base, name.len);
+      /* from now on refer to the copy in the output buffer: if this field
+       * is added to the dynamic table below, the entry the name came from
+       * may be evicted and its storage recycled (RFC 7541 section 4.4) */
+      name.base = (char *) *buf;
+      *buf_len -= name.len;
+      *buf += name.len;
+      *name_len = name.len;
+      if (value_is_indexed)
+        {
+          if (value.len > *buf_len)
+            {
+              HTTP_DBG (1, "not enough space");
+              return HTTP2_ERROR_INTERNAL_ERROR;
+            }
+          clib_memcpy (*buf, value.base, value.len);
+          *buf_len -= value.len;
+          *buf += value.len;
+          *value_len = value.len;
+        }
+    }
+  else
+    {
+      /* literal name, decoded straight into the output buffer */
+      old_len = *buf_len;
+      name.base = (char *) *buf;
+      rv = hpack_decode_string (&p, end, buf, buf_len);
+      if (rv != HTTP2_ERROR_NO_ERROR)
+        {
+          HTTP_DBG (1, "invalid header name");
+          return rv;
+        }
+      *name_len = old_len - *buf_len;
+      name.len = *name_len;
+    }
+
+  if (!value_is_indexed)
+    {
+      /* literal value, decoded right behind the name */
+      old_len = *buf_len;
+      value.base = (char *) *buf;
+      rv = hpack_decode_string (&p, end, buf, buf_len);
+      if (rv != HTTP2_ERROR_NO_ERROR)
+        {
+          HTTP_DBG (1, "invalid header value");
+          return rv;
+        }
+      *value_len = old_len - *buf_len;
+      value.len = *value_len;
+    }
+
+  if (add_new_entry)
+    hpack_dynamic_table_add (dt, &name, &value);
+
+  *src = p;
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Check that a field name consists only of lowercase token characters.
+ *
+ * The bitmap has one bit per octet value: all tchar characters from
+ * RFC 9110 section 5.6.2 except uppercase letters, which are left out of
+ * the table so that names containing them are rejected.
+ * Returns 1 if every byte is allowed (also for name_len == 0), 0 otherwise.
+ */
+static inline u8
+hpack_header_name_is_valid (u8 *name, u32 name_len)
+{
+  u32 i;
+  static uword tchar[4] = {
+    /* !#$%&'*+-.0123456789 */
+    0x03ff6cfa00000000,
+    /* ^_`abcdefghijklmnopqrstuvwxyz|~ */
+    0x57ffffffc0000000,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+  for (i = 0; i < name_len; i++)
+    {
+      if (!clib_bitmap_get_no_check (tchar, name[i]))
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Check a field value: it must not begin or end with SP / HTAB and every
+ * byte must be VCHAR, SP, HTAB or in the range 0x80-0xFF.
+ * An empty value is valid. Returns 1 if valid, 0 otherwise.
+ */
+static inline u8
+hpack_header_value_is_valid (u8 *value, u32 value_len)
+{
+  /* one bit per octet value: VCHAR / SP / HTAB / %x80-FF */
+  static uword tchar[4] = {
+    0xffffffff00000200,
+    0x7fffffffffffffff,
+    0xffffffffffffffff,
+    0xffffffffffffffff,
+  };
+  u8 first, last;
+  u32 pos = 0;
+
+  if (!value_len)
+    return 1;
+
+  /* no leading or trailing whitespace */
+  first = value[0];
+  last = value[value_len - 1];
+  if (first == 0x20 || first == 0x09 || last == 0x20 || last == 0x09)
+    return 0;
+
+  while (pos < value_len)
+    {
+      if (!clib_bitmap_get_no_check (tchar, value[pos]))
+        return 0;
+      pos++;
+    }
+  return 1;
+}
+
+/*
+ * Map the value of the :method pseudo-header to a request method.
+ * Only GET, POST and CONNECT are recognized (exact, case-sensitive match);
+ * anything else yields HTTP_REQ_UNKNOWN rather than an error, because an
+ * unknown method is a stream error and HPACK reports connection errors only.
+ */
+static inline http_req_method_t
+hpack_parse_method (u8 *value, u32 value_len)
+{
+  if (value_len == 3 && !memcmp (value, "GET", 3))
+    return HTTP_REQ_GET;
+  if (value_len == 4 && !memcmp (value, "POST", 4))
+    return HTTP_REQ_POST;
+  if (value_len == 7 && !memcmp (value, "CONNECT", 7))
+    return HTTP_REQ_CONNECT;
+
+  return HTTP_REQ_UNKNOWN;
+}
+
+/*
+ * Map the value of the :scheme pseudo-header to a URL scheme.
+ * Only "http" and "https" are recognized (exact match); anything else
+ * yields HTTP_URL_SCHEME_UNKNOWN rather than an error, because an unknown
+ * scheme is a stream error and HPACK reports connection errors only.
+ */
+static inline http_url_scheme_t
+hpack_parse_scheme (u8 *value, u32 value_len)
+{
+  if (value_len == 4 && !memcmp (value, "http", 4))
+    return HTTP_URL_SCHEME_HTTP;
+  if (value_len == 5 && !memcmp (value, "https", 5))
+    return HTTP_URL_SCHEME_HTTPS;
+
+  return HTTP_URL_SCHEME_UNKNOWN;
+}
+
+/*
+ * Handle one request pseudo-header (:path, :method, :scheme, :authority).
+ *
+ * The caller has already checked that name[0] is ':', so matching starts
+ * at name[1]. The result is recorded in control_data and the matching bit
+ * is set in parsed_bitmap. Unknown pseudo-headers, duplicates and an empty
+ * :path yield HTTP2_ERROR_PROTOCOL_ERROR. path and authority are stored as
+ * pointers into the caller's buffer, not copied.
+ */
+static http2_error_t
+hpack_parse_req_pseudo_header (u8 *name, u32 name_len, u8 *value,
+                               u32 value_len,
+                               hpack_request_control_data_t *control_data)
+{
+  HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len, format_http_bytes,
+            value, value_len);
+
+  if (name_len == 5 && !memcmp (name + 1, "path", 4))
+    {
+      if ((control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_PATH_PARSED) ||
+          value_len == 0)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_PATH_PARSED;
+      control_data->path = value;
+      control_data->path_len = value_len;
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  if (name_len == 7 && name[1] == 'm' && !memcmp (name + 2, "ethod", 5))
+    {
+      if (control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_METHOD_PARSED)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_METHOD_PARSED;
+      control_data->method = hpack_parse_method (value, value_len);
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  if (name_len == 7 && name[1] == 's' && !memcmp (name + 2, "cheme", 5))
+    {
+      if (control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_SCHEME_PARSED)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_SCHEME_PARSED;
+      control_data->scheme = hpack_parse_scheme (value, value_len);
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  if (name_len == 10 && !memcmp (name + 1, "authority", 9))
+    {
+      if (control_data->parsed_bitmap & HPACK_PSEUDO_HEADER_AUTHORITY_PARSED)
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      control_data->parsed_bitmap |= HPACK_PSEUDO_HEADER_AUTHORITY_PARSED;
+      control_data->authority = value;
+      control_data->authority_len = value_len;
+      return HTTP2_ERROR_NO_ERROR;
+    }
+
+  /* not a pseudo-header defined for requests */
+  return HTTP2_ERROR_PROTOCOL_ERROR;
+}
+
+/* Special treatment for headers like:
+ *
+ * RFC9113 8.2.2: any message containing connection-specific header
+ * fields MUST be treated as malformed (connection, upgrade, keep-alive,
+ * proxy-connection, transfer-encoding), TE header MUST NOT contain any value
+ * other than "trailers"
+ *
+ * find headers that will be used later in preprocessing (content-length)
+ *
+ * name/value are the decoded field, index is the position of the field in
+ * the parsed header list. Candidates are pre-selected by name length, then
+ * the whole name is compared. Returns HTTP2_ERROR_PROTOCOL_ERROR for a
+ * forbidden field, HTTP2_ERROR_NO_ERROR otherwise. Only the first
+ * content-length field is remembered in control_data.
+ */
+always_inline http2_error_t
+hpack_preprocess_header (u8 *name, u32 name_len, u8 *value, u32 value_len,
+                         uword index,
+                         hpack_request_control_data_t *control_data)
+{
+  switch (name_len)
+    {
+    case 2:
+      /* "te" is allowed only with the value "trailers" */
+      if (name[0] == 't' && name[1] == 'e' &&
+          !http_token_is_case ((const char *) value, value_len,
+                               http_token_lit ("trailers")))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      break;
+    case 7:
+      if (!memcmp (name, "upgrade", 7))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      break;
+    case 10:
+      switch (name[0])
+        {
+        case 'c':
+          if (!memcmp (name + 1, "onnection", 9))
+            return HTTP2_ERROR_PROTOCOL_ERROR;
+          break;
+        case 'k':
+          if (!memcmp (name + 1, "eep-alive", 9))
+            return HTTP2_ERROR_PROTOCOL_ERROR;
+          break;
+        default:
+          break;
+        }
+      break;
+    case 14:
+      /* compare the full name, other 14 byte names also start with
+       * "content" (e.g. content-digest) */
+      if (!memcmp (name, "content-length", 14) &&
+          control_data->content_len_header_index == ~0)
+        control_data->content_len_header_index = index;
+      break;
+    case 16:
+      if (!memcmp (name, "proxy-connection", 16))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      break;
+    case 17:
+      if (!memcmp (name, "transfer-encoding", 17))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      break;
+    default:
+      break;
+    }
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Decode a complete request header block.
+ *
+ * Fields are decoded one after another into dst (name immediately followed
+ * by value). Pseudo-headers are stored in control_data and must all come
+ * before the first regular header. Each regular header is validated and
+ * appended to *headers with offsets relative to control_data->headers,
+ * which points at the first regular header in dst.
+ *
+ * Returns HTTP2_ERROR_NO_ERROR on success, otherwise the error reported by
+ * the decoder or HTTP2_ERROR_PROTOCOL_ERROR for a malformed field.
+ * control_data->headers is set only if at least one regular header exists.
+ */
+__clib_export http2_error_t
+hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len,
+                     hpack_request_control_data_t *control_data,
+                     http_field_line_t **headers,
+                     hpack_dynamic_table_t *dynamic_table)
+{
+  u8 *p, *end, *b, *name, *value;
+  u8 regular_header_parsed = 0;
+  u32 name_len, value_len;
+  uword b_left;
+  http_field_line_t *header;
+  http2_error_t rv;
+
+  p = src;
+  end = src + src_len;
+  b = dst;
+  b_left = dst_len;
+  control_data->parsed_bitmap = 0;
+  control_data->headers_len = 0;
+  control_data->content_len_header_index = ~0;
+
+  while (p != end)
+    {
+      name = b;
+      rv = hpack_decode_header (&p, end, &b, &b_left, &name_len, &value_len,
+                                dynamic_table);
+      if (rv != HTTP2_ERROR_NO_ERROR)
+        {
+          HTTP_DBG (1, "hpack_decode_header: %U", format_http2_error, rv);
+          return rv;
+        }
+      value = name + name_len;
+
+      /* field name is a token, i.e. at least one character; checking here
+       * also keeps name[0] below from reading past the decoded name */
+      if (name_len == 0)
+        {
+          HTTP_DBG (1, "empty header name");
+          return HTTP2_ERROR_PROTOCOL_ERROR;
+        }
+
+      /* pseudo header */
+      if (name[0] == ':')
+        {
+          /* all pseudo-headers must be before regular headers */
+          if (regular_header_parsed)
+            {
+              HTTP_DBG (1, "pseudo-headers after regular header");
+              return HTTP2_ERROR_PROTOCOL_ERROR;
+            }
+          rv = hpack_parse_req_pseudo_header (name, name_len, value, value_len,
+                                              control_data);
+          if (rv != HTTP2_ERROR_NO_ERROR)
+            {
+              HTTP_DBG (1, "hpack_parse_req_pseudo_header: %U",
+                        format_http2_error, rv);
+              return rv;
+            }
+          continue;
+        }
+
+      /* regular header */
+      if (!hpack_header_name_is_valid (name, name_len))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      if (!regular_header_parsed)
+        {
+          /* first regular header marks the start of the header section */
+          regular_header_parsed = 1;
+          control_data->headers = name;
+        }
+      if (!hpack_header_value_is_valid (value, value_len))
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+      vec_add2 (*headers, header, 1);
+      HTTP_DBG (2, "%U: %U", format_http_bytes, name, name_len,
+                format_http_bytes, value, value_len);
+      header->name_offset = name - control_data->headers;
+      header->name_len = name_len;
+      header->value_offset = value - control_data->headers;
+      header->value_len = value_len;
+      control_data->headers_len += name_len;
+      control_data->headers_len += value_len;
+      /* reject connection-specific fields, remember content-length */
+      rv = hpack_preprocess_header (name, name_len, value, value_len,
+                                    header - *headers, control_data);
+      if (rv != HTTP2_ERROR_NO_ERROR)
+        {
+          HTTP_DBG (1, "connection-specific header present");
+          return rv;
+        }
+    }
+  /* everything written to dst, pseudo-headers included */
+  control_data->control_data_len = dst_len - b_left;
+  HTTP_DBG (2, "%U", format_hpack_dynamic_table, dynamic_table);
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Append a well-known header to the vector dst as "literal header field
+ * without indexing". The name is sent as a static table index when the
+ * header has one, otherwise as a string literal. Space is reserved for the
+ * worst case first and the vector is trimmed to the bytes actually written.
+ * Returns the (possibly reallocated) vector.
+ */
+static inline u8 *
+hpack_encode_header (u8 *dst, http_header_name_t name, const u8 *value,
+                     u32 value_len)
+{
+  hpack_token_t *tok = &hpack_headers[name];
+  u32 start_len = vec_len (dst);
+  u8 *first, *cur;
+
+  if (!tok->static_table_index)
+    {
+      /* new name: 1 byte for the 4 bit prefix + two length-prefixed
+       * strings */
+      vec_add2 (dst, first,
+                tok->len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + 1);
+      first[0] = 0x00;
+      cur = hpack_encode_string (first + 1, (const u8 *) tok->base, tok->len);
+    }
+  else
+    {
+      /* indexed name: a static table index with 4 bit prefix takes at most
+       * 2 bytes */
+      vec_add2 (dst, first, 2 + value_len + HPACK_ENCODED_INT_MAX_LEN);
+      /* upper 4 bits must be zero before the index is OR-ed in */
+      first[0] = 0x00;
+      cur = hpack_encode_int (first, tok->static_table_index, 4);
+    }
+  cur = hpack_encode_string (cur, value, value_len);
+
+  /* drop the part of the reservation that was not needed */
+  vec_set_len (dst, start_len + (u32) (cur - first));
+  return dst;
+}
+
+/*
+ * Append a header with an arbitrary name to the vector dst as "literal
+ * header field without indexing - new name". Worst-case space is reserved
+ * first, then the vector is trimmed to the bytes actually written.
+ * Returns the (possibly reallocated) vector.
+ */
+static inline u8 *
+hpack_encode_custom_header (u8 *dst, const u8 *name, u32 name_len,
+                            const u8 *value, u32 value_len)
+{
+  u32 start_len = vec_len (dst);
+  u8 *first, *cur;
+
+  /* 1 byte for the 4 bit prefix + two length-prefixed strings */
+  vec_add2 (dst, first,
+            name_len + value_len + HPACK_ENCODED_INT_MAX_LEN * 2 + 1);
+  first[0] = 0x00;
+  cur = hpack_encode_string (first + 1, name, name_len);
+  cur = hpack_encode_string (cur, value, value_len);
+
+  vec_set_len (dst, start_len + (u32) (cur - first));
+  return dst;
+}
+
+/*
+ * Append the :status pseudo-header for sc to the vector dst and return the
+ * (possibly reallocated) vector.
+ *
+ * The status codes listed in the switch are emitted as a single "indexed
+ * header field" byte (0x80 | index). Any other code is emitted as a literal
+ * without indexing that reuses name index 8 and carries the first 3 bytes
+ * of http_status_code_str[sc] as the value.
+ */
+static inline u8 *
+hpack_encode_status_code (u8 *dst, http_status_code_t sc)
+{
+ u32 orig_len, actual_size;
+ u8 *a, *b;
+
+/* one byte: indexed header field with the given table index;
+ * note the macro stays defined after this function */
+#define encode_common_sc(_index) \
+ vec_add2 (dst, a, 1); \
+ *a++ = 0x80 | _index;
+
+ switch (sc)
+ {
+ case HTTP_STATUS_OK:
+ encode_common_sc (8);
+ break;
+ case HTTP_STATUS_NO_CONTENT:
+ encode_common_sc (9);
+ break;
+ case HTTP_STATUS_PARTIAL_CONTENT:
+ encode_common_sc (10);
+ break;
+ case HTTP_STATUS_NOT_MODIFIED:
+ encode_common_sc (11);
+ break;
+ case HTTP_STATUS_BAD_REQUEST:
+ encode_common_sc (12);
+ break;
+ case HTTP_STATUS_NOT_FOUND:
+ encode_common_sc (13);
+ break;
+ case HTTP_STATUS_INTERNAL_ERROR:
+ encode_common_sc (14);
+ break;
+ default:
+ orig_len = vec_len (dst);
+ /* worst case: 1 byte name index + 1 byte length + 3 digits */
+ vec_add2 (dst, a, 5);
+ b = a;
+ /* Literal Header Field without Indexing — Indexed Name */
+ *b++ = 8;
+ b = hpack_encode_string (b, (const u8 *) http_status_code_str[sc], 3);
+ actual_size = b - a;
+ /* trim the reservation to what was actually written */
+ vec_set_len (dst, orig_len + actual_size);
+ break;
+ }
+ return dst;
+}
+
+/*
+ * Append a content-length header to the vector dst as a literal without
+ * indexing that references static table index 28 by name, with the value
+ * written as decimal digits. Returns the (possibly reallocated) vector.
+ */
+static inline u8 *
+hpack_encode_content_len (u8 *dst, u64 content_len)
+{
+  /* 20 digits are enough for any u64 */
+  u8 digits[20];
+  u8 *digits_end = digits + sizeof (digits);
+  u8 *first_digit = digits_end;
+  u32 start_len;
+  u8 *first, *cur;
+
+  /* produce the decimal digits back to front */
+  do
+    {
+      first_digit--;
+      *first_digit = '0' + content_len % 10;
+      content_len /= 10;
+    }
+  while (content_len);
+
+  start_len = vec_len (dst);
+  /* 2 bytes name index + 1 byte string length + digits */
+  vec_add2 (dst, first, 3 + sizeof (digits));
+  cur = first;
+
+  /* static table index 28 with 4 bit prefix: 15 + 13 */
+  *cur++ = 0x0F;
+  *cur++ = 0x0D;
+
+  cur = hpack_encode_string (cur, first_digit, digits_end - first_digit);
+  vec_set_len (dst, start_len + (u32) (cur - first));
+  return dst;
+}
+
+/*
+ * Serialize response headers into the vector *dst.
+ *
+ * Order of emission: :status, server, date, content-length (unless
+ * content_len is HPACK_ENCODER_SKIP_CONTENT_LEN), then the application
+ * headers. app_headers is a packed list of records; a record whose first
+ * 32-bit word has HTTP_CUSTOM_HEADER_NAME_BIT set is a custom name token
+ * followed by a value token, any other record is an http_app_header_t.
+ * *dst is updated because the vector may be reallocated.
+ */
+__clib_export void
+hpack_serialize_response (u8 *app_headers, u32 app_headers_len,
+                          hpack_response_control_data_t *control_data,
+                          u8 **dst)
+{
+  u8 *out = *dst;
+  u8 *cursor, *limit;
+
+  /* status code must be first since it is pseudo-header */
+  out = hpack_encode_status_code (out, control_data->sc);
+
+  /* headers owned by the protocol layer */
+  out = hpack_encode_header (out, HTTP_HEADER_SERVER,
+                             control_data->server_name,
+                             control_data->server_name_len);
+  out = hpack_encode_header (out, HTTP_HEADER_DATE, control_data->date,
+                             control_data->date_len);
+
+  /* content length if any */
+  if (control_data->content_len != HPACK_ENCODER_SKIP_CONTENT_LEN)
+    out = hpack_encode_content_len (out, control_data->content_len);
+
+  if (app_headers_len)
+    {
+      cursor = app_headers;
+      limit = app_headers + app_headers_len;
+      while (cursor < limit)
+        {
+          u32 *first_word = (u32 *) cursor;
+
+          if (PREDICT_FALSE (*first_word & HTTP_CUSTOM_HEADER_NAME_BIT))
+            {
+              /* custom name: name token followed by value token */
+              http_custom_token_t *cname, *cvalue;
+              u32 cname_len;
+
+              cname = (http_custom_token_t *) cursor;
+              cname_len = cname->len & ~HTTP_CUSTOM_HEADER_NAME_BIT;
+              cursor += sizeof (http_custom_token_t) + cname_len;
+              cvalue = (http_custom_token_t *) cursor;
+              cursor += sizeof (http_custom_token_t) + cvalue->len;
+              out = hpack_encode_custom_header (out, cname->token, cname_len,
+                                                cvalue->token, cvalue->len);
+            }
+          else
+            {
+              /* well-known name identified by its enum value */
+              http_app_header_t *known = (http_app_header_t *) cursor;
+
+              cursor += sizeof (http_app_header_t) + known->value.len;
+              out = hpack_encode_header (out, known->name, known->value.token,
+                                         known->value.len);
+            }
+        }
+    }
+
+  *dst = out;
+}
diff --git a/src/plugins/http/http2/hpack.h b/src/plugins/http/http2/hpack.h
new file mode 100644
index 00000000000..69144de133a
--- /dev/null
+++ b/src/plugins/http/http2/hpack.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HPACK_H_
+#define SRC_PLUGINS_HTTP_HPACK_H_
+
+#include <vppinfra/types.h>
+#include <http/http2/http2.h>
+#include <http/http.h>
+
+#define HPACK_INVALID_INT CLIB_UWORD_MAX
+#if uword_bits == 64
+#define HPACK_ENCODED_INT_MAX_LEN 10
+#else
+#define HPACK_ENCODED_INT_MAX_LEN 6
+#endif
+
+#define HPACK_DEFAULT_HEADER_TABLE_SIZE 4096
+#define HPACK_DYNAMIC_TABLE_ENTRY_OVERHEAD 32
+#define HPACK_ENCODER_SKIP_CONTENT_LEN ((u64) ~0)
+
+/* One dynamic table entry: name and value share a single vector */
+typedef struct
+{
+ /* vector holding the name immediately followed by the value */
+ u8 *buf;
+ /* length of the name part of buf, the rest of the vector is the value */
+ uword name_len;
+} hpack_dynamic_table_entry_t;
+
+/* HPACK dynamic table state */
+typedef struct
+{
+ /* SETTINGS_HEADER_TABLE_SIZE */
+ u32 max_size;
+ /* dynamic table size update */
+ u32 size;
+ /* current usage (each entry = 32 + name len + value len) */
+ u32 used;
+ /* ring buffer */
+ hpack_dynamic_table_entry_t *entries;
+} hpack_dynamic_table_t;
+
+/* One HPACK_PSEUDO_HEADER_<name>_PARSED bit flag per pseudo-header listed in
+ * foreach_http2_pseudo_header; used as a bitmask in
+ * hpack_request_control_data_t.parsed_bitmap */
+enum
+{
+#define _(bit, name, str) HPACK_PSEUDO_HEADER_##name##_PARSED = (1 << bit),
+ foreach_http2_pseudo_header
+#undef _
+};
+
+/* Result of parsing a request header block (see hpack_parse_request) */
+typedef struct
+{
+ /* value of :method */
+ http_req_method_t method;
+ /* value of :scheme */
+ http_url_scheme_t scheme;
+ /* value of :authority */
+ u8 *authority;
+ u32 authority_len;
+ /* value of :path */
+ u8 *path;
+ u32 path_len;
+ /* start of the regular (non-pseudo) headers */
+ u8 *headers;
+ /* index of the content-length field in the header list, ~0 if none */
+ uword content_len_header_index;
+ /* sum of name and value lengths of all regular headers */
+ u32 headers_len;
+ /* total number of bytes written to the decode buffer */
+ u32 control_data_len;
+ /* HPACK_PSEUDO_HEADER_*_PARSED bits of pseudo-headers seen so far */
+ u16 parsed_bitmap;
+} hpack_request_control_data_t;
+
+/* Header values set by the protocol layer (see hpack_serialize_response) */
+typedef struct
+{
+ /* response status code */
+ http_status_code_t sc;
+ /* body length, HPACK_ENCODER_SKIP_CONTENT_LEN to omit content-length */
+ u64 content_len;
+ /* value of the server header */
+ u8 *server_name;
+ u32 server_name_len;
+ /* value of the date header */
+ u8 *date;
+ u32 date_len;
+} hpack_response_control_data_t;
+
+/**
+ * Decode unsigned variable-length integer (RFC7541 section 5.1)
+ *
+ * @param src Pointer to source buffer which will be advanced
+ * @param end End of the source buffer
+ * @param prefix_len Number of bits of the prefix (between 1 and 8)
+ *
+ * @return Decoded integer or @c HPACK_INVALID_INT in case of error
+ */
+uword hpack_decode_int (u8 **src, u8 *end, u8 prefix_len);
+
+/**
+ * Encode given value as unsigned variable-length integer (RFC7541 section 5.1)
+ *
+ * @param dst Pointer to destination buffer, should have enough space
+ * @param value Integer value to encode (up to @c CLIB_WORD_MAX)
+ * @param prefix_len Number of bits of the prefix (between 1 and 8)
+ *
+ * @return Advanced pointer to the destination buffer
+ *
+ * @note Encoded integer will take maximum @c HPACK_ENCODED_INT_MAX_LEN bytes
+ */
+u8 *hpack_encode_int (u8 *dst, uword value, u8 prefix_len);
+
+/**
+ * Decode Huffman-encoded string
+ *
+ * @param src Pointer to source buffer which will be advanced
+ * @param end End of the source buffer
+ * @param buf Pointer to the buffer where string is decoded which will be
+ * advanced by number of written bytes
+ * @param buf_len Length of the buffer, will be decreased
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success
+ *
+ * @note Caller is responsible to check if there is something left in source
+ * buffer first
+ */
+http2_error_t hpack_decode_huffman (u8 **src, u8 *end, u8 **buf,
+ uword *buf_len);
+
+/**
+ * Encode given string in Huffman codes.
+ *
+ * @param dst Pointer to destination buffer, should have enough space
+ * @param value String to encode
+ * @param value_len Length of the string
+ *
+ * @return Advanced pointer to the destination buffer
+ */
+u8 *hpack_encode_huffman (u8 *dst, const u8 *value, uword value_len);
+
+/**
+ * Number of bytes required to encode given string in Huffman codes
+ *
+ * @param value Pointer to buffer with string to encode
+ * @param value_len Length of the string
+ *
+ * @return number of bytes required to encode string in Huffman codes, round up
+ * to byte boundary
+ */
+uword hpack_huffman_encoded_len (const u8 *value, uword value_len);
+
+/**
+ * Initialize HPACK dynamic table
+ *
+ * @param table Dynamic table to initialize
+ * @param max_size Maximum table size (SETTINGS_HEADER_TABLE_SIZE)
+ */
+void hpack_dynamic_table_init (hpack_dynamic_table_t *table, u32 max_size);
+
+/**
+ * Free HPACK dynamic table
+ *
+ * @param table Dynamic table to free
+ */
+void hpack_dynamic_table_free (hpack_dynamic_table_t *table);
+
+u8 *format_hpack_dynamic_table (u8 *s, va_list *args);
+
+/**
+ * Request parser
+ *
+ * @param src Header block to parse
+ * @param src_len Length of header block
+ * @param dst Buffer where headers will be decoded
+ * @param dst_len Length of buffer for decoded headers
+ * @param control_data Preparsed pseudo-headers
+ * @param headers List of regular headers
+ * @param dynamic_table Decoder dynamic table
+ *
+ * @return @c HTTP2_ERROR_NO_ERROR on success, connection error otherwise
+ */
+http2_error_t hpack_parse_request (u8 *src, u32 src_len, u8 *dst, u32 dst_len,
+ hpack_request_control_data_t *control_data,
+ http_field_line_t **headers,
+ hpack_dynamic_table_t *dynamic_table);
+
+/**
+ * Serialize response
+ *
+ * @param app_headers App header list
+ * @param app_headers_len App header list length
+ * @param control_data Header values set by protocol layer
+ * @param dst Vector where serialized headers will be added
+ */
+void hpack_serialize_response (u8 *app_headers, u32 app_headers_len,
+ hpack_response_control_data_t *control_data,
+ u8 **dst);
+
+#endif /* SRC_PLUGINS_HTTP_HPACK_H_ */
diff --git a/src/plugins/http/http2/http2.c b/src/plugins/http/http2/http2.c
new file mode 100644
index 00000000000..67db185823c
--- /dev/null
+++ b/src/plugins/http/http2/http2.c
@@ -0,0 +1,1492 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#include <http/http2/hpack.h>
+#include <http/http2/frame.h>
+#include <http/http_private.h>
+#include <http/http_timer.h>
+
+#ifndef HTTP_2_ENABLE
+#define HTTP_2_ENABLE 0
+#endif
+
+/* Stream states as (symbol, display string) pairs; the enum values follow
+ * the order of this list, starting with IDLE */
+#define foreach_http2_stream_state \
+ _ (IDLE, "IDLE") \
+ _ (OPEN, "OPEN") \
+ _ (HALF_CLOSED, "HALF-CLOSED") \
+ _ (CLOSED, "CLOSED")
+
+typedef enum http2_stream_state_
+{
+#define _(s, str) HTTP2_STREAM_STATE_##s,
+ foreach_http2_stream_state
+#undef _
+} http2_stream_state_t;
+
+/* Per-request flags as (symbol, display string) pairs */
+#define foreach_http2_req_flags _ (APP_CLOSED, "app-closed")
+
+/* bit position of each request flag */
+typedef enum http2_req_flags_bit_
+{
+#define _(sym, str) HTTP2_REQ_F_BIT_##sym,
+ foreach_http2_req_flags
+#undef _
+} http2_req_flags_bit_t;
+
+/* bit mask of each request flag (1 << bit position) */
+typedef enum http2_req_flags_
+{
+#define _(sym, str) HTTP2_REQ_F_##sym = 1 << HTTP2_REQ_F_BIT_##sym,
+ foreach_http2_req_flags
+#undef _
+} __clib_packed http2_req_flags_t;
+
+/* HTTP/2 request, one per stream */
+typedef struct http2_req_
+{
+ /* version independent request state */
+ http_req_t base;
+ http2_stream_state_t stream_state;
+ /* HTTP2_REQ_F_* bits */
+ u8 flags;
+ u32 stream_id;
+ /* starts at the peer's initial window size setting */
+ u64 peer_window;
+ /* NOTE(review): presumably the payload of the frame being processed,
+ * not used in this part of the file - confirm */
+ u8 *payload;
+ u32 payload_len;
+} http2_req_t;
+
+/* Per-connection flags as (symbol, display string) pairs */
+#define foreach_http2_conn_flags \
+ _ (EXPECT_PREFACE, "expect-preface") \
+ _ (PREFACE_VERIFIED, "preface-verified")
+
+/* bit position of each connection flag */
+typedef enum http2_conn_flags_bit_
+{
+#define _(sym, str) HTTP2_CONN_F_BIT_##sym,
+ foreach_http2_conn_flags
+#undef _
+} http2_conn_flags_bit_t;
+
+/* bit mask of each connection flag (1 << bit position) */
+typedef enum http2_conn_flags_
+{
+#define _(sym, str) HTTP2_CONN_F_##sym = 1 << HTTP2_CONN_F_BIT_##sym,
+ foreach_http2_conn_flags
+#undef _
+} __clib_packed http2_conn_flags_t;
+
+/* HTTP/2 specific connection context */
+typedef struct http2_conn_ctx_
+{
+ /* starts as http2_default_conn_settings */
+ http2_conn_settings_t peer_settings;
+ /* HPACK table used when decoding header blocks from the peer */
+ hpack_dynamic_table_t decoder_dynamic_table;
+ /* presumably HTTP2_CONN_F_* bits - TODO confirm */
+ u8 flags;
+ u32 last_opened_stream_id;
+ /* reported to the peer in GOAWAY */
+ u32 last_processed_stream_id;
+ /* starts at the peer's initial window size setting */
+ u64 peer_window;
+ /* hash: stream id -> request index in the per-thread request pool */
+ uword *req_by_stream_id;
+} http2_conn_ctx_t;
+
+/* HTTP/2 global state */
+typedef struct http2_main_
+{
+ /* connection context pools, indexed by thread index */
+ http2_conn_ctx_t **conn_pool;
+ /* request pools, indexed by thread index */
+ http2_req_t **req_pool;
+ /* NOTE(review): presumably our own connection settings - confirm */
+ http2_conn_settings_t settings;
+} http2_main_t;
+
+/* single instance, private to this file */
+static http2_main_t http2_main;
+
+/*
+ * Allocate an HTTP/2 connection context from the pool of the connection's
+ * thread. The context is zeroed, peer settings and the connection level
+ * peer window start from the defaults, and the pool index is stored in
+ * hc->opaque.
+ */
+http2_conn_ctx_t *
+http2_conn_ctx_alloc_w_thread (http_conn_t *hc)
+{
+  http2_main_t *h2m = &http2_main;
+  http2_conn_ctx_t *ctx;
+
+  pool_get_aligned_safe (h2m->conn_pool[hc->c_thread_index], ctx,
+                         CLIB_CACHE_LINE_BYTES);
+  clib_memset (ctx, 0, sizeof (*ctx));
+
+  ctx->peer_settings = http2_default_conn_settings;
+  ctx->peer_window = ctx->peer_settings.initial_window_size;
+  ctx->req_by_stream_id = hash_create (0, sizeof (uword));
+
+  /* the http connection remembers the context by its pool index */
+  hc->opaque =
+    uword_to_pointer (ctx - h2m->conn_pool[hc->c_thread_index], void *);
+  HTTP_DBG (1, "h2c [%u]%x", hc->c_thread_index,
+            ctx - h2m->conn_pool[hc->c_thread_index]);
+  return ctx;
+}
+
+/*
+ * Get the HTTP/2 context of a connection: hc->opaque holds its index in
+ * the context pool of the connection's thread.
+ */
+static inline http2_conn_ctx_t *
+http2_conn_ctx_get_w_thread (http_conn_t *hc)
+{
+  u32 ctx_index = pointer_to_uword (hc->opaque);
+
+  return pool_elt_at_index (http2_main.conn_pool[hc->c_thread_index],
+                            ctx_index);
+}
+
+/*
+ * Free the HTTP/2 context of a connection: the stream hash, the decoder
+ * dynamic table (only if HTTP_CONN_F_HAS_REQUEST is set on the connection)
+ * and the pool element itself. In debug builds the element is poisoned
+ * before being returned to the pool.
+ */
+static inline void
+http2_conn_ctx_free (http_conn_t *hc)
+{
+  http2_main_t *h2m = &http2_main;
+  http2_conn_ctx_t *ctx = http2_conn_ctx_get_w_thread (hc);
+
+  HTTP_DBG (1, "h2c [%u]%x", hc->c_thread_index,
+            ctx - h2m->conn_pool[hc->c_thread_index]);
+
+  hash_free (ctx->req_by_stream_id);
+  if (hc->flags & HTTP_CONN_F_HAS_REQUEST)
+    hpack_dynamic_table_free (&ctx->decoder_dynamic_table);
+
+  if (CLIB_DEBUG)
+    memset (ctx, 0xba, sizeof (*ctx));
+  pool_put (h2m->conn_pool[hc->c_thread_index], ctx);
+}
+
+/*
+ * Allocate a request for stream_id from the request pool of the
+ * connection's thread. The request starts zeroed in IDLE state with the
+ * peer's initial window size and is registered in the connection's
+ * stream id -> request index hash.
+ */
+static inline http2_req_t *
+http2_conn_alloc_req (http_conn_t *hc, u32 stream_id)
+{
+  http2_main_t *h2m = &http2_main;
+  http2_conn_ctx_t *h2c;
+  http2_req_t *new_req;
+  http_req_handle_t handle;
+  u32 new_index;
+
+  pool_get_aligned_safe (h2m->req_pool[hc->c_thread_index], new_req,
+                         CLIB_CACHE_LINE_BYTES);
+  clib_memset (new_req, 0, sizeof (*new_req));
+  new_index = new_req - h2m->req_pool[hc->c_thread_index];
+
+  /* request handle carries the protocol version and the pool index */
+  handle.version = HTTP_VERSION_2;
+  handle.req_index = new_index;
+
+  new_req->base.hr_pa_session_handle = SESSION_INVALID_HANDLE;
+  new_req->base.hr_req_handle = handle.as_u32;
+  new_req->base.hr_hc_index = hc->hc_hc_index;
+  new_req->base.c_thread_index = hc->c_thread_index;
+  new_req->stream_id = stream_id;
+  new_req->stream_state = HTTP2_STREAM_STATE_IDLE;
+
+  h2c = http2_conn_ctx_get_w_thread (hc);
+  HTTP_DBG (1, "h2c [%u]%x req_index %x stream_id %u", hc->c_thread_index,
+            h2c - h2m->conn_pool[hc->c_thread_index], new_index, stream_id);
+  new_req->peer_window = h2c->peer_settings.initial_window_size;
+  hash_set (h2c->req_by_stream_id, stream_id, new_index);
+  return new_req;
+}
+
+/*
+ * Free a request: release its header and target vectors and its tx
+ * buffer, remove it from the connection's stream hash and return it to the
+ * request pool of the given thread. In debug builds the element is
+ * poisoned first.
+ */
+static inline void
+http2_conn_free_req (http2_conn_ctx_t *h2c, http2_req_t *req,
+                     clib_thread_index_t thread_index)
+{
+  http2_main_t *h2m = &http2_main;
+
+  HTTP_DBG (1, "h2c [%u]%x req_index %x stream_id %u", thread_index,
+            h2c - h2m->conn_pool[thread_index],
+            ((http_req_handle_t) req->base.hr_req_handle).req_index,
+            req->stream_id);
+
+  /* resources owned by the request */
+  vec_free (req->base.headers);
+  vec_free (req->base.target);
+  http_buffer_free (&req->base.tx_buf);
+
+  /* must happen before poisoning, stream_id is read from the request */
+  hash_unset (h2c->req_by_stream_id, req->stream_id);
+
+  if (CLIB_DEBUG)
+    memset (req, 0xba, sizeof (*req));
+  pool_put (h2m->req_pool[thread_index], req);
+}
+
+/*
+ * Find the request of a connection by stream id.
+ * Returns 0 if the stream id is not registered with the connection.
+ */
+http2_req_t *
+http2_conn_get_req (http_conn_t *hc, u32 stream_id)
+{
+  http2_main_t *h2m = &http2_main;
+  http2_conn_ctx_t *h2c = http2_conn_ctx_get_w_thread (hc);
+  uword *req_index;
+
+  req_index = hash_get (h2c->req_by_stream_id, stream_id);
+  if (!req_index)
+    {
+      HTTP_DBG (1, "hc [%u]%x streamId %u not found", hc->c_thread_index,
+                hc->hc_hc_index, stream_id);
+      return 0;
+    }
+
+  return pool_elt_at_index (h2m->req_pool[hc->c_thread_index], req_index[0]);
+}
+
+/* Get a request by its index in the request pool of the given thread */
+always_inline http2_req_t *
+http2_req_get (u32 req_index, clib_thread_index_t thread_index)
+{
+  http2_req_t *thread_pool = http2_main.req_pool[thread_index];
+
+  return pool_elt_at_index (thread_pool, req_index);
+}
+
+/* send GOAWAY frame and close TCP connection
+ *
+ * The GOAWAY frame carries the given error code and the id of the last
+ * stream that was processed. Every stream of the connection that is not
+ * yet CLOSED gets a transport reset notification before the transport is
+ * shut down. */
+always_inline void
+http2_connection_error (http_conn_t *hc, http2_error_t error,
+ transport_send_params_t *sp)
+{
+ u8 *response;
+ u32 req_index, stream_id;
+ http2_conn_ctx_t *h2c;
+ http2_req_t *req;
+
+ h2c = http2_conn_ctx_get_w_thread (hc);
+
+ /* build the frame in the connection's tx buffer and write it out */
+ response = http_get_tx_buf (hc);
+ http2_frame_write_goaway (error, h2c->last_processed_stream_id, &response);
+ http_io_ts_write (hc, response, vec_len (response), sp);
+ http_io_ts_after_write (hc, 1);
+
+ /* walk all streams registered with this connection */
+ hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({
+ req = http2_req_get (req_index, hc->c_thread_index);
+ if (req->stream_state != HTTP2_STREAM_STATE_CLOSED)
+ session_transport_reset_notify (&req->base.connection);
+ }));
+ http_shutdown_transport (hc);
+}
+
+/*
+ * Serialize an RST_STREAM frame for stream_id with the given error code
+ * into the connection tx buffer and write it to the transport.
+ */
+always_inline void
+http2_send_stream_error (http_conn_t *hc, u32 stream_id, http2_error_t error,
+                         transport_send_params_t *sp)
+{
+  u8 *rst_frame = http_get_tx_buf (hc);
+
+  http2_frame_write_rst_stream (error, stream_id, &rst_frame);
+  http_io_ts_write (hc, rst_frame, vec_len (rst_frame), sp);
+  http_io_ts_after_write (hc, 1);
+}
+
+/*
+ * Reset a single stream: send RST_STREAM to the peer, mark the stream
+ * CLOSED and notify the application session ("closed" when the app has
+ * already closed its side, "closing" otherwise).
+ */
+always_inline void
+http2_stream_error (http_conn_t *hc, http2_req_t *req, http2_error_t error,
+                    transport_send_params_t *sp)
+{
+  ASSERT (req->stream_state > HTTP2_STREAM_STATE_IDLE);
+
+  http2_send_stream_error (hc, req->stream_id, error, sp);
+  req->stream_state = HTTP2_STREAM_STATE_CLOSED;
+
+  if (!(req->flags & HTTP2_REQ_F_APP_CLOSED))
+    {
+      session_transport_closing_notify (&req->base.connection);
+      return;
+    }
+  session_transport_closed_notify (&req->base.connection);
+}
+
+/*
+ * Mark the stream CLOSED and notify the application session: "closing"
+ * when the app has not closed yet, "closed" when it already has.
+ */
+always_inline void
+http2_stream_close (http2_req_t *req)
+{
+  req->stream_state = HTTP2_STREAM_STATE_CLOSED;
+
+  if (!(req->flags & HTTP2_REQ_F_APP_CLOSED))
+    {
+      HTTP_DBG (1, "req [%u]%x all done closing, notify app",
+                req->base.c_thread_index,
+                ((http_req_handle_t) req->base.hr_req_handle).req_index);
+      session_transport_closing_notify (&req->base.connection);
+      return;
+    }
+
+  HTTP_DBG (1, "req [%u]%x app already closed, confirm",
+            req->base.c_thread_index,
+            ((http_req_handle_t) req->base.hr_req_handle).req_index);
+  session_transport_closed_notify (&req->base.connection);
+}
+
+/*
+ * Send the server connection preface: a SETTINGS frame listing every
+ * locally configured setting (http2_main.settings) whose value differs from
+ * the default given in foreach_http2_settings.
+ *
+ * The temporary settings list is released once it has been serialized into
+ * the tx buffer (it used to be leaked on every accepted connection).
+ */
+always_inline void
+http2_send_server_preface (http_conn_t *hc)
+{
+  u8 *response;
+  http2_main_t *h2m = &http2_main;
+  http2_settings_entry_t *setting, *settings_list = 0;
+
+#define _(v, label, member, min, max, default_value, err_code)                \
+  if (h2m->settings.member != default_value)                                  \
+    {                                                                         \
+      vec_add2 (settings_list, setting, 1);                                   \
+      setting->identifier = HTTP2_SETTINGS_##label;                           \
+      setting->value = h2m->settings.member;                                  \
+    }
+  foreach_http2_settings
+#undef _
+
+  response = http_get_tx_buf (hc);
+  http2_frame_write_settings (settings_list, &response);
+  /* entries are now encoded in response, the list itself is not needed */
+  vec_free (settings_list);
+  http_io_ts_write (hc, response, vec_len (response), 0);
+  http_io_ts_after_write (hc, 0);
+}
+
+/*************************************/
+/* request state machine handlers RX */
+/*************************************/
+
+/*
+ * RX handler for HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD.
+ *
+ * Decodes the HPACK header block held in req->payload, validates the
+ * request pseudo-headers and hands the request (an http_msg_t followed by
+ * the decoded control data) to the application.
+ *
+ * hc     http connection the stream belongs to
+ * req    request whose payload holds the header block
+ * sp     transport send params, forwarded when a stream error is sent
+ * error  receives the error code when HTTP_SM_ERROR is returned
+ *
+ * Returns HTTP_SM_ERROR when hpack decoding fails or when no body length is
+ * known while the stream is still OPEN; HTTP_SM_STOP otherwise, including
+ * the cases where the stream was reset with http2_stream_error().
+ *
+ * The scratch buffer used for decoding is freed on every exit path; it is
+ * only needed until its content has been copied to the app fifo.
+ */
+static http_sm_result_t
+http2_req_state_wait_transport_method (http_conn_t *hc, http2_req_t *req,
+                                       transport_send_params_t *sp,
+                                       http2_error_t *error)
+{
+  http2_conn_ctx_t *h2c;
+  hpack_request_control_data_t control_data;
+  u8 *buf = 0;
+  http_msg_t msg;
+  int rv;
+  http_req_state_t new_state = HTTP_REQ_STATE_WAIT_APP_REPLY;
+  http_sm_result_t res = HTTP_SM_STOP;
+  svm_fifo_seg_t segs[2];
+
+  h2c = http2_conn_ctx_get_w_thread (hc);
+
+  /* TODO: configurable buf size with bigger default value */
+  vec_validate_init_empty (buf, 1023, 0);
+  *error = hpack_parse_request (req->payload, req->payload_len, buf, 1023,
+                                &control_data, &req->base.headers,
+                                &h2c->decoder_dynamic_table);
+  if (*error != HTTP2_ERROR_NO_ERROR)
+    {
+      HTTP_DBG (1, "hpack_parse_request failed");
+      res = HTTP_SM_ERROR;
+      goto done;
+    }
+
+  if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_METHOD_PARSED))
+    {
+      HTTP_DBG (1, ":method pseudo-header missing in request");
+      http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+      goto done;
+    }
+  if (control_data.method == HTTP_REQ_UNKNOWN ||
+      control_data.method == HTTP_REQ_CONNECT)
+    {
+      HTTP_DBG (1, "unsupported method");
+      http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+      goto done;
+    }
+  if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_SCHEME_PARSED) &&
+      control_data.method != HTTP_REQ_CONNECT)
+    {
+      HTTP_DBG (1, ":scheme pseudo-header missing in request");
+      http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+      goto done;
+    }
+  if (control_data.scheme == HTTP_URL_SCHEME_UNKNOWN)
+    {
+      HTTP_DBG (1, "unsupported scheme");
+      http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, sp);
+      goto done;
+    }
+  if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_PATH_PARSED) &&
+      control_data.method != HTTP_REQ_CONNECT)
+    {
+      HTTP_DBG (1, ":path pseudo-header missing in request");
+      http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+      goto done;
+    }
+  if (!(control_data.parsed_bitmap & HPACK_PSEUDO_HEADER_AUTHORITY_PARSED) &&
+      control_data.method != HTTP_REQ_CONNECT)
+    {
+      HTTP_DBG (1, ":authority pseudo-header missing in request");
+      http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+      goto done;
+    }
+
+  req->base.control_data_len = control_data.control_data_len;
+  req->base.headers_offset = control_data.headers - buf;
+  req->base.headers_len = control_data.headers_len;
+  if (control_data.content_len_header_index != ~0)
+    {
+      req->base.content_len_header_index =
+        control_data.content_len_header_index;
+      rv = http_parse_content_length (&req->base, buf);
+      if (rv)
+        {
+          http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+          goto done;
+        }
+      new_state = HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA;
+    }
+  /* TODO: message framing without content length using END_STREAM flag */
+  if (req->base.body_len == 0 && req->stream_state == HTTP2_STREAM_STATE_OPEN)
+    {
+      HTTP_DBG (1, "no content-length and DATA frame expected");
+      *error = HTTP2_ERROR_INTERNAL_ERROR;
+      res = HTTP_SM_ERROR;
+      goto done;
+    }
+  req->base.to_recv = req->base.body_len;
+
+  req->base.target_path_len = control_data.path_len;
+  req->base.target_path_offset = control_data.path - buf;
+  /* drop leading slash */
+  req->base.target_path_offset++;
+  req->base.target_path_len--;
+  req->base.target_query_offset = 0;
+  req->base.target_query_len = 0;
+  http_identify_optional_query (&req->base, buf);
+
+  msg.type = HTTP_MSG_REQUEST;
+  msg.method_type = control_data.method;
+  msg.data.type = HTTP_MSG_DATA_INLINE;
+  msg.data.len = req->base.connection_header_index;
+  msg.data.scheme = control_data.scheme;
+  msg.data.target_authority_offset = control_data.authority - buf;
+  msg.data.target_authority_len = control_data.authority_len;
+  msg.data.target_path_offset = req->base.target_path_offset;
+  msg.data.target_path_len = req->base.target_path_len;
+  msg.data.target_query_offset = req->base.target_query_offset;
+  msg.data.target_query_len = req->base.target_query_len;
+  msg.data.headers_offset = req->base.headers_offset;
+  msg.data.headers_len = req->base.headers_len;
+  msg.data.headers_ctx = pointer_to_uword (req->base.headers);
+  msg.data.upgrade_proto = HTTP_UPGRADE_PROTO_NA;
+  msg.data.body_offset = req->base.control_data_len;
+  msg.data.body_len = req->base.body_len;
+
+  /* message header first, decoded control data right behind it */
+  segs[0].data = (u8 *) &msg;
+  segs[0].len = sizeof (msg);
+  segs[1].data = buf;
+  segs[1].len = req->base.control_data_len;
+  HTTP_DBG (3, "%U", format_http_bytes, buf, req->base.control_data_len);
+  http_io_as_write_segs (&req->base, segs, 2);
+  http_req_state_change (&req->base, new_state);
+  http_app_worker_rx_notify (&req->base);
+
+  if (req->stream_id > h2c->last_processed_stream_id)
+    h2c->last_processed_stream_id = req->stream_id;
+
+done:
+  vec_free (buf);
+  return res;
+}
+/*
+ * RX handler for HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA: forwards the payload
+ * of a DATA frame (req->payload, req->payload_len) to the application and
+ * tracks how many body bytes are still expected in req->base.to_recv.
+ *
+ * A stream error (PROTOCOL_ERROR) is sent when the payload is larger than
+ * the remaining expected bytes, or when the stream is HALF_CLOSED while
+ * bytes are still outstanding. Always returns HTTP_SM_STOP; the error
+ * argument is not written by this handler.
+ */
+static http_sm_result_t
+http2_req_state_transport_io_more_data (http_conn_t *hc, http2_req_t *req,
+ transport_send_params_t *sp,
+ http2_error_t *error)
+{
+ if (req->payload_len > req->base.to_recv)
+ {
+ HTTP_DBG (1, "received more data than expected");
+ http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+ return HTTP_SM_STOP;
+ }
+ req->base.to_recv -= req->payload_len;
+ if (req->stream_state == HTTP2_STREAM_STATE_HALF_CLOSED &&
+ req->base.to_recv != 0)
+ {
+ HTTP_DBG (1, "peer closed stream but don't send all data");
+ http2_stream_error (hc, req, HTTP2_ERROR_PROTOCOL_ERROR, sp);
+ return HTTP_SM_STOP;
+ }
+ if (req->base.to_recv == 0) /* whole body received, next event is the app reply */
+ http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_APP_REPLY);
+ http_io_as_write (&req->base, req->payload, req->payload_len);
+ http_app_worker_rx_notify (&req->base);
+
+ return HTTP_SM_STOP;
+}
+
+/*************************************/
+/* request state machine handlers TX */
+/*************************************/
+/*
+ * TX handler for HTTP_REQ_STATE_WAIT_APP_REPLY: reads the reply message
+ * posted by the application, serializes the response header block with
+ * hpack and writes it to the transport as one HEADERS frame with
+ * END_HEADERS set.
+ *
+ * With a response body the request moves to HTTP_REQ_STATE_APP_IO_MORE_DATA
+ * and HTTP_SM_CONTINUE is returned so the body handler runs next. Without a
+ * body END_STREAM is added to the frame flags, the stream is closed and
+ * HTTP_SM_STOP is returned. The error argument is not written here.
+ */
+static http_sm_result_t
+http2_req_state_wait_app_reply (http_conn_t *hc, http2_req_t *req,
+ transport_send_params_t *sp,
+ http2_error_t *error)
+{
+ http_msg_t msg;
+ u8 *response, *date, *app_headers = 0;
+ u8 fh[HTTP2_FRAME_HEADER_SIZE];
+ hpack_response_control_data_t control_data;
+ u8 flags = HTTP2_FRAME_FLAG_END_HEADERS;
+ http_sm_result_t sm_result = HTTP_SM_ERROR;
+ u32 n_written;
+
+ http_get_app_msg (&req->base, &msg);
+ ASSERT (msg.type == HTTP_MSG_REPLY);
+
+ response = http_get_tx_buf (hc);
+ date = format (0, "%U", format_http_time_now, hc); /* temporary vector, freed below */
+
+ control_data.sc = msg.code;
+ control_data.content_len = msg.data.body_len;
+ control_data.server_name = hc->app_name;
+ control_data.server_name_len = vec_len (hc->app_name);
+ control_data.date = date;
+ control_data.date_len = vec_len (date);
+
+ if (msg.data.headers_len)
+ app_headers = http_get_app_header_list (&req->base, &msg);
+
+ hpack_serialize_response (app_headers, msg.data.headers_len, &control_data,
+ &response);
+ vec_free (date);
+
+ if (msg.data.body_len)
+ {
+ /* start sending the actual data */
+ http_req_tx_buffer_init (&req->base, &msg);
+ http_req_state_change (&req->base, HTTP_REQ_STATE_APP_IO_MORE_DATA);
+ sm_result = HTTP_SM_CONTINUE;
+ }
+ else
+ {
+ /* no response body, we are done */
+ flags |= HTTP2_FRAME_FLAG_END_STREAM;
+ sm_result = HTTP_SM_STOP;
+ http2_stream_close (req);
+ }
+
+ http2_frame_write_headers_header (vec_len (response), req->stream_id, flags,
+ fh);
+ svm_fifo_seg_t segs[2] = { { fh, HTTP2_FRAME_HEADER_SIZE },
+ { response, vec_len (response) } }; /* frame header, then header block */
+ n_written = http_io_ts_write_segs (hc, segs, 2, sp);
+ ASSERT (n_written == (HTTP2_FRAME_HEADER_SIZE + vec_len (response)));
+ http_io_ts_after_write (hc, 0);
+
+ return sm_result;
+}
+/*
+ * TX handler for HTTP_REQ_STATE_APP_IO_MORE_DATA: sends the next chunk of
+ * the response body as one DATA frame.
+ *
+ * The chunk size is limited by what the transport accepts minus the frame
+ * header. When the chunk drains the tx buffer the frame carries END_STREAM,
+ * the buffer is freed and the stream is closed (server) or set HALF_CLOSED
+ * (otherwise). If the transport write threshold check fires, a dequeue
+ * notification is requested and the request is descheduled.
+ * Always returns HTTP_SM_STOP; the error argument is not written here.
+ */
+static http_sm_result_t
+http2_req_state_app_io_more_data (http_conn_t *hc, http2_req_t *req,
+ transport_send_params_t *sp,
+ http2_error_t *error)
+{
+ u32 max_write, max_read, n_segs, n_read, n_written = 0;
+ svm_fifo_seg_t *app_segs, *segs = 0;
+ http_buffer_t *hb = &req->base.tx_buf;
+ u8 fh[HTTP2_FRAME_HEADER_SIZE];
+ u8 finished = 0, flags = 0;
+
+ ASSERT (http_buffer_bytes_left (hb) > 0);
+ max_write = http_io_ts_max_write (hc, sp);
+ if (max_write <= HTTP2_FRAME_HEADER_SIZE) /* no room for header plus at least one byte */
+ {
+ HTTP_DBG (1, "ts tx fifo full");
+ goto check_fifo;
+ }
+ max_read = http_buffer_bytes_left (hb);
+
+ n_read = http_buffer_get_segs (hb, max_write - HTTP2_FRAME_HEADER_SIZE,
+ &app_segs, &n_segs);
+ if (n_read == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ goto check_fifo;
+ }
+
+ finished = (max_read - n_read) == 0; /* this chunk is the rest of the body */
+ flags = finished ? HTTP2_FRAME_FLAG_END_STREAM : 0;
+ http2_frame_write_data_header (n_read, req->stream_id, flags, fh);
+ vec_validate (segs, 0); /* segs[0] is the frame header, body segments follow */
+ segs[0].len = HTTP2_FRAME_HEADER_SIZE;
+ segs[0].data = fh;
+ vec_append (segs, app_segs);
+
+ n_written = http_io_ts_write_segs (hc, segs, n_segs + 1, sp);
+ ASSERT (n_written == (HTTP2_FRAME_HEADER_SIZE + n_read));
+ vec_free (segs);
+ http_buffer_drain (hb, n_read);
+
+ if (finished)
+ {
+ http_buffer_free (hb);
+ if (hc->flags & HTTP_CONN_F_IS_SERVER)
+ http2_stream_close (req);
+ else
+ req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED;
+ }
+ http_io_ts_after_write (hc, finished);
+
+check_fifo:
+ if (http_io_ts_check_write_thresh (hc))
+ {
+ http_io_ts_add_want_deq_ntf (hc);
+ http_req_deschedule (&req->base, sp);
+ }
+ return HTTP_SM_STOP;
+}
+
+/*************************/
+/* request state machine */
+/*************************/
+/*
+ * Signature shared by all request state handlers. A handler returns an
+ * http_sm_result_t (HTTP_SM_STOP, HTTP_SM_CONTINUE or HTTP_SM_ERROR) and
+ * reports the error code through the error argument.
+ */
+typedef http_sm_result_t (*http2_sm_handler) (http_conn_t *hc,
+ http2_req_t *req,
+ transport_send_params_t *sp,
+ http2_error_t *error);
+
+/*
+ * TX handlers indexed by http request state; states without a TX handler
+ * are left zero-initialized.
+ */
+static http2_sm_handler tx_state_funcs[HTTP_REQ_N_STATES] = {
+  [HTTP_REQ_STATE_WAIT_APP_REPLY] = http2_req_state_wait_app_reply,
+  [HTTP_REQ_STATE_APP_IO_MORE_DATA] = http2_req_state_app_io_more_data,
+};
+
+/*
+ * RX handlers indexed by http request state; states without an RX handler
+ * are left zero-initialized.
+ */
+static http2_sm_handler rx_state_funcs[HTTP_REQ_N_STATES] = {
+  [HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA] =
+    http2_req_state_transport_io_more_data,
+  [HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD] =
+    http2_req_state_wait_transport_method,
+};
+
+/* Return 1 when the request's current state has a TX handler, 0 otherwise. */
+static_always_inline int
+http2_req_state_is_tx_valid (http2_req_t *req)
+{
+  return tx_state_funcs[req->base.state] != 0;
+}
+
+/*
+ * Run the request state machine until a handler stops it.
+ *
+ * hc     http connection the request belongs to
+ * req    request to drive
+ * sp     transport send params; only passed on to TX handlers
+ * is_tx  non-zero selects tx_state_funcs, zero selects rx_state_funcs
+ *
+ * Returns HTTP2_ERROR_NO_ERROR, or the error reported by a handler that
+ * returned HTTP_SM_ERROR. A state without a handler in the selected table
+ * yields HTTP2_ERROR_INTERNAL_ERROR instead of a call through a null
+ * pointer. When the stream ends up CLOSED, the session is told the request
+ * is deleted and the request is freed.
+ */
+static_always_inline http2_error_t
+http2_req_run_state_machine (http_conn_t *hc, http2_req_t *req,
+                             transport_send_params_t *sp, u8 is_tx)
+{
+  http_sm_result_t res;
+  /* defined value in case a handler fails without setting the code */
+  http2_error_t error = HTTP2_ERROR_INTERNAL_ERROR;
+  http2_conn_ctx_t *h2c;
+  http2_sm_handler handler;
+
+  do
+    {
+      handler = is_tx ? tx_state_funcs[req->base.state] :
+                        rx_state_funcs[req->base.state];
+      if (PREDICT_FALSE (handler == 0))
+        {
+          HTTP_DBG (1, "no %s handler for req state %u", is_tx ? "tx" : "rx",
+                    req->base.state);
+          return HTTP2_ERROR_INTERNAL_ERROR;
+        }
+
+      res = handler (hc, req, is_tx ? sp : 0, &error);
+
+      if (res == HTTP_SM_ERROR)
+        {
+          HTTP_DBG (1, "protocol error %U", format_http2_error, error);
+          return error;
+        }
+    }
+  while (res == HTTP_SM_CONTINUE);
+
+  if (req->stream_state == HTTP2_STREAM_STATE_CLOSED)
+    {
+      h2c = http2_conn_ctx_get_w_thread (hc);
+      session_transport_delete_notify (&req->base.connection);
+      http2_conn_free_req (h2c, req, hc->c_thread_index);
+    }
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/******************/
+/* frame handlers */
+/******************/
+/*
+ * Handle a HEADERS frame (server side only; the client branch is a TODO).
+ *
+ * Validates the stream id, enforces the configured max_concurrent_streams
+ * limit (refusing the stream with RST_STREAM/REFUSED_STREAM), allocates and
+ * accepts a new request, reads the frame payload and runs the RX state
+ * machine. Frames without END_HEADERS are not supported yet.
+ *
+ * Returns HTTP2_ERROR_NO_ERROR or an error code; the caller in this file
+ * turns a non-zero code into a connection error.
+ */
+static http2_error_t
+http2_handle_headers_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+ http2_main_t *h2m = &http2_main;
+ http2_req_t *req;
+ u8 *rx_buf;
+ http2_error_t rv;
+ http2_conn_ctx_t *h2c;
+
+ if (!(fh->flags & HTTP2_FRAME_FLAG_END_HEADERS))
+ {
+ /* TODO: fragmented headers */
+ return HTTP2_ERROR_INTERNAL_ERROR;
+ }
+
+ if (hc->flags & HTTP_CONN_F_IS_SERVER)
+ {
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ /* streams initiated by client must use odd-numbered stream id */
+ if ((fh->stream_id & 1) == 0)
+ {
+ HTTP_DBG (1, "invalid stream id %u", fh->stream_id);
+ return HTTP2_ERROR_PROTOCOL_ERROR;
+ }
+ /* stream id must be greater than all streams that client has opened */
+ if (fh->stream_id <= h2c->last_opened_stream_id)
+ {
+ HTTP_DBG (1, "closed stream id %u", fh->stream_id);
+ return HTTP2_ERROR_STREAM_CLOSED;
+ }
+ h2c->last_opened_stream_id = fh->stream_id;
+ if (hash_elts (h2c->req_by_stream_id) ==
+ h2m->settings.max_concurrent_streams)
+ {
+ HTTP_DBG (1, "SETTINGS_MAX_CONCURRENT_STREAMS exceeded");
+ http_io_ts_drain (hc, fh->length); /* discard the header block of the refused stream */
+ http2_send_stream_error (hc, fh->stream_id,
+ HTTP2_ERROR_REFUSED_STREAM, 0);
+ return HTTP2_ERROR_NO_ERROR;
+ }
+ req = http2_conn_alloc_req (hc, fh->stream_id);
+ http_conn_accept_request (hc, &req->base);
+ http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_TRANSPORT_METHOD);
+ req->stream_state = HTTP2_STREAM_STATE_OPEN;
+ hc->flags &= ~HTTP_CONN_F_NO_APP_SESSION;
+ if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST)) /* first request on this connection */
+ {
+ hc->flags |= HTTP_CONN_F_HAS_REQUEST;
+ hpack_dynamic_table_init (
+ &h2c->decoder_dynamic_table,
+ http2_default_conn_settings.header_table_size);
+ }
+ if (fh->flags & HTTP2_FRAME_FLAG_END_STREAM)
+ req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED;
+ }
+ else
+ {
+ /* TODO: client */
+ return HTTP2_ERROR_INTERNAL_ERROR;
+ }
+
+ rx_buf = http_get_rx_buf (hc);
+ vec_validate (rx_buf, fh->length - 1); /* NOTE(review): fh->length - 1 wraps if length is 0 and the field is unsigned - confirm empty frames cannot reach this line */
+ http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+ rv = http2_frame_read_headers (&req->payload, &req->payload_len, rx_buf,
+ fh->length, fh->flags);
+ if (rv != HTTP2_ERROR_NO_ERROR)
+ return rv;
+
+ HTTP_DBG (1, "run state machine");
+ return http2_req_run_state_machine (hc, req, 0, 0);
+}
+/*
+ * Handle a DATA frame.
+ *
+ * Without a matching request: stream id 0 and stream ids above
+ * last_opened_stream_id are connection errors (PROTOCOL_ERROR); ids at or
+ * below it are answered with RST_STREAM/STREAM_CLOSED. With a request, a
+ * server only accepts the frame while the stream is OPEN; END_STREAM moves
+ * the stream to HALF_CLOSED before the payload is read and the RX state
+ * machine is run.
+ */
+static http2_error_t
+http2_handle_data_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+ http2_req_t *req;
+ u8 *rx_buf;
+ http2_error_t rv;
+ http2_conn_ctx_t *h2c;
+
+ req = http2_conn_get_req (hc, fh->stream_id);
+ if (!req)
+ {
+ if (fh->stream_id == 0)
+ {
+ HTTP_DBG (1, "DATA frame with stream id 0");
+ return HTTP2_ERROR_PROTOCOL_ERROR;
+ }
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ if (fh->stream_id <= h2c->last_opened_stream_id)
+ {
+ HTTP_DBG (1, "stream closed, ignoring frame");
+ http2_send_stream_error (hc, fh->stream_id,
+ HTTP2_ERROR_STREAM_CLOSED, 0);
+ return HTTP2_ERROR_NO_ERROR;
+ }
+ else
+ return HTTP2_ERROR_PROTOCOL_ERROR;
+ }
+
+ /* bogus state */
+ if (hc->flags & HTTP_CONN_F_IS_SERVER &&
+ req->stream_state != HTTP2_STREAM_STATE_OPEN)
+ {
+ HTTP_DBG (1, "error: stream already half-closed");
+ http2_stream_error (hc, req, HTTP2_ERROR_STREAM_CLOSED, 0);
+ return HTTP2_ERROR_NO_ERROR;
+ }
+
+ if (fh->flags & HTTP2_FRAME_FLAG_END_STREAM)
+ req->stream_state = HTTP2_STREAM_STATE_HALF_CLOSED;
+
+ rx_buf = http_get_rx_buf (hc);
+ vec_validate (rx_buf, fh->length - 1); /* NOTE(review): fh->length - 1 wraps if length is 0 and the field is unsigned - confirm how empty DATA frames are meant to be handled */
+ http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+ rv = http2_frame_read_data (&req->payload, &req->payload_len, rx_buf,
+ fh->length, fh->flags);
+ if (rv != HTTP2_ERROR_NO_ERROR)
+ return rv;
+
+ HTTP_DBG (1, "run state machine");
+ return http2_req_run_state_machine (hc, req, 0, 0);
+}
+
+/*
+ * Handle a WINDOW_UPDATE frame: the payload is read and parsed, the
+ * increment itself is not applied yet (flow control is a TODO).
+ *
+ * Returns HTTP2_ERROR_FRAME_SIZE_ERROR for an empty frame, otherwise the
+ * result of http2_frame_read_window_update().
+ */
+static http2_error_t
+http2_handle_window_update_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+  u8 *rx_buf;
+  u32 win_increment;
+  http2_error_t rv;
+
+  /* reject an empty frame up front: fh->length - 1 below would wrap and
+   * ask vec_validate for a huge buffer */
+  if (fh->length == 0)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  rx_buf = http_get_rx_buf (hc);
+  vec_validate (rx_buf, fh->length - 1);
+  http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+  rv = http2_frame_read_window_update (&win_increment, rx_buf, fh->length);
+  if (rv != HTTP2_ERROR_NO_ERROR)
+    return rv;
+
+  /* TODO: flow control */
+  return HTTP2_ERROR_NO_ERROR;
+}
+/*
+ * Handle a SETTINGS frame.
+ *
+ * The frame must be on stream 0. An ACK must have an empty payload and is
+ * otherwise ignored for now. For a non-ACK frame the peer settings are
+ * parsed into a copy, committed to h2c->peer_settings only when parsing
+ * succeeds, and acknowledged with a SETTINGS ACK frame.
+ *
+ * NOTE(review): a non-ACK frame shorter than one settings entry, which
+ * includes an empty one, is rejected with FRAME_SIZE_ERROR - confirm this
+ * is intended.
+ */
+static http2_error_t
+http2_handle_settings_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+ u8 *rx_buf, *resp = 0;
+ http2_error_t rv;
+ http2_conn_settings_t new_settings;
+ http2_conn_ctx_t *h2c;
+
+ if (fh->stream_id != 0)
+ return HTTP2_ERROR_PROTOCOL_ERROR;
+
+ if (fh->flags == HTTP2_FRAME_FLAG_ACK)
+ {
+ if (fh->length != 0)
+ return HTTP2_ERROR_FRAME_SIZE_ERROR;
+ /* TODO: we can start using non-default settings */
+ }
+ else
+ {
+ if (fh->length < sizeof (http2_settings_entry_t))
+ return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+ rx_buf = http_get_rx_buf (hc);
+ vec_validate (rx_buf, fh->length - 1);
+ http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ new_settings = h2c->peer_settings; /* work on a copy so a parse error leaves the stored settings untouched */
+ rv = http2_frame_read_settings (&new_settings, rx_buf, fh->length);
+ if (rv != HTTP2_ERROR_NO_ERROR)
+ return rv;
+ h2c->peer_settings = new_settings;
+
+ /* ACK peer settings */
+ http2_frame_write_settings_ack (&resp);
+ http_io_ts_write (hc, resp, vec_len (resp), 0);
+ vec_free (resp);
+ http_io_ts_after_write (hc, 0);
+ }
+
+ return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Handle an RST_STREAM frame.
+ *
+ * Stream id 0 is a PROTOCOL_ERROR and an empty frame a FRAME_SIZE_ERROR.
+ * For a known request the stream is marked CLOSED and the application
+ * session gets a reset notification. For an unknown stream the frame is
+ * ignored when the id is at or below last_opened_stream_id, and is a
+ * PROTOCOL_ERROR otherwise.
+ */
+static http2_error_t
+http2_handle_rst_stream_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+  u8 *rx_buf;
+  http2_error_t rv;
+  http2_req_t *req;
+  u32 error_code;
+  http2_conn_ctx_t *h2c;
+
+  if (fh->stream_id == 0)
+    return HTTP2_ERROR_PROTOCOL_ERROR;
+
+  /* reject an empty frame up front: fh->length - 1 below would wrap and
+   * ask vec_validate for a huge buffer */
+  if (fh->length == 0)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  rx_buf = http_get_rx_buf (hc);
+  vec_validate (rx_buf, fh->length - 1);
+  http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+  rv = http2_frame_read_rst_stream (&error_code, rx_buf, fh->length);
+  if (rv != HTTP2_ERROR_NO_ERROR)
+    return rv;
+
+  req = http2_conn_get_req (hc, fh->stream_id);
+  if (!req)
+    {
+      h2c = http2_conn_ctx_get_w_thread (hc);
+      if (fh->stream_id <= h2c->last_opened_stream_id)
+        {
+          /* we reset stream, but peer might send something meanwhile */
+          HTTP_DBG (1, "stream closed, ignoring frame");
+          return HTTP2_ERROR_NO_ERROR;
+        }
+      else
+        return HTTP2_ERROR_PROTOCOL_ERROR;
+    }
+
+  req->stream_state = HTTP2_STREAM_STATE_CLOSED;
+  session_transport_reset_notify (&req->base.connection);
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Handle a GOAWAY frame.
+ *
+ * The frame must be on stream 0 and must not be empty. A GOAWAY with
+ * NO_ERROR is currently ignored (graceful shutdown is a TODO). Any other
+ * error code sends a reset notification for every request of the connection
+ * and shuts the transport down.
+ */
+static http2_error_t
+http2_handle_goaway_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+  u8 *rx_buf;
+  http2_error_t rv;
+  u32 error_code, last_stream_id, req_index, stream_id;
+  http2_conn_ctx_t *h2c;
+  http2_req_t *req;
+
+  if (fh->stream_id != 0)
+    return HTTP2_ERROR_PROTOCOL_ERROR;
+
+  /* reject an empty frame up front: fh->length - 1 below would wrap and
+   * ask vec_validate for a huge buffer */
+  if (fh->length == 0)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  rx_buf = http_get_rx_buf (hc);
+  vec_validate (rx_buf, fh->length - 1);
+  http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+  rv =
+    http2_frame_read_goaway (&error_code, &last_stream_id, rx_buf, fh->length);
+  if (rv != HTTP2_ERROR_NO_ERROR)
+    return rv;
+
+  if (error_code == HTTP2_ERROR_NO_ERROR)
+    {
+      /* TODO: graceful shutdown (no new streams) */
+    }
+  else
+    {
+      /* connection error */
+      h2c = http2_conn_ctx_get_w_thread (hc);
+      hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({
+                      req = http2_req_get (req_index, hc->c_thread_index);
+                      session_transport_reset_notify (&req->base.connection);
+                    }));
+      http_shutdown_transport (hc);
+    }
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Handle a PING frame: a PING without ACK is answered with a PING ACK that
+ * echoes the received payload; a PING ACK is consumed silently.
+ *
+ * Returns HTTP2_ERROR_PROTOCOL_ERROR when the frame is not on stream 0 and
+ * HTTP2_ERROR_FRAME_SIZE_ERROR when the payload is not
+ * HTTP2_PING_PAYLOAD_LEN bytes long (RFC9113 6.7).
+ */
+static http2_error_t
+http2_handle_ping_frame (http_conn_t *hc, http2_frame_header_t *fh)
+{
+  u8 *rx_buf, *resp = 0;
+
+  if (fh->stream_id != 0)
+    return HTTP2_ERROR_PROTOCOL_ERROR;
+
+  /* RFC9113 6.7: wrong length is a connection error of type
+   * FRAME_SIZE_ERROR, not PROTOCOL_ERROR */
+  if (fh->length != HTTP2_PING_PAYLOAD_LEN)
+    return HTTP2_ERROR_FRAME_SIZE_ERROR;
+
+  rx_buf = http_get_rx_buf (hc);
+  vec_validate (rx_buf, fh->length - 1);
+  http_io_ts_read (hc, rx_buf, fh->length, 0);
+
+  /* RFC9113 6.7: The endpoint MUST NOT respond to PING frames with ACK */
+  if (fh->flags & HTTP2_FRAME_FLAG_ACK)
+    return HTTP2_ERROR_NO_ERROR;
+
+  http2_frame_write_ping (1, rx_buf, &resp);
+  http_io_ts_write (hc, resp, vec_len (resp), 0);
+  vec_free (resp);
+  http_io_ts_after_write (hc, 1);
+
+  return HTTP2_ERROR_NO_ERROR;
+}
+
+/*
+ * Handle a PUSH_PROMISE frame: a protocol error when received by a server,
+ * not implemented yet for the client side.
+ */
+static http2_error_t
+http2_handle_push_promise (http_conn_t *hc, http2_frame_header_t *fh)
+{
+  if (!(hc->flags & HTTP_CONN_F_IS_SERVER))
+    {
+      /* TODO: client */
+      return HTTP2_ERROR_INTERNAL_ERROR;
+    }
+
+  HTTP_DBG (1, "error: server received PUSH_PROMISE");
+  return HTTP2_ERROR_PROTOCOL_ERROR;
+}
+/*
+ * Check the client connection preface on a server connection.
+ *
+ * Clears HTTP2_CONN_F_EXPECT_PREFACE. If the preface was already verified
+ * (HTTP2_CONN_F_PREFACE_VERIFIED) nothing is read; otherwise the first
+ * http2_conn_preface.len bytes are read from the transport and compared
+ * with http2_conn_preface.base.
+ *
+ * Returns 0 when the preface is accepted, non-zero (memcmp result) on
+ * mismatch.
+ */
+static_always_inline int
+http2_expect_preface (http_conn_t *hc, http2_conn_ctx_t *h2c)
+{
+ u8 *rx_buf;
+
+ ASSERT (hc->flags & HTTP_CONN_F_IS_SERVER);
+ h2c->flags &= ~HTTP2_CONN_F_EXPECT_PREFACE;
+
+ /* already done in http core */
+ if (h2c->flags & HTTP2_CONN_F_PREFACE_VERIFIED)
+ return 0;
+
+ rx_buf = http_get_rx_buf (hc);
+ http_io_ts_read (hc, rx_buf, http2_conn_preface.len, 1); /* the caller drains the preface afterwards, so presumably 1 means peek - confirm */
+ return memcmp (rx_buf, http2_conn_preface.base, http2_conn_preface.len);
+}
+
+/*****************/
+/* http core VFT */
+/*****************/
+
+/* Engine VFT: return the http connection index stored in the request. */
+static u32
+http2_hc_index_get_by_req_index (u32 req_index,
+                                 clib_thread_index_t thread_index)
+{
+  return http2_req_get (req_index, thread_index)->base.hr_hc_index;
+}
+
+/* Engine VFT: return the transport connection embedded in the request. */
+static transport_connection_t *
+http2_req_get_connection (u32 req_index, clib_thread_index_t thread_index)
+{
+  return &http2_req_get (req_index, thread_index)->base.connection;
+}
+
+/*
+ * Format helper: one-line identification of a request.
+ * va_list arguments: http2_req_t *req, http_conn_t *hc.
+ */
+static u8 *
+format_http2_req (u8 *s, va_list *args)
+{
+  http2_req_t *req = va_arg (*args, http2_req_t *);
+  http_conn_t *hc = va_arg (*args, http_conn_t *);
+  session_t *transport_session =
+    session_get_from_handle (hc->hc_tc_session_handle);
+
+  return format (
+    s, "[%d:%d][H2] stream_id %u app_wrk %u hc_index %u ts %d:%d",
+    req->base.c_thread_index, req->base.c_s_index, req->stream_id,
+    req->base.hr_pa_wrk_index, req->base.hr_hc_index,
+    transport_session->thread_index, transport_session->session_index);
+}
+
+/*
+ * Engine VFT: format a request for CLI output.
+ * va_list arguments: u32 req_index, thread index, http_conn_t *hc,
+ * u32 verbose. With verbose set the connection state is appended, and a
+ * newline as well when verbose > 1.
+ */
+static u8 *
+http2_format_req (u8 *s, va_list *args)
+{
+  u32 req_index = va_arg (*args, u32);
+  clib_thread_index_t thread_index = va_arg (*args, u32);
+  http_conn_t *hc = va_arg (*args, http_conn_t *);
+  u32 verbose = va_arg (*args, u32);
+  http2_req_t *req = http2_req_get (req_index, thread_index);
+
+  s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http2_req, req, hc);
+  if (!verbose)
+    return s;
+
+  s = format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc);
+  return verbose > 1 ? format (s, "\n") : s;
+}
+/*
+ * Engine VFT: the application has something to send on a request.
+ *
+ * If the request state has no TX handler, a server request still in
+ * TRANSPORT_IO_MORE_DATA is moved to WAIT_APP_REPLY; any other state resets
+ * the stream with INTERNAL_ERROR. Data for an already CLOSED stream is
+ * drained and dropped. Otherwise the TX state machine is run; an error from
+ * it becomes a connection error, success refreshes the connection timer.
+ */
+static void
+http2_app_tx_callback (http_conn_t *hc, u32 req_index,
+ transport_send_params_t *sp)
+{
+ http2_req_t *req;
+ http2_error_t rv;
+
+ HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index,
+ req_index);
+ req = http2_req_get (req_index, hc->c_thread_index);
+
+ if (!http2_req_state_is_tx_valid (req))
+ {
+ if (req->base.state == HTTP_REQ_STATE_TRANSPORT_IO_MORE_DATA &&
+ (hc->flags & HTTP_CONN_F_IS_SERVER))
+ {
+ /* server app might send error earlier */
+ http_req_state_change (&req->base, HTTP_REQ_STATE_WAIT_APP_REPLY);
+ }
+ else
+ {
+ clib_warning ("hc [%u]%x invalid tx state: http req state "
+ "'%U', session state '%U'",
+ hc->c_thread_index, hc->hc_hc_index,
+ format_http_req_state, req->base.state,
+ format_http_conn_state, hc);
+ http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, sp);
+ return;
+ }
+ }
+
+ /* peer reset stream, but app might send something meanwhile */
+ if (req->stream_state == HTTP2_STREAM_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "stream closed, ignoring app data");
+ http_io_as_drain_all (&req->base);
+ return;
+ }
+
+ HTTP_DBG (1, "run state machine");
+ rv = http2_req_run_state_machine (hc, req, sp, 1);
+ if (rv != HTTP2_ERROR_NO_ERROR)
+ {
+ http2_connection_error (hc, rv, sp);
+ return;
+ }
+
+ /* reset http connection expiration timer */
+ http_conn_timer_update (hc);
+}
+/* Engine VFT: application rx event. Not implemented yet, does nothing. */
+static void
+http2_app_rx_evt_callback (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index)
+{
+ /* TODO: continue tunnel RX */
+}
+/*
+ * Engine VFT: the application closed its session for a request.
+ *
+ * When the stream is already CLOSED or the http connection is in CLOSED
+ * state the close is confirmed immediately; otherwise the request is only
+ * flagged HTTP2_REQ_F_APP_CLOSED and the confirmation is sent later when
+ * the stream closes.
+ */
+static void
+http2_app_close_callback (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index)
+{
+ http2_req_t *req;
+
+ HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index,
+ req_index);
+ req = http2_req_get (req_index, thread_index);
+ if (!req) /* NOTE(review): http2_req_get() is a plain pool_elt_at_index(); confirm it can actually yield 0 here */
+ {
+ HTTP_DBG (1, "req already deleted");
+ return;
+ }
+
+ if (req->stream_state == HTTP2_STREAM_STATE_CLOSED ||
+ hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "nothing more to send, confirm close");
+ session_transport_closed_notify (&req->base.connection);
+ }
+ else
+ {
+ HTTP_DBG (1, "wait for all data to be written to ts");
+ req->flags |= HTTP2_REQ_F_APP_CLOSED;
+ }
+}
+/*
+ * Engine VFT: the application reset its session for a request. The request
+ * is flagged HTTP2_REQ_F_APP_CLOSED and the stream is reset towards the
+ * peer with INTERNAL_ERROR.
+ */
+static void
+http2_app_reset_callback (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index)
+{
+ http2_req_t *req;
+
+ HTTP_DBG (1, "hc [%u]%x req_index %u", hc->c_thread_index, hc->hc_hc_index,
+ req_index);
+ req = http2_req_get (req_index, thread_index);
+ req->flags |= HTTP2_REQ_F_APP_CLOSED; /* set first so http2_stream_error() sends "closed" rather than "closing" */
+ http2_stream_error (hc, req, HTTP2_ERROR_INTERNAL_ERROR, 0);
+}
+/* Engine VFT: transport connected. Not implemented yet; always returns -1. */
+static int
+http2_transport_connected_callback (http_conn_t *hc)
+{
+ /* TODO */
+ return -1;
+}
+/*
+ * Engine VFT: data arrived on the transport connection.
+ *
+ * While HTTP2_CONN_F_EXPECT_PREFACE is set the client connection preface is
+ * checked first and the server preface is sent. Afterwards frames are
+ * processed in a loop: the frame header is read, the length is checked
+ * against the local max_frame_size, and if the whole payload is available
+ * the header is drained and the frame is dispatched by type. A handler error
+ * becomes a connection error. A frame whose payload has not fully arrived is
+ * left in the fifo for a later call. The connection timer is refreshed when
+ * the loop finishes without error.
+ */
+static void
+http2_transport_rx_callback (http_conn_t *hc)
+{
+ http2_main_t *h2m = &http2_main;
+ http2_frame_header_t fh;
+ u32 to_deq;
+ u8 *rx_buf;
+ http2_error_t rv;
+ http2_conn_ctx_t *h2c;
+
+ HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index);
+
+ to_deq = http_io_ts_max_read (hc);
+
+ if (PREDICT_FALSE (to_deq == 0))
+ {
+ HTTP_DBG (1, "no data to deq");
+ return;
+ }
+
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ if (h2c->flags & HTTP2_CONN_F_EXPECT_PREFACE)
+ {
+ if (to_deq < http2_conn_preface.len)
+ {
+ HTTP_DBG (1, "to_deq %u is less than conn preface size", to_deq);
+ http_disconnect_transport (hc);
+ return;
+ }
+ if (http2_expect_preface (hc, h2c))
+ {
+ HTTP_DBG (1, "conn preface verification failed");
+ http_disconnect_transport (hc);
+ return;
+ }
+ http2_send_server_preface (hc);
+ http_io_ts_drain (hc, http2_conn_preface.len);
+ to_deq -= http2_conn_preface.len;
+ if (to_deq == 0)
+ return;
+ }
+
+ if (PREDICT_FALSE (to_deq < HTTP2_FRAME_HEADER_SIZE))
+ {
+ HTTP_DBG (1, "to_deq %u is less than frame header size", to_deq);
+ http2_connection_error (hc, HTTP2_ERROR_PROTOCOL_ERROR, 0);
+ return;
+ }
+
+ while (to_deq >= HTTP2_FRAME_HEADER_SIZE)
+ {
+ rx_buf = http_get_rx_buf (hc);
+ http_io_ts_read (hc, rx_buf, HTTP2_FRAME_HEADER_SIZE, 1); /* header is drained separately below, so presumably 1 means peek - confirm */
+ to_deq -= HTTP2_FRAME_HEADER_SIZE;
+ http2_frame_header_read (rx_buf, &fh);
+ if (fh.length > h2m->settings.max_frame_size)
+ {
+ HTTP_DBG (1, "frame length %lu exceeded SETTINGS_MAX_FRAME_SIZE %lu",
+ fh.length, h2m->settings.max_frame_size);
+ http2_connection_error (hc, HTTP2_ERROR_FRAME_SIZE_ERROR, 0);
+ return;
+ }
+ if (fh.length > to_deq)
+ {
+ HTTP_DBG (
+ 1, "frame payload not yet received, to deq %lu, frame length %lu",
+ to_deq, fh.length);
+ if (http_io_ts_fifo_size (hc, 1) <
+ (fh.length + HTTP2_FRAME_HEADER_SIZE))
+ {
+ clib_warning ("ts rx fifo too small to hold frame (%u)",
+ fh.length + HTTP2_FRAME_HEADER_SIZE);
+ http2_connection_error (hc, HTTP2_ERROR_PROTOCOL_ERROR, 0);
+ }
+ return; /* header stays in the fifo until the payload has arrived */
+ }
+ http_io_ts_drain (hc, HTTP2_FRAME_HEADER_SIZE);
+ to_deq -= fh.length; /* each handler below reads or drains fh.length payload bytes */
+
+ HTTP_DBG (1, "frame type 0x%02x", fh.type);
+ switch (fh.type)
+ {
+ case HTTP2_FRAME_TYPE_HEADERS:
+ rv = http2_handle_headers_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_DATA:
+ rv = http2_handle_data_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_WINDOW_UPDATE:
+ rv = http2_handle_window_update_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_SETTINGS:
+ rv = http2_handle_settings_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_RST_STREAM:
+ rv = http2_handle_rst_stream_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_GOAWAY:
+ rv = http2_handle_goaway_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_PING:
+ rv = http2_handle_ping_frame (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_CONTINUATION:
+ /* TODO */
+ rv = HTTP2_ERROR_INTERNAL_ERROR;
+ break;
+ case HTTP2_FRAME_TYPE_PUSH_PROMISE:
+ rv = http2_handle_push_promise (hc, &fh);
+ break;
+ case HTTP2_FRAME_TYPE_PRIORITY: /* deprecated */
+ default:
+ /* ignore unknown frame type */
+ http_io_ts_drain (hc, fh.length);
+ rv = HTTP2_ERROR_NO_ERROR;
+ break;
+ }
+
+ if (rv != HTTP2_ERROR_NO_ERROR)
+ {
+ http2_connection_error (hc, rv, 0);
+ return;
+ }
+ }
+
+ /* reset http connection expiration timer */
+ http_conn_timer_update (hc);
+}
+/*
+ * Engine VFT: the transport connection is closing.
+ *
+ * Does nothing if the connection never had a request. Otherwise every
+ * request whose stream is not CLOSED gets a "closing" notification; if no
+ * such request exists the transport is disconnected right away.
+ */
+static void
+http2_transport_close_callback (http_conn_t *hc)
+{
+ u32 req_index, stream_id, n_open_streams = 0;
+ http2_req_t *req;
+ http2_conn_ctx_t *h2c;
+
+ HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index);
+
+ if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST))
+ {
+ HTTP_DBG (1, "no request");
+ return;
+ }
+
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({
+ req = http2_req_get (req_index, hc->c_thread_index);
+ if (req->stream_state != HTTP2_STREAM_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "req_index %u", req_index);
+ session_transport_closing_notify (&req->base.connection);
+ n_open_streams++;
+ }
+ }));
+ if (n_open_streams == 0)
+ {
+ HTTP_DBG (1, "no open stream disconnecting");
+ http_disconnect_transport (hc);
+ }
+}
+/*
+ * Engine VFT: the transport connection was reset. Every request of the
+ * connection whose stream is not CLOSED gets a reset notification. Does
+ * nothing if the connection never had a request.
+ */
+static void
+http2_transport_reset_callback (http_conn_t *hc)
+{
+ u32 req_index, stream_id;
+ http2_req_t *req;
+ http2_conn_ctx_t *h2c;
+
+ HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index);
+
+ if (!(hc->flags & HTTP_CONN_F_HAS_REQUEST))
+ return;
+
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ hash_foreach (stream_id, req_index, h2c->req_by_stream_id, ({
+ req = http2_req_get (req_index, hc->c_thread_index);
+ if (req->stream_state != HTTP2_STREAM_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "req_index %u", req_index);
+ session_transport_reset_notify (&req->base.connection);
+ }
+ }));
+}
+/* Engine VFT: connection reschedule. Not implemented yet, does nothing. */
+static void
+http2_transport_conn_reschedule_callback (http_conn_t *hc)
+{
+ /* TODO */
+}
+
+/*
+ * Engine VFT: a new connection was accepted. Allocates the HTTP/2
+ * connection context and arms the connection preface check; over TCP the
+ * preface is marked as already verified.
+ */
+static void
+http2_conn_accept_callback (http_conn_t *hc)
+{
+  http2_conn_ctx_t *h2c;
+
+  HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index);
+
+  h2c = http2_conn_ctx_alloc_w_thread (hc);
+  h2c->flags |= HTTP2_CONN_F_EXPECT_PREFACE;
+
+  if (http_get_transport_proto (hc) != TRANSPORT_PROTO_TCP)
+    return;
+
+  /* already done in http core */
+  h2c->flags |= HTTP2_CONN_F_PREFACE_VERIFIED;
+}
+/*
+ * Engine VFT: final cleanup of a connection. Frees every request still
+ * attached to the connection (sending a delete notification for those not
+ * yet CLOSED) and then frees the HTTP/2 connection context.
+ *
+ * The request indices are first collected into a vector because
+ * http2_conn_free_req() removes entries from req_by_stream_id, the table
+ * being iterated.
+ */
+static void
+http2_conn_cleanup_callback (http_conn_t *hc)
+{
+ u32 req_index, stream_id, *req_index_p, *req_indices = 0;
+ http2_req_t *req;
+ http2_conn_ctx_t *h2c;
+
+ HTTP_DBG (1, "hc [%u]%x", hc->c_thread_index, hc->hc_hc_index);
+ h2c = http2_conn_ctx_get_w_thread (hc);
+ hash_foreach (stream_id, req_index, h2c->req_by_stream_id,
+ ({ vec_add1 (req_indices, req_index); }));
+
+ vec_foreach (req_index_p, req_indices)
+ {
+ req = http2_req_get (*req_index_p, hc->c_thread_index);
+ if (req->stream_state != HTTP2_STREAM_STATE_CLOSED)
+ session_transport_delete_notify (&req->base.connection);
+ http2_conn_free_req (h2c, req, hc->c_thread_index);
+ }
+
+ vec_free (req_indices);
+ http2_conn_ctx_free (hc);
+}
+
+/*
+ * Engine VFT: size the per-thread connection and request pool vectors so
+ * there is one slot for the main thread plus one per entry counted in
+ * vlib_thread_main_t.n_threads.
+ */
+static void
+http2_enable_callback (void)
+{
+  http2_main_t *h2m = &http2_main;
+  /* 1 (main thread) + n_threads slots, i.e. highest index == n_threads */
+  u32 last_thread_index = vlib_get_thread_main ()->n_threads;
+
+  vec_validate (h2m->conn_pool, last_thread_index);
+  vec_validate (h2m->req_pool, last_thread_index);
+}
+/*
+ * Update one local HTTP/2 setting in http2_main.settings.
+ *
+ * The switch cases are generated from foreach_http2_settings: for the
+ * matching setting the value is range-checked against that setting's
+ * min/max and stored.
+ *
+ * Returns 0 on success, -1 when the value is out of range or the setting
+ * type is unknown.
+ */
+static int
+http2_update_settings (http_settings_t type, u32 value)
+{
+ http2_main_t *h2m = &http2_main;
+
+ switch (type)
+ {
+#define _(v, label, member, min, max, default_value, err_code) \
+ case HTTP2_SETTINGS_##label: \
+ if (!(value >= min && value <= max)) \
+ return -1; \
+ h2m->settings.member = value; \
+ return 0;
+ foreach_http2_settings
+#undef _
+ default : return -1;
+ }
+}
+/*
+ * Engine VFT: parse HTTP/2 specific configuration.
+ *
+ * Recognized keywords, each followed by an unsigned value:
+ * initial-window-size, max-frame-size, max-header-list-size,
+ * header-table-size.
+ *
+ * Returns 1 when all input was consumed and accepted; 0 when input is
+ * NULL, a keyword is unknown, or a value is rejected by
+ * http2_update_settings().
+ */
+static uword
+http2_unformat_config_callback (unformat_input_t *input)
+{
+ u32 value;
+
+ if (!input)
+ return 0;
+
+ unformat_skip_white_space (input);
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "initial-window-size %u", &value))
+ {
+ if (http2_update_settings (HTTP2_SETTINGS_INITIAL_WINDOW_SIZE,
+ value))
+ return 0;
+ }
+ else if (unformat (input, "max-frame-size %u", &value))
+ {
+ if (http2_update_settings (HTTP2_SETTINGS_MAX_FRAME_SIZE, value))
+ return 0;
+ }
+ else if (unformat (input, "max-header-list-size %u", &value))
+ {
+ if (http2_update_settings (HTTP2_SETTINGS_MAX_HEADER_LIST_SIZE,
+ value))
+ return 0;
+ }
+ else if (unformat (input, "header-table-size %u", &value))
+ {
+ if (http2_update_settings (HTTP2_SETTINGS_HEADER_TABLE_SIZE, value))
+ return 0;
+ }
+ else
+ return 0;
+ }
+ return 1;
+}
+/*
+ * Callback table of the HTTP/2 engine; registered with the http core for
+ * HTTP_VERSION_2 in http2_init().
+ */
+const static http_engine_vft_t http2_engine = {
+ .name = "http2",
+ .hc_index_get_by_req_index = http2_hc_index_get_by_req_index,
+ .req_get_connection = http2_req_get_connection,
+ .format_req = http2_format_req,
+ .app_tx_callback = http2_app_tx_callback,
+ .app_rx_evt_callback = http2_app_rx_evt_callback,
+ .app_close_callback = http2_app_close_callback,
+ .app_reset_callback = http2_app_reset_callback,
+ .transport_connected_callback = http2_transport_connected_callback,
+ .transport_rx_callback = http2_transport_rx_callback,
+ .transport_close_callback = http2_transport_close_callback,
+ .transport_reset_callback = http2_transport_reset_callback,
+ .transport_conn_reschedule_callback =
+ http2_transport_conn_reschedule_callback,
+ .conn_accept_callback = http2_conn_accept_callback,
+ .conn_cleanup_callback = http2_conn_cleanup_callback,
+ .enable_callback = http2_enable_callback,
+ .unformat_cfg_callback = http2_unformat_config_callback,
+};
+/*
+ * Init function of the HTTP/2 engine: seeds the local settings from
+ * http2_default_conn_settings, overrides max_concurrent_streams and
+ * registers the engine for HTTP_VERSION_2. Always returns 0 (no error).
+ */
+clib_error_t *
+http2_init (vlib_main_t *vm)
+{
+ http2_main_t *h2m = &http2_main;
+
+ clib_warning ("http/2 enabled");
+ h2m->settings = http2_default_conn_settings;
+ h2m->settings.max_concurrent_streams = 100; /* the protocol default is unlimited, limit it to 100 here */
+ http_register_engine (&http2_engine, HTTP_VERSION_2);
+
+ return 0;
+}
+/*
+ * Register http2_init as a vlib init function, ordered after
+ * "http_transport_init"; compiled in only when HTTP_2_ENABLE > 0.
+ */
+#if HTTP_2_ENABLE > 0
+VLIB_INIT_FUNCTION (http2_init) = {
+ .runs_after = VLIB_INITS ("http_transport_init"),
+};
+#endif
diff --git a/src/plugins/http/http2/http2.h b/src/plugins/http/http2/http2.h
new file mode 100644
index 00000000000..9fc95344771
--- /dev/null
+++ b/src/plugins/http/http2/http2.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP2_H_
+#define SRC_PLUGINS_HTTP_HTTP2_H_
+
+#include <vppinfra/format.h>
+#include <vppinfra/types.h>
+
+/* RFC9113 section 7 */
+#define foreach_http2_error \
+ _ (NO_ERROR, "NO_ERROR") \
+ _ (PROTOCOL_ERROR, "PROTOCOL_ERROR") \
+ _ (INTERNAL_ERROR, "INTERNAL_ERROR") \
+ _ (FLOW_CONTROL_ERROR, "FLOW_CONTROL_ERROR") \
+ _ (SETTINGS_TIMEOUT, "SETTINGS_TIMEOUT") \
+ _ (STREAM_CLOSED, "STREAM_CLOSED") \
+ _ (FRAME_SIZE_ERROR, "FRAME_SIZE_ERROR") \
+ _ (REFUSED_STREAM, "REFUSED_STREAM") \
+ _ (CANCEL, "CANCEL") \
+ _ (COMPRESSION_ERROR, "COMPRESSION_ERROR") \
+ _ (CONNECT_ERROR, "CONNECT_ERROR") \
+ _ (ENHANCE_YOUR_CALM, "ENHANCE_YOUR_CALM") \
+ _ (INADEQUATE_SECURITY, "INADEQUATE_SECURITY") \
+ _ (HTTP_1_1_REQUIRED, "HTTP_1_1_REQUIRED")
+
+/* HTTP/2 error codes. Enumerators take implicit values 0..13 in the order of
+ * foreach_http2_error, which follows the error code list of RFC9113 section 7;
+ * reordering the list would change the numeric codes. */
+typedef enum http2_error_
+{
+#define _(s, str) HTTP2_ERROR_##s,
+ foreach_http2_error
+#undef _
+} http2_error_t;
+
+static inline u8 *
+format_http2_error (u8 *s, va_list *va)
+{
+ http2_error_t e = va_arg (*va, http2_error_t);
+ u8 *t = 0;
+
+ switch (e)
+ {
+#define _(s, str) \
+ case HTTP2_ERROR_##s: \
+ t = (u8 *) str; \
+ break;
+ foreach_http2_error
+#undef _
+ default : return format (s, "BUG: unknown");
+ }
+ return format (s, "%s", t);
+}
+
+/* columns: number, symbol, pseudo-header name (without the leading ':') */
+#define foreach_http2_pseudo_header \
+ _ (0, METHOD, "method") \
+ _ (1, SCHEME, "scheme") \
+ _ (2, AUTHORITY, "authority") \
+ _ (3, PATH, "path") \
+ _ (4, STATUS, "status")
+
+/* HTTP/2 settings (RFC9113 section 6.5.2).
+ * columns: value (setting identifier), label (enum suffix), member (field name
+ * in http2_conn_settings_t), min, max, default_value, err_code.
+ * NOTE(review): err_code presumably is the error raised when a received value
+ * is outside [min, max]; confirm against the settings validation code. */
+#define foreach_http2_settings \
+ _ (1, HEADER_TABLE_SIZE, header_table_size, 0, CLIB_U32_MAX, 4096, \
+ HTTP2_ERROR_NO_ERROR) \
+ _ (2, ENABLE_PUSH, enable_push, 0, 1, 1, HTTP2_ERROR_PROTOCOL_ERROR) \
+ _ (3, MAX_CONCURRENT_STREAMS, max_concurrent_streams, 0, CLIB_U32_MAX, \
+ CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR) \
+ _ (4, INITIAL_WINDOW_SIZE, initial_window_size, 0, 0x7FFFFFFF, 65535, \
+ HTTP2_ERROR_FLOW_CONTROL_ERROR) \
+ _ (5, MAX_FRAME_SIZE, max_frame_size, 16384, 16777215, 16384, \
+ HTTP2_ERROR_PROTOCOL_ERROR) \
+ _ (6, MAX_HEADER_LIST_SIZE, max_header_list_size, 0, CLIB_U32_MAX, \
+ CLIB_U32_MAX, HTTP2_ERROR_NO_ERROR)
+
+/* Setting identifiers; each enumerator equals the first column of
+ * foreach_http2_settings (1..6).
+ * NOTE(review): unlike the other types in this header the typedef name has no
+ * "http2_" prefix - confirm whether that is intentional. */
+typedef enum
+{
+#define _(value, label, member, min, max, default_value, err_code) \
+ HTTP2_SETTINGS_##label = value,
+ foreach_http2_settings
+#undef _
+} http_settings_t;
+
+/* Set of settings values: one u32 per entry of foreach_http2_settings,
+ * declared in list order and named after the "member" column. */
+typedef struct
+{
+#define _(value, label, member, min, max, default_value, err_code) u32 member;
+ foreach_http2_settings
+#undef _
+} http2_conn_settings_t;
+
+/* Settings initialized from the default_value column of
+ * foreach_http2_settings; each field is named explicitly, so the result does
+ * not rely on the member declaration order. */
+static const http2_conn_settings_t http2_default_conn_settings = {
+#define _(value, label, member, min, max, default_value, err_code) \
+  .member = default_value,
+  foreach_http2_settings
+#undef _
+};
+
+#endif /* SRC_PLUGINS_HTTP_HTTP2_H_ */
diff --git a/src/plugins/http/http2/huffman_table.h b/src/plugins/http/http2/huffman_table.h
new file mode 100644
index 00000000000..66afffbc54a
--- /dev/null
+++ b/src/plugins/http/http2/huffman_table.h
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+/* generated by mk_huffman_table.py */
+
+#ifndef SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_
+#define SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_
+
+#include <vppinfra/types.h>
+
+/* Huffman code of one symbol: code holds the code value in its code_len
+ * low-order bits. huff_sym_table has one entry per symbol value. */
+typedef struct
+{
+ u8 code_len;
+ u32 code;
+} hpack_huffman_symbol_t;
+
+static hpack_huffman_symbol_t huff_sym_table[] = {
+ { 13, 0x1ff8 }, { 23, 0x7fffd8 }, { 28, 0xfffffe2 }, { 28, 0xfffffe3 },
+ { 28, 0xfffffe4 }, { 28, 0xfffffe5 }, { 28, 0xfffffe6 }, { 28, 0xfffffe7 },
+ { 28, 0xfffffe8 }, { 24, 0xffffea }, { 30, 0x3ffffffc }, { 28, 0xfffffe9 },
+ { 28, 0xfffffea }, { 30, 0x3ffffffd }, { 28, 0xfffffeb }, { 28, 0xfffffec },
+ { 28, 0xfffffed }, { 28, 0xfffffee }, { 28, 0xfffffef }, { 28, 0xffffff0 },
+ { 28, 0xffffff1 }, { 28, 0xffffff2 }, { 30, 0x3ffffffe }, { 28, 0xffffff3 },
+ { 28, 0xffffff4 }, { 28, 0xffffff5 }, { 28, 0xffffff6 }, { 28, 0xffffff7 },
+ { 28, 0xffffff8 }, { 28, 0xffffff9 }, { 28, 0xffffffa }, { 28, 0xffffffb },
+ { 6, 0x14 }, { 10, 0x3f8 }, { 10, 0x3f9 }, { 12, 0xffa },
+ { 13, 0x1ff9 }, { 6, 0x15 }, { 8, 0xf8 }, { 11, 0x7fa },
+ { 10, 0x3fa }, { 10, 0x3fb }, { 8, 0xf9 }, { 11, 0x7fb },
+ { 8, 0xfa }, { 6, 0x16 }, { 6, 0x17 }, { 6, 0x18 },
+ { 5, 0x0 }, { 5, 0x1 }, { 5, 0x2 }, { 6, 0x19 },
+ { 6, 0x1a }, { 6, 0x1b }, { 6, 0x1c }, { 6, 0x1d },
+ { 6, 0x1e }, { 6, 0x1f }, { 7, 0x5c }, { 8, 0xfb },
+ { 15, 0x7ffc }, { 6, 0x20 }, { 12, 0xffb }, { 10, 0x3fc },
+ { 13, 0x1ffa }, { 6, 0x21 }, { 7, 0x5d }, { 7, 0x5e },
+ { 7, 0x5f }, { 7, 0x60 }, { 7, 0x61 }, { 7, 0x62 },
+ { 7, 0x63 }, { 7, 0x64 }, { 7, 0x65 }, { 7, 0x66 },
+ { 7, 0x67 }, { 7, 0x68 }, { 7, 0x69 }, { 7, 0x6a },
+ { 7, 0x6b }, { 7, 0x6c }, { 7, 0x6d }, { 7, 0x6e },
+ { 7, 0x6f }, { 7, 0x70 }, { 7, 0x71 }, { 7, 0x72 },
+ { 8, 0xfc }, { 7, 0x73 }, { 8, 0xfd }, { 13, 0x1ffb },
+ { 19, 0x7fff0 }, { 13, 0x1ffc }, { 14, 0x3ffc }, { 6, 0x22 },
+ { 15, 0x7ffd }, { 5, 0x3 }, { 6, 0x23 }, { 5, 0x4 },
+ { 6, 0x24 }, { 5, 0x5 }, { 6, 0x25 }, { 6, 0x26 },
+ { 6, 0x27 }, { 5, 0x6 }, { 7, 0x74 }, { 7, 0x75 },
+ { 6, 0x28 }, { 6, 0x29 }, { 6, 0x2a }, { 5, 0x7 },
+ { 6, 0x2b }, { 7, 0x76 }, { 6, 0x2c }, { 5, 0x8 },
+ { 5, 0x9 }, { 6, 0x2d }, { 7, 0x77 }, { 7, 0x78 },
+ { 7, 0x79 }, { 7, 0x7a }, { 7, 0x7b }, { 15, 0x7ffe },
+ { 11, 0x7fc }, { 14, 0x3ffd }, { 13, 0x1ffd }, { 28, 0xffffffc },
+ { 20, 0xfffe6 }, { 22, 0x3fffd2 }, { 20, 0xfffe7 }, { 20, 0xfffe8 },
+ { 22, 0x3fffd3 }, { 22, 0x3fffd4 }, { 22, 0x3fffd5 }, { 23, 0x7fffd9 },
+ { 22, 0x3fffd6 }, { 23, 0x7fffda }, { 23, 0x7fffdb }, { 23, 0x7fffdc },
+ { 23, 0x7fffdd }, { 23, 0x7fffde }, { 24, 0xffffeb }, { 23, 0x7fffdf },
+ { 24, 0xffffec }, { 24, 0xffffed }, { 22, 0x3fffd7 }, { 23, 0x7fffe0 },
+ { 24, 0xffffee }, { 23, 0x7fffe1 }, { 23, 0x7fffe2 }, { 23, 0x7fffe3 },
+ { 23, 0x7fffe4 }, { 21, 0x1fffdc }, { 22, 0x3fffd8 }, { 23, 0x7fffe5 },
+ { 22, 0x3fffd9 }, { 23, 0x7fffe6 }, { 23, 0x7fffe7 }, { 24, 0xffffef },
+ { 22, 0x3fffda }, { 21, 0x1fffdd }, { 20, 0xfffe9 }, { 22, 0x3fffdb },
+ { 22, 0x3fffdc }, { 23, 0x7fffe8 }, { 23, 0x7fffe9 }, { 21, 0x1fffde },
+ { 23, 0x7fffea }, { 22, 0x3fffdd }, { 22, 0x3fffde }, { 24, 0xfffff0 },
+ { 21, 0x1fffdf }, { 22, 0x3fffdf }, { 23, 0x7fffeb }, { 23, 0x7fffec },
+ { 21, 0x1fffe0 }, { 21, 0x1fffe1 }, { 22, 0x3fffe0 }, { 21, 0x1fffe2 },
+ { 23, 0x7fffed }, { 22, 0x3fffe1 }, { 23, 0x7fffee }, { 23, 0x7fffef },
+ { 20, 0xfffea }, { 22, 0x3fffe2 }, { 22, 0x3fffe3 }, { 22, 0x3fffe4 },
+ { 23, 0x7ffff0 }, { 22, 0x3fffe5 }, { 22, 0x3fffe6 }, { 23, 0x7ffff1 },
+ { 26, 0x3ffffe0 }, { 26, 0x3ffffe1 }, { 20, 0xfffeb }, { 19, 0x7fff1 },
+ { 22, 0x3fffe7 }, { 23, 0x7ffff2 }, { 22, 0x3fffe8 }, { 25, 0x1ffffec },
+ { 26, 0x3ffffe2 }, { 26, 0x3ffffe3 }, { 26, 0x3ffffe4 }, { 27, 0x7ffffde },
+ { 27, 0x7ffffdf }, { 26, 0x3ffffe5 }, { 24, 0xfffff1 }, { 25, 0x1ffffed },
+ { 19, 0x7fff2 }, { 21, 0x1fffe3 }, { 26, 0x3ffffe6 }, { 27, 0x7ffffe0 },
+ { 27, 0x7ffffe1 }, { 26, 0x3ffffe7 }, { 27, 0x7ffffe2 }, { 24, 0xfffff2 },
+ { 21, 0x1fffe4 }, { 21, 0x1fffe5 }, { 26, 0x3ffffe8 }, { 26, 0x3ffffe9 },
+ { 28, 0xffffffd }, { 27, 0x7ffffe3 }, { 27, 0x7ffffe4 }, { 27, 0x7ffffe5 },
+ { 20, 0xfffec }, { 24, 0xfffff3 }, { 20, 0xfffed }, { 21, 0x1fffe6 },
+ { 22, 0x3fffe9 }, { 21, 0x1fffe7 }, { 21, 0x1fffe8 }, { 23, 0x7ffff3 },
+ { 22, 0x3fffea }, { 22, 0x3fffeb }, { 25, 0x1ffffee }, { 25, 0x1ffffef },
+ { 24, 0xfffff4 }, { 24, 0xfffff5 }, { 26, 0x3ffffea }, { 23, 0x7ffff4 },
+ { 26, 0x3ffffeb }, { 27, 0x7ffffe6 }, { 26, 0x3ffffec }, { 26, 0x3ffffed },
+ { 27, 0x7ffffe7 }, { 27, 0x7ffffe8 }, { 27, 0x7ffffe9 }, { 27, 0x7ffffea },
+ { 27, 0x7ffffeb }, { 28, 0xffffffe }, { 27, 0x7ffffec }, { 27, 0x7ffffed },
+ { 27, 0x7ffffee }, { 27, 0x7ffffef }, { 27, 0x7fffff0 }, { 26, 0x3ffffee },
+};
+
+/* Entry of huff_code_table_fast: a symbol and the length in bits of its code.
+ * NOTE(review): the table has 256 entries and each code of length L <= 8 is
+ * repeated 2^(8-L) times, so it is presumably indexed by the next 8 input
+ * bits, with code_len 0 marking prefixes of longer codes - confirm against
+ * the decoder. */
+typedef struct
+{
+ u8 symbol;
+ u8 code_len;
+} hpack_huffman_code_t;
+
+static hpack_huffman_code_t huff_code_table_fast[] = {
+ { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 }, { 0x30, 5 },
+ { 0x30, 5 }, { 0x30, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 },
+ { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x31, 5 }, { 0x32, 5 }, { 0x32, 5 },
+ { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 }, { 0x32, 5 },
+ { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 }, { 0x61, 5 },
+ { 0x61, 5 }, { 0x61, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 },
+ { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x63, 5 }, { 0x65, 5 }, { 0x65, 5 },
+ { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 }, { 0x65, 5 },
+ { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 }, { 0x69, 5 },
+ { 0x69, 5 }, { 0x69, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 },
+ { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x6F, 5 }, { 0x73, 5 }, { 0x73, 5 },
+ { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 }, { 0x73, 5 },
+ { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 }, { 0x74, 5 },
+ { 0x74, 5 }, { 0x74, 5 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 }, { 0x20, 6 },
+ { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x25, 6 }, { 0x2D, 6 }, { 0x2D, 6 },
+ { 0x2D, 6 }, { 0x2D, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 }, { 0x2E, 6 },
+ { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x2F, 6 }, { 0x33, 6 }, { 0x33, 6 },
+ { 0x33, 6 }, { 0x33, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 }, { 0x34, 6 },
+ { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x35, 6 }, { 0x36, 6 }, { 0x36, 6 },
+ { 0x36, 6 }, { 0x36, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 }, { 0x37, 6 },
+ { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x38, 6 }, { 0x39, 6 }, { 0x39, 6 },
+ { 0x39, 6 }, { 0x39, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 }, { 0x3D, 6 },
+ { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x41, 6 }, { 0x5F, 6 }, { 0x5F, 6 },
+ { 0x5F, 6 }, { 0x5F, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 }, { 0x62, 6 },
+ { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x64, 6 }, { 0x66, 6 }, { 0x66, 6 },
+ { 0x66, 6 }, { 0x66, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 }, { 0x67, 6 },
+ { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x68, 6 }, { 0x6C, 6 }, { 0x6C, 6 },
+ { 0x6C, 6 }, { 0x6C, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 }, { 0x6D, 6 },
+ { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x6E, 6 }, { 0x70, 6 }, { 0x70, 6 },
+ { 0x70, 6 }, { 0x70, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 }, { 0x72, 6 },
+ { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x75, 6 }, { 0x3A, 7 }, { 0x3A, 7 },
+ { 0x42, 7 }, { 0x42, 7 }, { 0x43, 7 }, { 0x43, 7 }, { 0x44, 7 }, { 0x44, 7 },
+ { 0x45, 7 }, { 0x45, 7 }, { 0x46, 7 }, { 0x46, 7 }, { 0x47, 7 }, { 0x47, 7 },
+ { 0x48, 7 }, { 0x48, 7 }, { 0x49, 7 }, { 0x49, 7 }, { 0x4A, 7 }, { 0x4A, 7 },
+ { 0x4B, 7 }, { 0x4B, 7 }, { 0x4C, 7 }, { 0x4C, 7 }, { 0x4D, 7 }, { 0x4D, 7 },
+ { 0x4E, 7 }, { 0x4E, 7 }, { 0x4F, 7 }, { 0x4F, 7 }, { 0x50, 7 }, { 0x50, 7 },
+ { 0x51, 7 }, { 0x51, 7 }, { 0x52, 7 }, { 0x52, 7 }, { 0x53, 7 }, { 0x53, 7 },
+ { 0x54, 7 }, { 0x54, 7 }, { 0x55, 7 }, { 0x55, 7 }, { 0x56, 7 }, { 0x56, 7 },
+ { 0x57, 7 }, { 0x57, 7 }, { 0x59, 7 }, { 0x59, 7 }, { 0x6A, 7 }, { 0x6A, 7 },
+ { 0x6B, 7 }, { 0x6B, 7 }, { 0x71, 7 }, { 0x71, 7 }, { 0x76, 7 }, { 0x76, 7 },
+ { 0x77, 7 }, { 0x77, 7 }, { 0x78, 7 }, { 0x78, 7 }, { 0x79, 7 }, { 0x79, 7 },
+ { 0x7A, 7 }, { 0x7A, 7 }, { 0x26, 8 }, { 0x2A, 8 }, { 0x2C, 8 }, { 0x3B, 8 },
+ { 0x58, 8 }, { 0x5A, 8 }, { 0x00, 0 }, { 0x00, 0 },
+};
+
+/* Group of symbols whose codes share one length: first_code is the lowest code
+ * of the group and symbols[i] is the symbol encoded as first_code + i.
+ * 29 is the size of the largest groups in huff_code_table_slow (23-bit and
+ * 28-bit codes); smaller groups leave the remaining slots zeroed. */
+typedef struct
+{
+ u32 first_code;
+ u8 code_len;
+ u8 symbols[29];
+} hpack_huffman_group_t;
+
+/* clang-format off */
+
+static hpack_huffman_group_t huff_code_table_slow[] = {
+ {
+ 0x3f8, /* first_code */
+ 10, /* code_len */
+ {
+ 0x21, 0x22, 0x28, 0x29, 0x3F,
+ } /* symbols */
+ },
+ {
+ 0x7fa, /* first_code */
+ 11, /* code_len */
+ {
+ 0x27, 0x2B, 0x7C,
+ } /* symbols */
+ },
+ {
+ 0xffa, /* first_code */
+ 12, /* code_len */
+ {
+ 0x23, 0x3E,
+ } /* symbols */
+ },
+ {
+ 0x1ff8, /* first_code */
+ 13, /* code_len */
+ {
+ 0x00, 0x24, 0x40, 0x5B, 0x5D, 0x7E,
+ } /* symbols */
+ },
+ {
+ 0x3ffc, /* first_code */
+ 14, /* code_len */
+ {
+ 0x5E, 0x7D,
+ } /* symbols */
+ },
+ {
+ 0x7ffc, /* first_code */
+ 15, /* code_len */
+ {
+ 0x3C, 0x60, 0x7B,
+ } /* symbols */
+ },
+ {
+ 0x7fff0, /* first_code */
+ 19, /* code_len */
+ {
+ 0x5C, 0xC3, 0xD0,
+ } /* symbols */
+ },
+ {
+ 0xfffe6, /* first_code */
+ 20, /* code_len */
+ {
+ 0x80, 0x82, 0x83, 0xA2, 0xB8, 0xC2, 0xE0, 0xE2,
+ } /* symbols */
+ },
+ {
+ 0x1fffdc, /* first_code */
+ 21, /* code_len */
+ {
+ 0x99, 0xA1, 0xA7, 0xAC, 0xB0, 0xB1, 0xB3, 0xD1, 0xD8, 0xD9,
+ 0xE3, 0xE5, 0xE6,
+ } /* symbols */
+ },
+ {
+ 0x3fffd2, /* first_code */
+ 22, /* code_len */
+ {
+ 0x81, 0x84, 0x85, 0x86, 0x88, 0x92, 0x9A, 0x9C, 0xA0, 0xA3,
+ 0xA4, 0xA9, 0xAA, 0xAD, 0xB2, 0xB5, 0xB9, 0xBA, 0xBB, 0xBD,
+ 0xBE, 0xC4, 0xC6, 0xE4, 0xE8, 0xE9,
+ } /* symbols */
+ },
+ {
+ 0x7fffd8, /* first_code */
+ 23, /* code_len */
+ {
+ 0x01, 0x87, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8F, 0x93, 0x95,
+ 0x96, 0x97, 0x98, 0x9B, 0x9D, 0x9E, 0xA5, 0xA6, 0xA8, 0xAE,
+ 0xAF, 0xB4, 0xB6, 0xB7, 0xBC, 0xBF, 0xC5, 0xE7, 0xEF,
+ } /* symbols */
+ },
+ {
+ 0xffffea, /* first_code */
+ 24, /* code_len */
+ {
+ 0x09, 0x8E, 0x90, 0x91, 0x94, 0x9F, 0xAB, 0xCE, 0xD7, 0xE1,
+ 0xEC, 0xED,
+ } /* symbols */
+ },
+ {
+ 0x1ffffec, /* first_code */
+ 25, /* code_len */
+ {
+ 0xC7, 0xCF, 0xEA, 0xEB,
+ } /* symbols */
+ },
+ {
+ 0x3ffffe0, /* first_code */
+ 26, /* code_len */
+ {
+ 0xC0, 0xC1, 0xC8, 0xC9, 0xCA, 0xCD, 0xD2, 0xD5, 0xDA, 0xDB,
+ 0xEE, 0xF0, 0xF2, 0xF3, 0xFF,
+ } /* symbols */
+ },
+ {
+ 0x7ffffde, /* first_code */
+ 27, /* code_len */
+ {
+ 0xCB, 0xCC, 0xD3, 0xD4, 0xD6, 0xDD, 0xDE, 0xDF, 0xF1, 0xF4,
+ 0xF5, 0xF6, 0xF7, 0xF8, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE,
+ } /* symbols */
+ },
+ {
+ 0xfffffe2, /* first_code */
+ 28, /* code_len */
+ {
+ 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E,
+ 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x17, 0x18, 0x19,
+ 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x7F, 0xDC, 0xF9,
+ } /* symbols */
+ },
+ {
+ 0x3ffffffc, /* first_code */
+ 30, /* code_len */
+ {
+ 0x0A, 0x0D, 0x16,
+ } /* symbols */
+ },
+};
+
+/* clang-format on */
+
+/* Select the huff_code_table_slow group for a 32-bit value.
+ *
+ * upper_bound[i] equals the first code of group i + 1 shifted left so that it
+ * starts at bit 31 (e.g. 0x7fa << 21 == 0xFF400000), so the first bound that
+ * is greater than value identifies the group; values at or above the last
+ * bound belong to the final (30-bit) group.
+ * NOTE(review): this presumably expects value to hold the undecoded bits
+ * left-aligned and to be past the codes of the fast table - confirm with the
+ * caller. */
+always_inline hpack_huffman_group_t *
+hpack_huffman_get_group (u32 value)
+{
+  static const u32 upper_bound[] = {
+    0xFF400000, 0xFFA00000, 0xFFC00000, 0xFFF00000, 0xFFF80000, 0xFFFE0000,
+    0xFFFE6000, 0xFFFEE000, 0xFFFF4800, 0xFFFFB000, 0xFFFFEA00, 0xFFFFF600,
+    0xFFFFF800, 0xFFFFFBC0, 0xFFFFFE20, 0xFFFFFFF0,
+  };
+  u32 n_bounds = sizeof (upper_bound) / sizeof (upper_bound[0]);
+  u32 group = 0;
+
+  while (group < n_bounds && value >= upper_bound[group])
+    group++;
+
+  return &huff_code_table_slow[group];
+}
+
+#endif /* SRC_PLUGINS_HTTP_HUFFMAN_TABLE_H_ */
diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c
index bc1b8c08630..fd90fbfed8c 100644
--- a/src/plugins/http/http_buffer.c
+++ b/src/plugins/http/http_buffer.c
@@ -57,8 +57,9 @@ buf_fifo_free (http_buffer_t *hb)
vec_free (bf->segs);
}
-static svm_fifo_seg_t *
-buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+static u32
+buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs,
+ u32 *n_segs)
{
http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
@@ -67,7 +68,7 @@ buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
max_len = clib_min (bf->len - bf->offset, (u64) max_len);
- vec_validate (bf->segs, _n_segs);
+ vec_validate (bf->segs, _n_segs - 1);
len = svm_fifo_segments (bf->src, 0, bf->segs, &_n_segs, max_len);
if (len < 0)
@@ -77,7 +78,8 @@ buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
HTTP_DBG (1, "available to send %u n_segs %u", len, *n_segs);
- return bf->segs;
+ *fs = bf->segs;
+ return len;
}
static u32
@@ -92,13 +94,13 @@ buf_fifo_drain (http_buffer_t *hb, u32 len)
return len;
}
-static u8
-buf_fifo_is_drained (http_buffer_t *hb)
+static u64
+buf_fifo_bytes_left (http_buffer_t *hb)
{
http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
ASSERT (bf->offset <= bf->len);
- return (bf->offset == bf->len);
+ return (bf->len - bf->offset);
}
const static http_buffer_vft_t buf_fifo_vft = {
@@ -106,7 +108,7 @@ const static http_buffer_vft_t buf_fifo_vft = {
.free = buf_fifo_free,
.get_segs = buf_fifo_get_segs,
.drain = buf_fifo_drain,
- .is_drained = buf_fifo_is_drained,
+ .bytes_left = buf_fifo_bytes_left,
};
HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_FIFO, buf_fifo_vft);
@@ -115,6 +117,7 @@ typedef struct http_buffer_ptr_
{
svm_fifo_seg_t *segs;
svm_fifo_t *f;
+ u64 len;
} http_buffer_ptr_t;
STATIC_ASSERT (sizeof (http_buffer_ptr_t) <= HTTP_BUFFER_DATA_SZ, "buf data");
@@ -135,12 +138,11 @@ buf_ptr_init (http_buffer_t *hb, void *data, u64 len)
bf->f = f;
bf->segs = 0;
- vec_validate (bf->segs, 1);
+ vec_validate (bf->segs, 0);
bf->segs[0].data = uword_to_pointer (ptr, u8 *);
- bf->segs[0].len = len;
- bf->segs[1] = bf->segs[0];
+ bf->len = len;
}
static void
@@ -152,15 +154,17 @@ buf_ptr_free (http_buffer_t *hb)
vec_free (bf->segs);
}
-static svm_fifo_seg_t *
-buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+static u32
+buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs,
+ u32 *n_segs)
{
http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
*n_segs = 1;
- bf->segs[1].len = clib_min (bf->segs[0].len, max_len);
+ bf->segs[0].len = clib_min (bf->len, (u64) max_len);
- return &bf->segs[1];
+ *fs = bf->segs;
+ return bf->segs[0].len;
}
static u32
@@ -168,14 +172,14 @@ buf_ptr_drain (http_buffer_t *hb, u32 len)
{
http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
- ASSERT (bf->segs[0].len >= len);
+ ASSERT (bf->len >= len);
- bf->segs[1].data += len;
- bf->segs[0].len -= len;
+ bf->segs[0].data += len;
+ bf->len -= len;
- HTTP_DBG (1, "drained %u left %u", len, bf->segs[0].len);
+ HTTP_DBG (1, "drained %u left %u", len, bf->len);
- if (!bf->segs[0].len)
+ if (!bf->len)
{
svm_fifo_dequeue_drop (bf->f, sizeof (uword));
return sizeof (uword);
@@ -184,12 +188,12 @@ buf_ptr_drain (http_buffer_t *hb, u32 len)
return 0;
}
-static u8
-buf_ptr_is_drained (http_buffer_t *hb)
+static u64
+buf_ptr_bytes_left (http_buffer_t *hb)
{
http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
- return (bf->segs[0].len == 0);
+ return bf->len;
}
const static http_buffer_vft_t buf_ptr_vft = {
@@ -197,7 +201,7 @@ const static http_buffer_vft_t buf_ptr_vft = {
.free = buf_ptr_free,
.get_segs = buf_ptr_get_segs,
.drain = buf_ptr_drain,
- .is_drained = buf_ptr_is_drained,
+ .bytes_left = buf_ptr_bytes_left,
};
HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_PTR, buf_ptr_vft);
diff --git a/src/plugins/http/http_buffer.h b/src/plugins/http/http_buffer.h
index 1140be42d6e..01b37d4173b 100644
--- a/src/plugins/http/http_buffer.h
+++ b/src/plugins/http/http_buffer.h
@@ -38,9 +38,10 @@ struct http_buffer_vft_
{
void (*init) (http_buffer_t *, void *data, u64 len);
void (*free) (http_buffer_t *);
- svm_fifo_seg_t *(*get_segs) (http_buffer_t *, u32 max_len, u32 *n_segs);
+ u32 (*get_segs) (http_buffer_t *, u32 max_len, svm_fifo_seg_t **fs,
+ u32 *n_segs);
u32 (*drain) (http_buffer_t *, u32 len);
- u8 (*is_drained) (http_buffer_t *);
+ u64 (*bytes_left) (http_buffer_t *);
};
void http_buffer_init (http_buffer_t *hb, http_buffer_type_t type,
@@ -53,10 +54,11 @@ http_buffer_free (http_buffer_t *hb)
hb->vft->free (hb);
}
-static inline svm_fifo_seg_t *
-http_buffer_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+static inline u32
+http_buffer_get_segs (http_buffer_t *hb, u32 max_len, svm_fifo_seg_t **fs,
+ u32 *n_segs)
{
- return hb->vft->get_segs (hb, max_len, n_segs);
+ return hb->vft->get_segs (hb, max_len, fs, n_segs);
}
static inline u32
@@ -65,10 +67,10 @@ http_buffer_drain (http_buffer_t *hb, u32 len)
return hb->vft->drain (hb, len);
}
-static inline u8
-http_buffer_is_drained (http_buffer_t *hb)
+static inline u64
+http_buffer_bytes_left (http_buffer_t *hb)
{
- return hb->vft->is_drained (hb);
+ return hb->vft->bytes_left (hb);
}
#endif /* SRC_PLUGINS_HTTP_HTTP_BUFFER_H_ */
diff --git a/src/plugins/http/http_header_names.h b/src/plugins/http/http_header_names.h
index 99acac786db..1778daf10d9 100644
--- a/src/plugins/http/http_header_names.h
+++ b/src/plugins/http/http_header_names.h
@@ -8,7 +8,8 @@
#include <http/http.h>
static http_token_t http_header_names[] = {
-#define _(sym, str) { http_token_lit (str) },
+#define _(sym, str_canonical, str_lower, hpack_index) \
+ { http_token_lit (str_canonical) },
foreach_http_header_name
#undef _
};
diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst
index 995e55e6f0f..4e799a57668 100644
--- a/src/plugins/http/http_plugin.rst
+++ b/src/plugins/http/http_plugin.rst
@@ -15,7 +15,7 @@ Usage
-----
The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``,
-``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_build_header_table``, ``http_get_header``,
+``http_percent_decode``, ``http_path_sanitize``, ``http_build_header_table``, ``http_get_header``,
``http_reset_header_table``, ``http_free_header_table``, ``http_init_headers_ctx``, ``http_add_header``,
``http_add_custom_header``, ``http_validate_target_syntax``, ``http_parse_authority``, ``http_serialize_authority``,
``http_parse_masque_host_port``, ``http_decap_udp_payload_datagram``, ``http_encap_udp_payload_datagram``,
diff --git a/src/plugins/http/http_private.h b/src/plugins/http/http_private.h
new file mode 100644
index 00000000000..1f9812de7fa
--- /dev/null
+++ b/src/plugins/http/http_private.h
@@ -0,0 +1,885 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_
+#define SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_
+
+#include <vppinfra/time_range.h>
+#include <vnet/session/application.h>
+#include <vnet/session/session.h>
+#include <vnet/session/transport.h>
+#include <http/http.h>
+#include <http/http_buffer.h>
+
+#define HTTP_FIFO_THRESH (16 << 10)
+
+/* HTTP/2 client connection preface, RFC9113 section 3.4 */
+static const http_token_t http2_conn_preface = { http_token_lit (
+ "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n") };
+
+/* Connection handle: a 3-bit HTTP version and a 29-bit connection index packed
+ * into one u32, accessible as a whole through as_u32. */
+typedef union
+{
+ struct
+ {
+ u32 version : 3;
+ u32 conn_index : 29;
+ };
+ u32 as_u32;
+} http_conn_handle_t;
+
+STATIC_ASSERT (sizeof (http_conn_handle_t) == sizeof (u32), "must fit in u32");
+
+/* Request handle: a 3-bit HTTP version and a 29-bit request index packed into
+ * one u32, accessible as a whole through as_u32. */
+typedef union
+{
+ struct
+ {
+ u32 version : 3;
+ u32 req_index : 29;
+ };
+ u32 as_u32;
+} http_req_handle_t;
+
+STATIC_ASSERT (sizeof (http_req_handle_t) == sizeof (u32), "must fit in u32");
+
+#define foreach_http_conn_state \
+ _ (LISTEN, "LISTEN") \
+ _ (CONNECTING, "CONNECTING") \
+ _ (ESTABLISHED, "ESTABLISHED") \
+ _ (TRANSPORT_CLOSED, "TRANSPORT-CLOSED") \
+ _ (APP_CLOSED, "APP-CLOSED") \
+ _ (CLOSED, "CLOSED")
+
+typedef enum http_conn_state_
+{
+#define _(s, str) HTTP_CONN_STATE_##s,
+ foreach_http_conn_state
+#undef _
+} http_conn_state_t;
+
+#define foreach_http_req_state \
+ _ (0, IDLE, "idle") \
+ _ (1, WAIT_APP_METHOD, "wait app method") \
+ _ (2, WAIT_TRANSPORT_REPLY, "wait transport reply") \
+ _ (3, TRANSPORT_IO_MORE_DATA, "transport io more data") \
+ _ (4, WAIT_TRANSPORT_METHOD, "wait transport method") \
+ _ (5, WAIT_APP_REPLY, "wait app reply") \
+ _ (6, APP_IO_MORE_DATA, "app io more data") \
+ _ (7, TUNNEL, "tunnel") \
+ _ (8, UDP_TUNNEL, "udp tunnel")
+
+typedef enum http_req_state_
+{
+#define _(n, s, str) HTTP_REQ_STATE_##s = n,
+ foreach_http_req_state
+#undef _
+ HTTP_REQ_N_STATES
+} http_req_state_t;
+
+typedef enum http_target_form_
+{
+ HTTP_TARGET_ORIGIN_FORM,
+ HTTP_TARGET_ABSOLUTE_FORM,
+ HTTP_TARGET_AUTHORITY_FORM,
+ HTTP_TARGET_ASTERISK_FORM
+} http_target_form_t;
+
+/* HTTP version of a connection. HTTP_VERSION_NA is 7, the largest value that
+ * fits the 3-bit version field of http_conn_handle_t / http_req_handle_t. */
+typedef enum http_version_
+{
+ HTTP_VERSION_1,
+ HTTP_VERSION_2,
+ HTTP_VERSION_3,
+ HTTP_VERSION_NA = 7,
+} http_version_t;
+
+typedef struct http_req_id_
+{
+ session_handle_t app_session_handle;
+ u32 parent_app_wrk_index;
+ u32 hc_index;
+} http_req_id_t;
+
+STATIC_ASSERT (sizeof (http_req_id_t) <= TRANSPORT_CONN_ID_LEN,
+ "ctx id must be less than TRANSPORT_CONN_ID_LEN");
+
+typedef struct http_req_
+{
+ union
+ {
+ transport_connection_t connection;
+ http_req_id_t c_http_req_id;
+ };
+#define hr_pa_wrk_index c_http_req_id.parent_app_wrk_index
+#define hr_pa_session_handle c_http_req_id.app_session_handle
+#define hr_hc_index c_http_req_id.hc_index
+#define hr_req_handle connection.c_index
+
+ u32 as_fifo_offset; /* for peek */
+
+ http_req_state_t state; /* state-machine state */
+
+ http_buffer_t tx_buf; /* message body from app to be sent */
+
+ /*
+ * for parsing of incoming message from transport
+ */
+ u32 rx_buf_offset; /* current offset during parsing */
+ u32 control_data_len; /* start line + headers + empty line */
+
+ union
+ {
+ u64 to_recv; /* remaining bytes of body to receive from transport */
+ u64 to_skip; /* remaining bytes of capsule to skip */
+ };
+
+ u8 is_tunnel;
+
+ /*
+ * parsed metadata for app
+ */
+ union
+ {
+ http_status_code_t status_code;
+ http_req_method_t method;
+ };
+
+ http_target_form_t target_form;
+ u8 *target;
+ http_url_scheme_t scheme;
+ u32 target_authority_offset;
+ u32 target_authority_len;
+ u32 target_path_offset;
+ u32 target_path_len;
+ u32 target_query_offset;
+ u32 target_query_len;
+
+ u32 headers_offset;
+ u32 headers_len;
+
+ u32 body_offset;
+ u64 body_len;
+
+ http_field_line_t *headers;
+ uword content_len_header_index;
+ uword connection_header_index;
+ uword upgrade_header_index;
+ uword host_header_index;
+
+ http_upgrade_proto_t upgrade_proto;
+} http_req_t;
+
+#define foreach_http_conn_flags \
+ _ (HO_DONE, "ho-done") \
+ _ (NO_APP_SESSION, "no-app-session") \
+ _ (PENDING_TIMER, "pending-timer") \
+ _ (IS_SERVER, "is-server") \
+ _ (HAS_REQUEST, "has-request")
+
+typedef enum http_conn_flags_bit_
+{
+#define _(sym, str) HTTP_CONN_F_BIT_##sym,
+ foreach_http_conn_flags
+#undef _
+} http_conn_flags_bit_t;
+
+typedef enum http_conn_flags_
+{
+#define _(sym, str) HTTP_CONN_F_##sym = 1 << HTTP_CONN_F_BIT_##sym,
+ foreach_http_conn_flags
+#undef _
+} __clib_packed http_conn_flags_t;
+
+typedef struct http_conn_id_
+{
+ union
+ {
+ session_handle_t app_session_handle;
+ u32 parent_app_api_ctx;
+ };
+ session_handle_t tc_session_handle;
+ u32 parent_app_wrk_index;
+} http_conn_id_t;
+
+STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN,
+ "ctx id must be less than TRANSPORT_CONN_ID_LEN");
+
+typedef struct http_tc_
+{
+ union
+ {
+ transport_connection_t connection;
+ http_conn_id_t c_http_conn_id;
+ };
+#define hc_tc_session_handle c_http_conn_id.tc_session_handle
+#define hc_pa_wrk_index c_http_conn_id.parent_app_wrk_index
+#define hc_pa_session_handle c_http_conn_id.app_session_handle
+#define hc_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx
+#define hc_hc_index connection.c_index
+
+ http_version_t version;
+ http_conn_state_t state;
+ u32 timer_handle;
+ u32 timeout;
+ u8 *app_name;
+ u8 *host;
+ http_conn_flags_t flags;
+ http_udp_tunnel_mode_t udp_tunnel_mode;
+
+ void *opaque; /* version specific data */
+} http_conn_t;
+
+typedef struct http_worker_
+{
+ http_conn_t *conn_pool;
+} http_worker_t;
+
+typedef struct http_main_
+{
+ http_worker_t *wrk;
+ http_conn_t *listener_pool;
+ http_conn_t *ho_conn_pool;
+ u32 *postponed_ho_free;
+ u32 *ho_free_list;
+ u32 app_index;
+
+ u8 **rx_bufs;
+ u8 **tx_bufs;
+ u8 **app_header_lists;
+
+ clib_timebase_t timebase;
+
+ http_status_code_t *sc_by_u16;
+ /*
+ * Runtime config
+ */
+ u8 is_init;
+
+ /*
+ * Config
+ */
+ u64 first_seg_size;
+ u64 add_seg_size;
+ u32 fifo_size;
+} http_main_t;
+
+typedef struct http_engine_vft_
+{
+ const char *name;
+ u32 (*hc_index_get_by_req_index) (u32 req_index,
+ clib_thread_index_t thread_index);
+ transport_connection_t *(*req_get_connection) (
+ u32 req_index, clib_thread_index_t thread_index);
+ u8 *(*format_req) (u8 *s, va_list *args);
+ void (*app_tx_callback) (http_conn_t *hc, u32 req_index,
+ transport_send_params_t *sp);
+ void (*app_rx_evt_callback) (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index);
+ void (*app_close_callback) (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index);
+ void (*app_reset_callback) (http_conn_t *hc, u32 req_index,
+ clib_thread_index_t thread_index);
+ int (*transport_connected_callback) (http_conn_t *hc);
+ void (*transport_rx_callback) (http_conn_t *hc);
+ void (*transport_close_callback) (http_conn_t *hc);
+ void (*transport_reset_callback) (http_conn_t *hc);
+ void (*transport_conn_reschedule_callback) (http_conn_t *hc);
+ void (*conn_accept_callback) (http_conn_t *hc); /* optional */
+ void (*conn_cleanup_callback) (http_conn_t *hc);
+ void (*enable_callback) (void); /* optional */
+ uword (*unformat_cfg_callback) (unformat_input_t *input); /* optional */
+} http_engine_vft_t;
+
+void http_register_engine (const http_engine_vft_t *vft,
+ http_version_t version);
+
+/* HTTP state machine result */
+typedef enum http_sm_result_t_
+{
+ HTTP_SM_STOP = 0,
+ HTTP_SM_CONTINUE = 1,
+ HTTP_SM_ERROR = -1,
+} http_sm_result_t;
+
+typedef http_sm_result_t (*http_sm_handler) (http_conn_t *hc, http_req_t *req,
+ transport_send_params_t *sp);
+
+/* Consume one character from the cursor "p" of the enclosing function and
+ * make that function return -1 (after logging a warning) when it differs from
+ * c. The cursor is advanced even on mismatch.
+ * Wrapped in do/while (0) so the macro behaves as a single statement (safe in
+ * an unbraced if/else); must be followed by a semicolon. */
+#define expect_char(c) \
+  do \
+    { \
+      if (*p++ != (c)) \
+        { \
+          clib_warning ("unexpected character"); \
+          return -1; \
+        } \
+    } \
+  while (0)
+
+/* Consume one decimal digit from the cursor "p" of the enclosing function and
+ * add digit * mul to val. When the current character is not a digit a warning
+ * is logged and the enclosing function returns -1 without advancing p.
+ * The character is converted to unsigned char before isdigit() because
+ * passing a negative char value to the <ctype.h> functions is undefined. */
+#define parse_int(val, mul) \
+  do \
+    { \
+      if (!isdigit ((unsigned char) *p)) \
+        { \
+          clib_warning ("expected digit"); \
+          return -1; \
+        } \
+      (val) += (mul) * (*p++ - '0'); \
+    } \
+  while (0)
+
+/* Expands to TWO comma-separated expressions: a const char pointer to the
+ * value of field line _fl inside _rx_buf (at headers_offset + value_offset)
+ * and the value length. It is not parenthesized as a whole, so it can only be
+ * used where such a pair is valid, e.g. as two function arguments. */
+#define http_field_line_value_token(_fl, _req, _rx_buf) \
+ (const char *) ((_rx_buf) + (_req)->headers_offset + (_fl)->value_offset), \
+ (_fl)->value_len
+
+u8 *format_http_req_state (u8 *s, va_list *va);
+u8 *format_http_conn_state (u8 *s, va_list *args);
+u8 *format_http_time_now (u8 *s, va_list *args);
+
+/**
+ * @brief Find the first occurrence of the string in the vector.
+ *
+ * @param vec The vector to be scanned.
+ * @param offset Search offset in the vector.
+ * @param num Maximum number of characters to be searched if non-zero.
+ * @param str The string to be searched.
+ *
+ * @return @c -1 if the string is not found within the vector; index otherwise.
+ */
+int http_v_find_index (u8 *vec, u32 offset, u32 num, char *str);
+
+/**
+ * Disconnect HTTP connection.
+ *
+ * @param hc HTTP connection to disconnect.
+ */
+void http_disconnect_transport (http_conn_t *hc);
+
+/**
+ * Shutdown HTTP connection.
+ *
+ * Close TX side of the underlying transport.
+ *
+ * @param hc HTTP connection to shutdown.
+ */
+void http_shutdown_transport (http_conn_t *hc);
+
+/**
+ * Convert numeric representation of status code to @c http_status_code_t.
+ *
+ * @param status_code Status code within the range of 100 to 599, inclusive.
+ *
+ * @return Registered status code; an unrecognized status code is mapped to
+ * the x00 status code of its class.
+ */
+http_status_code_t http_sc_by_u16 (u16 status_code);
+
+/**
+ * Read header list sent by app.
+ *
+ * @param req HTTP request.
+ * @param msg HTTP msg sent by app.
+ *
+ * @return Pointer to the header list.
+ *
+ * @note For immediate processing, not for buffering.
+ */
+u8 *http_get_app_header_list (http_req_t *req, http_msg_t *msg);
+
+/**
+ * Get pre-allocated TX buffer/vector.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return Pointer to the vector.
+ *
+ * @note Vector length is reset to zero, use as temporary storage.
+ */
+u8 *http_get_tx_buf (http_conn_t *hc);
+
+/**
+ * Get pre-allocated RX buffer/vector.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return Pointer to the vector.
+ *
+ * @note Vector length is reset to zero, use as temporary storage.
+ */
+u8 *http_get_rx_buf (http_conn_t *hc);
+
+/**
+ * Read request target path sent by app.
+ *
+ * @param req HTTP request.
+ * @param msg HTTP msg sent by app.
+ *
+ * @return Pointer to the target path.
+ *
+ * @note Valid only for the lifetime of the request.
+ */
+u8 *http_get_app_target (http_req_t *req, http_msg_t *msg);
+
+/**
+ * Initialize per-request HTTP TX buffer.
+ *
+ * @param req HTTP request.
+ * @param msg HTTP msg sent by app.
+ *
+ * @note Use for streaming of body sent by app.
+ */
+void http_req_tx_buffer_init (http_req_t *req, http_msg_t *msg);
+
+/**
+ * Move an HTTP request to a new state.
+ *
+ * The transition is logged through HTTP_DBG. Calling this on a request that
+ * is currently in HTTP_REQ_STATE_TUNNEL trips an ASSERT.
+ *
+ * @param req   HTTP request.
+ * @param state State to switch to.
+ */
+always_inline void
+http_req_state_change (http_req_t *req, http_req_state_t state)
+{
+  HTTP_DBG (1, "changing http req state: %U -> %U", format_http_req_state,
+            req->state, format_http_req_state, state);
+
+  /* the tunnel state is not supposed to be left through this helper */
+  ASSERT (req->state != HTTP_REQ_STATE_TUNNEL);
+
+  req->state = state;
+}
+
+/**
+ * Send RX event to the app worker.
+ *
+ * Nothing is sent when the app session already has SESSION_F_RX_EVT set or
+ * when its app worker is no longer valid. Otherwise the flag is set and the
+ * worker is notified.
+ *
+ * @param req HTTP request.
+ */
+always_inline void
+http_app_worker_rx_notify (http_req_t *req)
+{
+  session_t *app_session;
+  app_worker_t *wrk;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+
+  /* RX event flag already set, nothing to do */
+  if (app_session->flags & SESSION_F_RX_EVT)
+    return;
+
+  wrk = app_worker_get_if_valid (app_session->app_wrk_index);
+  if (!wrk)
+    return;
+
+  app_session->flags |= SESSION_F_RX_EVT;
+  app_worker_rx_notify (wrk, app_session);
+}
+
+/**
+ * Get underlying transport protocol of the HTTP connection.
+ *
+ * The protocol is taken from the session referenced by the connection's
+ * transport session handle.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return Transport protocol, @ref transport_proto_t.
+ */
+always_inline transport_proto_t
+http_get_transport_proto (http_conn_t *hc)
+{
+  session_t *ts;
+
+  ts = session_get_from_handle (hc->hc_tc_session_handle);
+  return session_get_transport_proto (ts);
+}
+
+/**
+ * Read HTTP msg sent by app.
+ *
+ * Dequeues sizeof (*msg) bytes from the TX fifo of the app session; a short
+ * dequeue trips an ASSERT.
+ *
+ * @param req HTTP request.
+ * @param msg HTTP msg will be stored here.
+ */
+always_inline void
+http_get_app_msg (http_req_t *req, http_msg_t *msg)
+{
+  session_t *app_session;
+  int n_deq;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  n_deq = svm_fifo_dequeue (app_session->tx_fifo, sizeof (*msg), (u8 *) msg);
+  ASSERT (n_deq == sizeof (*msg));
+}
+
+/**
+ * Split the request target into path and optional query.
+ *
+ * Scans the target path bytes in rx_buf for the first '?'. When found, the
+ * query offset/length are set to cover everything after it and the path
+ * length is shortened so that it ends right before the '?'. Without a '?'
+ * the request is left untouched.
+ *
+ * @param req    HTTP request (target_path_offset/len must be set).
+ * @param rx_buf Buffer the target offsets refer to.
+ */
+always_inline void
+http_identify_optional_query (http_req_t *req, u8 *rx_buf)
+{
+  int idx = req->target_path_offset;
+
+  while (idx < (req->target_path_offset + req->target_path_len))
+    {
+      if (rx_buf[idx] == '?')
+        {
+          /* query starts right after the question mark */
+          req->target_query_offset = idx + 1;
+          req->target_query_len = req->target_path_offset +
+                                  req->target_path_len -
+                                  req->target_query_offset;
+          /* drop the query and the '?' itself from the path */
+          req->target_path_len =
+            req->target_path_len - req->target_query_len - 1;
+          return;
+        }
+      idx++;
+    }
+}
+
+/**
+ * Parse the value of the Content-Length header into req->body_len.
+ *
+ * The header is looked up in req->headers at req->content_len_header_index
+ * and its value is read from rx_buf relative to req->headers_offset.
+ *
+ * @param req    HTTP request.
+ * @param rx_buf Buffer the header offsets refer to.
+ *
+ * @return 0 on success, -1 if the value contains a non-digit character or
+ *         does not fit into 64 bits (req->body_len is left untouched then).
+ */
+always_inline int
+http_parse_content_length (http_req_t *req, u8 *rx_buf)
+{
+  http_field_line_t *field_line;
+  u64 body_len = 0, digit;
+  u8 *p;
+  int i;
+
+  field_line = vec_elt_at_index (req->headers, req->content_len_header_index);
+  p = rx_buf + req->headers_offset + field_line->value_offset;
+  for (i = 0; i < field_line->value_len; i++, p++)
+    {
+      /* check for digit */
+      if (!isdigit (*p))
+        {
+          HTTP_DBG (1, "expected digit");
+          return -1;
+        }
+      digit = *p - '0';
+      /*
+       * Check for overflow before multiplying. Comparing the wrapped result
+       * against the previous value is not sufficient, e.g. 2^62 * 10 wraps
+       * around to 2^63 which is still bigger than 2^62.
+       */
+      if (body_len > (~(u64) 0 - digit) / 10)
+        {
+          HTTP_DBG (1, "content-length value too big number, overflow");
+          return -1;
+        }
+      body_len = body_len * 10 + digit;
+    }
+
+  req->body_len = body_len;
+
+  return 0;
+}
+
+/**
+ * Deschedule the request's connection and mark the send parameters with
+ * TRANSPORT_SND_F_DESCHED.
+ *
+ * @param req HTTP request.
+ * @param sp  Transport send parameters to be flagged.
+ */
+always_inline void
+http_req_deschedule (http_req_t *req, transport_send_params_t *sp)
+{
+  transport_connection_deschedule (&req->connection);
+
+  sp->flags |= TRANSPORT_SND_F_DESCHED;
+}
+
+/* Abstraction of app session fifo operations */
+
+/**
+ * Request a dequeue notification (SVM_FIFO_WANT_DEQ_NOTIF) on the RX fifo of
+ * the app session.
+ *
+ * @param req HTTP request.
+ */
+always_inline void
+http_io_as_add_want_deq_ntf (http_req_t *req)
+{
+  session_t *app_session;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  svm_fifo_add_want_deq_ntf (app_session->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+}
+
+/**
+ * Get the maximum enqueue size reported for the RX fifo of the app session.
+ *
+ * @param req HTTP request.
+ *
+ * @return Value of svm_fifo_max_enqueue_prod for the app session RX fifo.
+ */
+always_inline u32
+http_io_as_max_write (http_req_t *req)
+{
+  session_t *app_session;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  return svm_fifo_max_enqueue_prod (app_session->rx_fifo);
+}
+
+/**
+ * Get the maximum dequeue size reported for the TX fifo of the app session.
+ *
+ * @param req HTTP request.
+ *
+ * @return Value of svm_fifo_max_dequeue_cons for the app session TX fifo.
+ */
+always_inline u32
+http_io_as_max_read (http_req_t *req)
+{
+  session_t *app_session;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  return svm_fifo_max_dequeue_cons (app_session->tx_fifo);
+}
+
+/**
+ * Enqueue data to the RX fifo of the app session.
+ *
+ * A partial enqueue trips an ASSERT, so the caller is expected to make sure
+ * there is enough room beforehand.
+ *
+ * @param req  HTTP request.
+ * @param data Bytes to enqueue.
+ * @param len  Number of bytes to enqueue.
+ */
+always_inline void
+http_io_as_write (http_req_t *req, u8 *data, u32 len)
+{
+  session_t *app_session;
+  int rv;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  rv = svm_fifo_enqueue (app_session->rx_fifo, len, data);
+  ASSERT (rv == len);
+}
+
+/**
+ * Enqueue a list of segments to the RX fifo of the app session.
+ *
+ * @param req    HTTP request.
+ * @param segs   Segments to enqueue.
+ * @param n_segs Number of segments.
+ *
+ * @return Return value of svm_fifo_enqueue_segments cast to u32; a
+ *         non-positive result trips an ASSERT.
+ */
+always_inline u32
+http_io_as_write_segs (http_req_t *req, const svm_fifo_seg_t segs[],
+                       u32 n_segs)
+{
+  session_t *app_session;
+  int rv;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  rv = svm_fifo_enqueue_segments (app_session->rx_fifo, segs, n_segs, 0);
+  ASSERT (rv > 0);
+
+  return (u32) rv;
+}
+
+/**
+ * Read data from the TX fifo of the app session.
+ *
+ * In peek mode the data stays in the fifo and reading starts at
+ * req->as_fifo_offset, which is then advanced by the number of bytes that
+ * were actually copied. Otherwise the data is dequeued; a short dequeue
+ * trips an ASSERT.
+ *
+ * @param req  HTTP request.
+ * @param buf  Destination buffer.
+ * @param len  Number of bytes to read.
+ * @param peek Non-zero to peek instead of dequeue.
+ *
+ * @return Return value of the fifo operation cast to u32.
+ */
+always_inline u32
+http_io_as_read (http_req_t *req, u8 *buf, u32 len, u8 peek)
+{
+  int n_read;
+  session_t *as = session_get_from_handle (req->hr_pa_session_handle);
+
+  if (peek)
+    {
+      n_read = svm_fifo_peek (as->tx_fifo, req->as_fifo_offset, len, buf);
+      ASSERT (n_read > 0);
+      /*
+       * Peek may return less than requested. Advance the offset only by
+       * what was really read, otherwise the next peek would skip data.
+       */
+      if (n_read > 0)
+        req->as_fifo_offset += n_read;
+      return (u32) n_read;
+    }
+
+  n_read = svm_fifo_dequeue (as->tx_fifo, len, buf);
+  ASSERT (n_read == len);
+  return (u32) n_read;
+}
+
+/**
+ * Get segments describing data in the TX fifo of the app session, starting
+ * at fifo offset 0.
+ *
+ * @param req       HTTP request.
+ * @param segs      Segment array handed to svm_fifo_segments.
+ * @param n_segs    Segment count handed to svm_fifo_segments (presumably
+ *                  in/out, confirm against the svm_fifo API).
+ * @param max_bytes Byte limit handed to svm_fifo_segments.
+ */
+always_inline void
+http_io_as_read_segs (http_req_t *req, svm_fifo_seg_t *segs, u32 *n_segs,
+                      u32 max_bytes)
+{
+  session_t *app_session;
+  int rv;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  rv = svm_fifo_segments (app_session->tx_fifo, 0, segs, n_segs, max_bytes);
+  ASSERT (rv > 0);
+}
+
+/**
+ * Drop data from the TX fifo of the app session and reset the request's
+ * peek offset.
+ *
+ * @param req HTTP request.
+ * @param len Number of bytes to drop.
+ */
+always_inline void
+http_io_as_drain (http_req_t *req, u32 len)
+{
+  session_t *app_session;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  svm_fifo_dequeue_drop (app_session->tx_fifo, len);
+
+  req->as_fifo_offset = 0;
+}
+
+/**
+ * Drop everything from the TX fifo of the app session and reset the
+ * request's peek offset.
+ *
+ * @param req HTTP request.
+ */
+always_inline void
+http_io_as_drain_all (http_req_t *req)
+{
+  session_t *app_session;
+
+  app_session = session_get_from_handle (req->hr_pa_session_handle);
+  svm_fifo_dequeue_drop_all (app_session->tx_fifo);
+
+  req->as_fifo_offset = 0;
+}
+
+/* Abstraction of transport session fifo operations */
+
+/**
+ * Get size of a transport session fifo.
+ *
+ * @param hc    HTTP connection.
+ * @param is_rx Non-zero selects the RX fifo, zero the TX fifo.
+ *
+ * @return Value of svm_fifo_size for the selected fifo.
+ */
+always_inline u32
+http_io_ts_fifo_size (http_conn_t *hc, u8 is_rx)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  return svm_fifo_size (is_rx ? transport_session->rx_fifo :
+                                transport_session->tx_fifo);
+}
+
+/**
+ * Get the maximum dequeue size reported for the RX fifo of the transport
+ * session.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return Value of svm_fifo_max_dequeue_cons for the transport RX fifo.
+ */
+always_inline u32
+http_io_ts_max_read (http_conn_t *hc)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  return svm_fifo_max_dequeue_cons (transport_session->rx_fifo);
+}
+
+/**
+ * Get how much can be written to the transport session TX fifo.
+ *
+ * @param hc HTTP connection.
+ * @param sp Transport send parameters, must not be NULL.
+ *
+ * @return The smaller of the fifo's maximum enqueue size and
+ *         sp->max_burst_size.
+ */
+always_inline u32
+http_io_ts_max_write (http_conn_t *hc, transport_send_params_t *sp)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  return clib_min (svm_fifo_max_enqueue_prod (transport_session->tx_fifo),
+                   sp->max_burst_size);
+}
+
+/**
+ * Check free space of the transport session TX fifo against
+ * HTTP_FIFO_THRESH.
+ *
+ * @param hc HTTP connection.
+ *
+ * @return 1 if the maximum enqueue size is below HTTP_FIFO_THRESH,
+ *         0 otherwise.
+ */
+always_inline int
+http_io_ts_check_write_thresh (http_conn_t *hc)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  if (svm_fifo_max_enqueue_prod (transport_session->tx_fifo) <
+      HTTP_FIFO_THRESH)
+    return 1;
+
+  return 0;
+}
+
+/**
+ * Request a dequeue notification (SVM_FIFO_WANT_DEQ_NOTIF) on the TX fifo of
+ * the transport session.
+ *
+ * @param hc HTTP connection.
+ */
+always_inline void
+http_io_ts_add_want_deq_ntf (http_conn_t *hc)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  svm_fifo_add_want_deq_ntf (transport_session->tx_fifo,
+                             SVM_FIFO_WANT_DEQ_NOTIF);
+}
+
+/**
+ * Read data from the RX fifo of the transport session.
+ *
+ * In peek mode the data is copied from fifo offset 0 and stays in the fifo.
+ * Otherwise it is dequeued; a short dequeue trips an ASSERT.
+ *
+ * @param hc   HTTP connection.
+ * @param buf  Destination buffer.
+ * @param len  Number of bytes to read.
+ * @param peek Non-zero to peek instead of dequeue.
+ *
+ * @return Return value of the fifo operation cast to u32.
+ */
+always_inline u32
+http_io_ts_read (http_conn_t *hc, u8 *buf, u32 len, u8 peek)
+{
+  session_t *transport_session;
+  int rv;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+
+  if (!peek)
+    {
+      rv = svm_fifo_dequeue (transport_session->rx_fifo, len, buf);
+      ASSERT (rv == len);
+      return (u32) rv;
+    }
+
+  rv = svm_fifo_peek (transport_session->rx_fifo, 0, len, buf);
+  ASSERT (rv > 0);
+  return (u32) rv;
+}
+
+/**
+ * Get segments describing data in the RX fifo of the transport session,
+ * starting at fifo offset 0.
+ *
+ * @param hc        HTTP connection.
+ * @param segs      Segment array handed to svm_fifo_segments.
+ * @param n_segs    Segment count handed to svm_fifo_segments (presumably
+ *                  in/out, confirm against the svm_fifo API).
+ * @param max_bytes Byte limit handed to svm_fifo_segments.
+ */
+always_inline void
+http_io_ts_read_segs (http_conn_t *hc, svm_fifo_seg_t *segs, u32 *n_segs,
+                      u32 max_bytes)
+{
+  session_t *transport_session;
+  int rv;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  rv = svm_fifo_segments (transport_session->rx_fifo, 0, segs, n_segs,
+                          max_bytes);
+  ASSERT (rv > 0);
+}
+
+/**
+ * Drop data from the RX fifo of the transport session.
+ *
+ * @param hc  HTTP connection.
+ * @param len Number of bytes to drop.
+ */
+always_inline void
+http_io_ts_drain (http_conn_t *hc, u32 len)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  svm_fifo_dequeue_drop (transport_session->rx_fifo, len);
+}
+
+/**
+ * Drop everything from the RX fifo of the transport session.
+ *
+ * @param hc HTTP connection.
+ */
+always_inline void
+http_io_ts_drain_all (http_conn_t *hc)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  svm_fifo_dequeue_drop_all (transport_session->rx_fifo);
+}
+
+/**
+ * Housekeeping after reading from the transport session RX fifo.
+ *
+ * With clear_evt set, the fifo event is unset if the fifo is empty. Without
+ * it, an RX io event is programmed for the transport session if there is
+ * still something to dequeue.
+ *
+ * @param hc        HTTP connection.
+ * @param clear_evt Selects which of the two actions is considered.
+ */
+always_inline void
+http_io_ts_after_read (http_conn_t *hc, u8 clear_evt)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+
+  if (clear_evt)
+    {
+      if (svm_fifo_is_empty_cons (transport_session->rx_fifo))
+        svm_fifo_unset_event (transport_session->rx_fifo);
+      return;
+    }
+
+  /* data left in the fifo, ask for another RX event */
+  if (svm_fifo_max_dequeue_cons (transport_session->rx_fifo))
+    session_program_rx_io_evt (hc->hc_tc_session_handle);
+}
+
+/**
+ * Enqueue data to the TX fifo of the transport session.
+ *
+ * A partial enqueue trips an ASSERT. When send parameters are provided,
+ * bytes_dequeued is increased and max_burst_size decreased by len.
+ *
+ * @param hc   HTTP connection.
+ * @param data Bytes to enqueue.
+ * @param len  Number of bytes to enqueue.
+ * @param sp   Transport send parameters, may be NULL.
+ */
+always_inline void
+http_io_ts_write (http_conn_t *hc, u8 *data, u32 len,
+                  transport_send_params_t *sp)
+{
+  session_t *transport_session;
+  int rv;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+  rv = svm_fifo_enqueue (transport_session->tx_fifo, len, data);
+  ASSERT (rv == len);
+
+  /* no accounting without send params */
+  if (!sp)
+    return;
+
+  ASSERT (sp->max_burst_size >= len);
+  sp->bytes_dequeued += len;
+  sp->max_burst_size -= len;
+}
+
+/**
+ * Enqueue a list of segments to the TX fifo of the transport session.
+ *
+ * When send parameters are provided, bytes_dequeued is increased and
+ * max_burst_size decreased by the number of bytes written.
+ *
+ * @param hc     HTTP connection.
+ * @param segs   Segments to enqueue.
+ * @param n_segs Number of segments.
+ * @param sp     Transport send parameters, may be NULL (same as in
+ *               http_io_ts_write).
+ *
+ * @return Return value of svm_fifo_enqueue_segments cast to u32; a
+ *         non-positive result trips an ASSERT.
+ */
+always_inline u32
+http_io_ts_write_segs (http_conn_t *hc, const svm_fifo_seg_t segs[],
+                       u32 n_segs, transport_send_params_t *sp)
+{
+  int n_written;
+  session_t *ts = session_get_from_handle (hc->hc_tc_session_handle);
+
+  n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0);
+  ASSERT (n_written > 0);
+  /* send params are optional, do not dereference blindly */
+  if (sp)
+    {
+      sp->bytes_dequeued += n_written;
+      sp->max_burst_size -= n_written;
+    }
+  return (u32) n_written;
+}
+
+/**
+ * Housekeeping after writing to the transport session TX fifo.
+ *
+ * If svm_fifo_set_event returns non-zero for the TX fifo, a TX io event is
+ * programmed: SESSION_IO_EVT_TX_FLUSH when flush is set, SESSION_IO_EVT_TX
+ * otherwise.
+ *
+ * @param hc    HTTP connection.
+ * @param flush Non-zero to program the flush variant of the TX event.
+ */
+always_inline void
+http_io_ts_after_write (http_conn_t *hc, u8 flush)
+{
+  session_t *transport_session;
+
+  transport_session = session_get_from_handle (hc->hc_tc_session_handle);
+
+  if (!svm_fifo_set_event (transport_session->tx_fifo))
+    return;
+
+  session_program_tx_io_evt (transport_session->handle,
+                             flush ? SESSION_IO_EVT_TX_FLUSH :
+                                     SESSION_IO_EVT_TX);
+}
+
+/**
+ * Allocate an app session for a request and notify the app about the accept.
+ *
+ * The new session is set up from the connection (app worker index, listener
+ * handle) and from the listener session (session type), then
+ * app_worker_init_accepted and app_worker_accept_notify are called. If either
+ * of them fails, the session is freed, the request's app session handle is
+ * set to SESSION_INVALID_HANDLE and HTTP_CONN_F_NO_APP_SESSION is set on the
+ * connection.
+ *
+ * @param hc  HTTP connection.
+ * @param req HTTP request the app session is created for.
+ *
+ * @return 0 on success, otherwise the non-zero return value of the failed
+ *         call.
+ */
+always_inline int
+http_conn_accept_request (http_conn_t *hc, http_req_t *req)
+{
+  session_t *as, *listener;
+  app_worker_t *wrk;
+  int rv;
+
+  HTTP_DBG (1, "hc [%u]%x req %x", hc->hc_hc_index, hc->c_thread_index,
+            req->hr_req_handle);
+
+  /* allocate app session and initialize */
+  as = session_alloc (hc->c_thread_index);
+  HTTP_DBG (1, "allocated session 0x%lx", session_handle (as));
+  req->c_s_index = as->session_index;
+  as->app_wrk_index = hc->hc_pa_wrk_index;
+  as->connection_index = req->hr_req_handle;
+  as->session_state = SESSION_STATE_ACCEPTING;
+  listener = listen_session_get_from_handle (hc->hc_pa_session_handle);
+  as->session_type = listener->session_type;
+  as->listener_handle = hc->hc_pa_session_handle;
+
+  /* init session fifos and notify app */
+  rv = app_worker_init_accepted (as);
+  if (rv)
+    {
+      HTTP_DBG (1, "failed to allocate fifos");
+      goto no_app_session;
+    }
+
+  req->hr_pa_session_handle = session_handle (as);
+  req->hr_pa_wrk_index = as->app_wrk_index;
+
+  wrk = app_worker_get (as->app_wrk_index);
+
+  rv = app_worker_accept_notify (wrk, as);
+  if (rv)
+    {
+      HTTP_DBG (1, "app accept returned");
+      goto no_app_session;
+    }
+
+  return 0;
+
+no_app_session:
+  /* common failure epilogue, rv holds the error of the failed call */
+  req->hr_pa_session_handle = SESSION_INVALID_HANDLE;
+  session_free (as);
+  hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+  return rv;
+}
+
+/**
+ * Allocate an app session for a request on an established (client)
+ * connection and notify the app about the connect.
+ *
+ * The new session is put into SESSION_STATE_READY, its type is built from
+ * TRANSPORT_PROTO_HTTP and the IP version of the transport session. On
+ * failure the session is freed and HTTP_CONN_F_NO_APP_SESSION is set on the
+ * connection.
+ *
+ * @param hc  HTTP connection.
+ * @param req HTTP request the app session is created for.
+ *
+ * @return 0 on success, -1 if the app worker is not valid anymore, otherwise
+ *         the non-zero return value of app_worker_init_connected.
+ */
+always_inline int
+http_conn_established (http_conn_t *hc, http_req_t *req)
+{
+  session_t *as;
+  app_worker_t *app_wrk;
+  session_t *ts;
+  int rv;
+
+  /* allocate app session and initialize */
+  as = session_alloc (hc->c_thread_index);
+  HTTP_DBG (1, "allocated session 0x%lx", session_handle (as));
+  req->c_s_index = as->session_index;
+  as->app_wrk_index = hc->hc_pa_wrk_index;
+  as->connection_index = req->hr_req_handle;
+  as->session_state = SESSION_STATE_READY;
+  as->opaque = hc->hc_pa_app_api_ctx;
+  ts = session_get_from_handle (hc->hc_tc_session_handle);
+  as->session_type = session_type_from_proto_and_ip (
+    TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type));
+
+  /* init session fifos and notify app */
+  app_wrk = app_worker_get_if_valid (hc->hc_pa_wrk_index);
+  if (!app_wrk)
+    {
+      HTTP_DBG (1, "no app worker");
+      /* nobody is going to own the session, do not leak it */
+      session_free (as);
+      hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+      return -1;
+    }
+
+  if ((rv = app_worker_init_connected (app_wrk, as)))
+    {
+      HTTP_DBG (1, "failed to allocate fifos");
+      session_free (as);
+      hc->flags |= HTTP_CONN_F_NO_APP_SESSION;
+      return rv;
+    }
+
+  app_worker_connect_notify (app_wrk, as, 0, hc->hc_pa_app_api_ctx);
+
+  req->hr_pa_session_handle = session_handle (as);
+  req->hr_pa_wrk_index = as->app_wrk_index;
+
+  return 0;
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_PRIVATE_H_ */
diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h
index 43d20d004d8..5ce42032f20 100644
--- a/src/plugins/http/http_timer.h
+++ b/src/plugins/http/http_timer.h
@@ -16,7 +16,7 @@
#ifndef SRC_PLUGINS_HTTP_HTTP_TIMER_H_
#define SRC_PLUGINS_HTTP_HTTP_TIMER_H_
-#include <http/http.h>
+#include <http/http_private.h>
#include <vppinfra/tw_timer_2t_1w_2048sl.h>
#define HTTP_CONN_TIMEOUT 60
@@ -45,7 +45,8 @@ http_conn_timer_start (http_conn_t *hc)
u32 hs_handle;
ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID);
- hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
+ ASSERT (hc->hc_hc_index <= 0x00FFFFFF);
+ hs_handle = hc->c_thread_index << 24 | hc->hc_hc_index;
clib_spinlock_lock (&twc->tw_lock);
hc->timer_handle =
@@ -58,7 +59,7 @@ http_conn_timer_stop (http_conn_t *hc)
{
http_tw_ctx_t *twc = &http_tw_ctx;
- hc->pending_timer = 0;
+ hc->flags &= ~HTTP_CONN_F_PENDING_TIMER;
if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID)
return;
@@ -79,7 +80,8 @@ http_conn_timer_update (http_conn_t *hc)
tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout);
else
{
- hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
+ ASSERT (hc->hc_hc_index <= 0x00FFFFFF);
+ hs_handle = hc->c_thread_index << 24 | hc->hc_hc_index;
hc->timer_handle =
tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout);
}
diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c
index bfaa285eb35..f44d3cbd31b 100644
--- a/src/plugins/http/test/http_test.c
+++ b/src/plugins/http/test/http_test.c
@@ -6,6 +6,8 @@
#include <vpp/app/version.h>
#include <http/http.h>
#include <http/http_header_names.h>
+#include <http/http2/hpack.h>
+#include <http/http2/frame.h>
#define HTTP_TEST_I(_cond, _comment, _args...) \
({ \
@@ -533,6 +535,771 @@ http_test_http_header_table (vlib_main_t *vm)
return 0;
}
+/**
+ * Parse three consecutive HPACK encoded requests sharing one dynamic table
+ * and verify the parsed control data, headers and dynamic table usage.
+ *
+ * hpack_parse_request is resolved from http_plugin.so at run time. The
+ * expected values match what the caller passes in (see the request examples
+ * referenced at the call sites).
+ *
+ * @param first_req      First encoded request.
+ * @param first_req_len  Length of the first request.
+ * @param second_req     Second encoded request.
+ * @param second_req_len Length of the second request.
+ * @param third_req      Third encoded request.
+ * @param third_req_len  Length of the third request.
+ * @param dynamic_table  Initialized HPACK dynamic table.
+ *
+ * @return 0 on success; 1, 2 or 3 identifying the request which failed
+ *         verification; -1 if hpack_parse_request could not be resolved.
+ */
+static int
+http_test_parse_request (const char *first_req, uword first_req_len,
+                         const char *second_req, uword second_req_len,
+                         const char *third_req, uword third_req_len,
+                         hpack_dynamic_table_t *dynamic_table)
+{
+  http2_error_t rv;
+  u8 *buf = 0;
+  hpack_request_control_data_t control_data;
+  http_field_line_t *headers = 0;
+  u16 parsed_bitmap = 0;
+  int result;
+
+  static http2_error_t (*_hpack_parse_request) (
+    u8 * src, u32 src_len, u8 * dst, u32 dst_len,
+    hpack_request_control_data_t * control_data, http_field_line_t * *headers,
+    hpack_dynamic_table_t * dynamic_table);
+
+  _hpack_parse_request =
+    vlib_get_plugin_symbol ("http_plugin.so", "hpack_parse_request");
+  /* do not call through a null pointer if the symbol lookup failed */
+  if (!_hpack_parse_request)
+    return -1;
+
+  parsed_bitmap =
+    HPACK_PSEUDO_HEADER_METHOD_PARSED | HPACK_PSEUDO_HEADER_SCHEME_PARSED |
+    HPACK_PSEUDO_HEADER_PATH_PARSED | HPACK_PSEUDO_HEADER_AUTHORITY_PARSED;
+
+  /* first request */
+  result = 1;
+  vec_validate_init_empty (buf, 254, 0);
+  memset (&control_data, 0, sizeof (control_data));
+  rv = _hpack_parse_request ((u8 *) first_req, (u32) first_req_len, buf, 254,
+                             &control_data, &headers, dynamic_table);
+  if (rv != HTTP2_ERROR_NO_ERROR ||
+      control_data.parsed_bitmap != parsed_bitmap ||
+      control_data.method != HTTP_REQ_GET ||
+      control_data.scheme != HTTP_URL_SCHEME_HTTP ||
+      control_data.path_len != 1 || control_data.authority_len != 15 ||
+      dynamic_table->used != 57 || vec_len (headers) != 0)
+    goto done;
+  if (memcmp (control_data.path, "/", 1))
+    goto done;
+  if (memcmp (control_data.authority, "www.example.com", 15))
+    goto done;
+  vec_free (headers);
+  vec_free (buf);
+
+  /* second request */
+  result = 2;
+  vec_validate_init_empty (buf, 254, 0);
+  memset (&control_data, 0, sizeof (control_data));
+  rv = _hpack_parse_request ((u8 *) second_req, (u32) second_req_len, buf, 254,
+                             &control_data, &headers, dynamic_table);
+  if (rv != HTTP2_ERROR_NO_ERROR ||
+      control_data.parsed_bitmap != parsed_bitmap ||
+      control_data.method != HTTP_REQ_GET ||
+      control_data.scheme != HTTP_URL_SCHEME_HTTP ||
+      control_data.path_len != 1 || control_data.authority_len != 15 ||
+      dynamic_table->used != 110 || vec_len (headers) != 1 ||
+      control_data.headers_len != 21)
+    goto done;
+  if (memcmp (control_data.path, "/", 1))
+    goto done;
+  if (memcmp (control_data.authority, "www.example.com", 15))
+    goto done;
+  if (headers[0].name_len != 13 || headers[0].value_len != 8)
+    goto done;
+  if (memcmp (control_data.headers + headers[0].name_offset, "cache-control",
+              13))
+    goto done;
+  if (memcmp (control_data.headers + headers[0].value_offset, "no-cache", 8))
+    goto done;
+  vec_free (headers);
+  vec_free (buf);
+
+  /* third request */
+  result = 3;
+  vec_validate_init_empty (buf, 254, 0);
+  memset (&control_data, 0, sizeof (control_data));
+  rv = _hpack_parse_request ((u8 *) third_req, (u32) third_req_len, buf, 254,
+                             &control_data, &headers, dynamic_table);
+  if (rv != HTTP2_ERROR_NO_ERROR ||
+      control_data.parsed_bitmap != parsed_bitmap ||
+      control_data.method != HTTP_REQ_GET ||
+      control_data.scheme != HTTP_URL_SCHEME_HTTPS ||
+      control_data.path_len != 11 || control_data.authority_len != 15 ||
+      dynamic_table->used != 164 || vec_len (headers) != 1 ||
+      control_data.headers_len != 22)
+    goto done;
+  if (memcmp (control_data.path, "/index.html", 11))
+    goto done;
+  if (memcmp (control_data.authority, "www.example.com", 15))
+    goto done;
+  if (headers[0].name_len != 10 || headers[0].value_len != 12)
+    goto done;
+  if (memcmp (control_data.headers + headers[0].name_offset, "custom-key", 10))
+    goto done;
+  if (memcmp (control_data.headers + headers[0].value_offset, "custom-value",
+              12))
+    goto done;
+
+  result = 0;
+
+done:
+  /* single exit so the vectors are released on failure paths as well */
+  vec_free (headers);
+  vec_free (buf);
+
+  return result;
+}
+
+/**
+ * Unit tests of the HPACK implementation in the http plugin.
+ *
+ * Every tested function is resolved at run time from http_plugin.so using
+ * vlib_get_plugin_symbol. The sections below exercise, in order: integer
+ * decoding and encoding, string decoding and encoding, header decoding with
+ * a dynamic table, request parsing and response serialization.
+ *
+ * The TEST/N_TEST helper macros are redefined for each section and removed
+ * with #undef when the section ends.
+ *
+ * NOTE(review): HTTP_TEST is defined outside of this chunk; if it returns
+ * early on a failed check, vectors allocated by the helper macros are
+ * presumably not freed on that path - confirm.
+ *
+ * @param vm vlib main, used for CLI output.
+ *
+ * @return 0 when the end of the function is reached.
+ */
+static int
+http_test_hpack (vlib_main_t *vm)
+{
+ vlib_cli_output (vm, "hpack_decode_int");
+
+ static uword (*_hpack_decode_int) (u8 * *pos, u8 * end, u8 prefix_len);
+ _hpack_decode_int =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_int");
+
+ u8 *pos, *end, *input = 0;
+ uword value;
+ /* TEST (input bytes, prefix length, expected value): decoding must consume
+ * the whole input and yield the expected value */
+#define TEST(i, pl, e) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ end = vec_end (input); \
+ value = _hpack_decode_int (&pos, end, (u8) pl); \
+ HTTP_TEST ((value == (uword) e && pos == end), \
+ "%U with prefix length %u is %llu", format_hex_bytes, input, \
+ vec_len (input), (u8) pl, value); \
+ vec_free (input);
+
+ TEST ("\x00", 8, 0);
+ TEST ("\x2A", 8, 42);
+ TEST ("\x72", 4, 2);
+ TEST ("\x7F\x00", 7, 127);
+ TEST ("\x7F\x01", 7, 128);
+ TEST ("\x9F\x9A\x0A", 5, 1337);
+ TEST ("\xFF\x80\x01", 7, 255);
+ /* max value to decode is CLIB_WORD_MAX, CLIB_UWORD_MAX is error */
+ TEST ("\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F", 7, CLIB_WORD_MAX);
+
+#undef TEST
+
+ /* N_TEST (input bytes, prefix length): decoding must report
+ * HPACK_INVALID_INT */
+#define N_TEST(i, pl) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ end = vec_end (input); \
+ value = _hpack_decode_int (&pos, end, (u8) pl); \
+ HTTP_TEST ((value == HPACK_INVALID_INT), \
+ "%U with prefix length %u should be invalid", format_hex_bytes, \
+ input, vec_len (input), (u8) pl); \
+ vec_free (input);
+
+ /* incomplete */
+ N_TEST ("\x7F", 7);
+ N_TEST ("\x0F\xFF\xFF", 4);
+ /* overflow */
+ /* NOTE(review): the two overflow inputs below are identical */
+ N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4);
+ N_TEST ("\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00", 4);
+
+#undef N_TEST
+
+ vlib_cli_output (vm, "hpack_encode_int");
+
+ static u8 *(*_hpack_encode_int) (u8 * dst, uword value, u8 prefix_len);
+ _hpack_encode_int =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_int");
+
+ u8 *buf = 0;
+ u8 *p;
+
+ /* TEST (value, prefix length, expected bytes): the encoder output written
+ * to a 16 byte buffer must match the expected bytes */
+#define TEST(v, pl, e) \
+ vec_validate_init_empty (buf, 15, 0); \
+ p = _hpack_encode_int (buf, v, (u8) pl); \
+ HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \
+ "%llu with prefix length %u is encoded as %U", v, (u8) pl, \
+ format_hex_bytes, buf, p - buf); \
+ vec_free (buf);
+
+ TEST (0, 8, "\x00");
+ TEST (2, 4, "\x02");
+ TEST (42, 8, "\x2A");
+ TEST (127, 7, "\x7F\x00");
+ TEST (128, 7, "\x7F\x01");
+ TEST (255, 7, "\x7F\x80\x01");
+ TEST (1337, 5, "\x1F\x9A\x0A");
+ TEST (CLIB_WORD_MAX, 7, "\x7F\x80\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F");
+#undef TEST
+
+ vlib_cli_output (vm, "hpack_decode_string");
+
+ static http2_error_t (*_hpack_decode_string) (u8 * *src, u8 * end, u8 * *buf,
+ uword * buf_len);
+ _hpack_decode_string =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_string");
+
+ u8 *bp;
+ uword blen, len;
+ http2_error_t rv;
+
+ /* TEST (encoded input, expected string): decoding into a 64 byte buffer
+ * must consume the whole input and produce the expected string */
+#define TEST(i, e) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ vec_validate_init_empty (buf, 63, 0); \
+ bp = buf; \
+ blen = vec_len (buf); \
+ rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \
+ len = vec_len (buf) - blen; \
+ HTTP_TEST ((len == strlen (e) && !memcmp (buf, e, len) && \
+ pos == vec_end (input) && bp == buf + len && \
+ rv == HTTP2_ERROR_NO_ERROR), \
+ "%U is decoded as %U", format_hex_bytes, input, vec_len (input), \
+ format_http_bytes, buf, len); \
+ vec_free (input); \
+ vec_free (buf);
+
+ /* raw coding */
+ TEST ("\x07private", "private");
+ /* Huffman coding */
+ TEST ("\x85\xAE\xC3\x77\x1A\x4B", "private");
+ TEST ("\x86\xA8\xEB\x10\x64\x9C\xBF", "no-cache");
+ TEST ("\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF",
+ "www.example.com");
+ TEST ("\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66"
+ "\xE0\x82\xA6\x2D\x1B\xFF",
+ "Mon, 21 Oct 2013 20:13:21 GMT")
+ TEST ("\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60"
+ "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31"
+ "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07",
+ "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1");
+ TEST ("\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F", "hello world!")
+ TEST ("\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7", "\\aaaaaaaaaaaa");
+ TEST ("\x8C\x1F\xFF\xF0\x18\xC6\x31\x80\x03\x18\xC6\x31\x8F",
+ "a\\aaaaa00aaaaaaa");
+ TEST ("\x87\x1F\xFF\xF0\xFF\xFE\x11\xFF", "a\\\\b");
+ TEST ("\x84\x1F\xF9\xFE\xA3", "a?'b");
+ TEST ("\x84\x1F\xFA\xFF\x23", "a'?b");
+ TEST ("\x8D\x1F\xFF\xFF\xFF\x0C\x63\x18\xC0\x01\x8C\x63\x18\xC7",
+ "\x61\xF9\x61\x61\x61\x61\x61\x30\x30\x61\x61\x61\x61\x61\x61\x61")
+#undef TEST
+
+ /* N_TEST (encoded input, expected error): decoding into a 16 byte buffer
+ * must fail with the expected error code */
+#define N_TEST(i, e) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ vec_validate_init_empty (buf, 15, 0); \
+ bp = buf; \
+ blen = vec_len (buf); \
+ rv = _hpack_decode_string (&pos, vec_end (input), &bp, &blen); \
+ HTTP_TEST ((rv == e), "%U should be invalid (%U)", format_hex_bytes, input, \
+ vec_len (input), format_http2_error, rv); \
+ vec_free (input); \
+ vec_free (buf);
+
+ /* incomplete */
+ N_TEST ("\x87", HTTP2_ERROR_COMPRESSION_ERROR);
+ N_TEST ("\x07priv", HTTP2_ERROR_COMPRESSION_ERROR);
+ /* invalid length */
+ N_TEST ("\x7Fprivate", HTTP2_ERROR_COMPRESSION_ERROR);
+ /* invalid EOF */
+ N_TEST ("\x81\x8C", HTTP2_ERROR_COMPRESSION_ERROR);
+ /* not enough space for decoding */
+ N_TEST (
+ "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66"
+ "\xE0\x82\xA6\x2D\x1B\xFF",
+ HTTP2_ERROR_INTERNAL_ERROR);
+#undef N_TEST
+
+ vlib_cli_output (vm, "hpack_encode_string");
+
+ static u8 *(*_hpack_encode_string) (u8 * dst, const u8 *value,
+ uword value_len);
+ _hpack_encode_string =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_encode_string");
+
+ /* TEST (string, expected encoding): the encoder output written to a 64 byte
+ * buffer must match the expected bytes */
+#define TEST(i, e) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ vec_validate_init_empty (buf, 63, 0); \
+ p = _hpack_encode_string (buf, input, vec_len (input)); \
+ HTTP_TEST (((p - buf) == (sizeof (e) - 1) && !memcmp (buf, e, p - buf)), \
+ "%v is encoded as %U", input, format_hex_bytes, buf, p - buf); \
+ vec_free (input); \
+ vec_free (buf);
+
+ /* Huffman coding */
+ TEST ("private", "\x85\xAE\xC3\x77\x1A\x4B");
+ TEST ("no-cache", "\x86\xA8\xEB\x10\x64\x9C\xBF");
+ TEST ("www.example.com",
+ "\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF");
+ TEST ("Mon, 21 Oct 2013 20:13:21 GMT",
+ "\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66"
+ "\xE0\x82\xA6\x2D\x1B\xFF")
+ TEST ("foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1",
+ "\xAD\x94\xE7\x82\x1D\xD7\xF2\xE6\xC7\xB3\x35\xDF\xDF\xCD\x5B\x39\x60"
+ "\xD5\xAF\x27\x08\x7F\x36\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31"
+ "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50\x07");
+ TEST ("hello world!", "\x8A\x9C\xB4\x50\x75\x3C\x1E\xCA\x24\xFE\x3F")
+ TEST ("\\aaaaaaaaaaaa", "\x8A\xFF\xFE\x03\x18\xC6\x31\x8C\x63\x18\xC7");
+ /* raw coding */
+ TEST ("[XZ]", "\x4[XZ]");
+#undef TEST
+
+ vlib_cli_output (vm, "hpack_decode_header");
+
+ static http2_error_t (*_hpack_decode_header) (
+ u8 * *src, u8 * end, u8 * *buf, uword * buf_len, u32 * name_len,
+ u32 * value_len, hpack_dynamic_table_t * dt);
+
+ _hpack_decode_header =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_decode_header");
+
+ static void (*_hpack_dynamic_table_init) (hpack_dynamic_table_t * table,
+ u32 max_size);
+
+ _hpack_dynamic_table_init =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_init");
+
+ static void (*_hpack_dynamic_table_free) (hpack_dynamic_table_t * table);
+
+ _hpack_dynamic_table_free =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_dynamic_table_free");
+
+ u32 name_len, value_len;
+ hpack_dynamic_table_t table;
+
+ /* dynamic table initialized with max size 128 so the cases below can
+ * trigger eviction */
+ _hpack_dynamic_table_init (&table, 128);
+
+ /* TEST (encoded header, expected name, expected value, expected
+ * table.used): name and value are decoded back to back into buf */
+#define TEST(i, e_name, e_value, dt_size) \
+ vec_validate (input, sizeof (i) - 2); \
+ memcpy (input, i, sizeof (i) - 1); \
+ pos = input; \
+ vec_validate_init_empty (buf, 63, 0); \
+ bp = buf; \
+ blen = vec_len (buf); \
+ rv = _hpack_decode_header (&pos, vec_end (input), &bp, &blen, &name_len, \
+ &value_len, &table); \
+ len = vec_len (buf) - blen; \
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && table.used == dt_size && \
+ name_len == strlen (e_name) && value_len == strlen (e_value) && \
+ !memcmp (buf, e_name, name_len) && \
+ !memcmp (buf + name_len, e_value, value_len) && \
+ vec_len (buf) == (blen + name_len + value_len) && \
+ pos == vec_end (input) && bp == buf + name_len + value_len), \
+ "%U is decoded as '%U: %U'", format_hex_bytes, input, \
+ vec_len (input), format_http_bytes, buf, name_len, \
+ format_http_bytes, buf + name_len, value_len); \
+ vec_free (input); \
+ vec_free (buf);
+
+ /* C.2.1. Literal Header Field with Indexing */
+ TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74"
+ "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72",
+ "custom-key", "custom-header", 55);
+ /* C.2.2. Literal Header Field without Indexing */
+ TEST ("\x04\x0C\x2F\x73\x61\x6D\x70\x6C\x65\x2F\x70\x61\x74\x68", ":path",
+ "/sample/path", 55);
+ /* C.2.3. Literal Header Field Never Indexed */
+ TEST ("\x10\x08\x70\x61\x73\x73\x77\x6F\x72\x64\x06\x73\x65\x63\x72\x65\x74",
+ "password", "secret", 55);
+ /* C.2.4. Indexed Header Field */
+ TEST ("\x82", ":method", "GET", 55);
+ TEST ("\xBE", "custom-key", "custom-header", 55);
+ /* Literal Header Field with Indexing - enough space in dynamic table */
+ TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D",
+ ":authority", "www.example.com", 112);
+ /* verification */
+ TEST ("\xBE", ":authority", "www.example.com", 112);
+ TEST ("\xBF", "custom-key", "custom-header", 112);
+ /* Literal Header Field with Indexing - eviction */
+ TEST ("\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65", "cache-control",
+ "no-cache", 110);
+ /* verification */
+ TEST ("\xBE", "cache-control", "no-cache", 110);
+ TEST ("\xBF", ":authority", "www.example.com", 110);
+ /* Literal Header Field with Indexing - eviction */
+ TEST ("\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B\x65\x79\x0D\x63\x75\x73\x74"
+ "\x6F\x6D\x2D\x68\x65\x61\x64\x65\x72",
+ "custom-key", "custom-header", 108);
+ /* verification */
+ TEST ("\xBE", "custom-key", "custom-header", 108);
+ TEST ("\xBF", "cache-control", "no-cache", 108);
+ /* Literal Header Field with Indexing - eviction */
+ TEST ("\x41\x0F\x77\x77\x77\x2E\x65\x78\x61\x6D\x70\x6C\x65\x2E\x63\x6F\x6D",
+ ":authority", "www.example.com", 112);
+ /* verification */
+ TEST ("\xBE", ":authority", "www.example.com", 112);
+ TEST ("\xBF", "custom-key", "custom-header", 112);
+ /* Literal Header Field with Indexing - eviction with reference */
+ TEST ("\x7F\x00\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65",
+ "custom-key", "custom-value", 111);
+ /* verification */
+ TEST ("\xBE", "custom-key", "custom-value", 111);
+ TEST ("\xBF", ":authority", "www.example.com", 111);
+#undef TEST
+
+ _hpack_dynamic_table_free (&table);
+
+ vlib_cli_output (vm, "hpack_parse_request");
+
+ int result;
+ /* C.3. Request Examples without Huffman Coding */
+ _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE);
+ result = http_test_parse_request (
+ http_token_lit ("\x82\x86\x84\x41\x0F\x77\x77\x77\x2E\x65\x78\x61"
+ "\x6D\x70\x6C\x65\x2E\x63\x6F\x6D"),
+ http_token_lit (
+ "\x82\x86\x84\xBE\x58\x08\x6E\x6F\x2D\x63\x61\x63\x68\x65"),
+ http_token_lit (
+ "\x82\x87\x85\xBF\x40\x0A\x63\x75\x73\x74\x6F\x6D\x2D\x6B"
+ "\x65\x79\x0C\x63\x75\x73\x74\x6F\x6D\x2D\x76\x61\x6C\x75\x65"),
+ &table);
+ _hpack_dynamic_table_free (&table);
+ HTTP_TEST ((result == 0), "request without Huffman Coding (result=%d)",
+ result);
+ /* C.4. Request Examples with Huffman Coding */
+ _hpack_dynamic_table_init (&table, HPACK_DEFAULT_HEADER_TABLE_SIZE);
+ result = http_test_parse_request (
+ http_token_lit (
+ "\x82\x86\x84\x41\x8C\xF1\xE3\xC2\xE5\xF2\x3A\x6B\xA0\xAB\x90\xF4\xFF"),
+ http_token_lit ("\x82\x86\x84\xBE\x58\x86\xA8\xEB\x10\x64\x9C\xBF"),
+ http_token_lit ("\x82\x87\x85\xBF\x40\x88\x25\xA8\x49\xE9\x5B\xA9\x7D\x7F"
+ "\x89\x25\xA8\x49\xE9\x5B\xB8\xE8\xB4\xBF"),
+ &table);
+ _hpack_dynamic_table_free (&table);
+ HTTP_TEST ((result == 0), "request with Huffman Coding (result=%d)", result);
+
+ vlib_cli_output (vm, "hpack_serialize_response");
+
+ hpack_response_control_data_t resp_cd;
+ u8 *server_name;
+ u8 *date;
+
+ static void (*_hpack_serialize_response) (
+ u8 * app_headers, u32 app_headers_len,
+ hpack_response_control_data_t * control_data, u8 * *dst);
+
+ _hpack_serialize_response =
+ vlib_get_plugin_symbol ("http_plugin.so", "hpack_serialize_response");
+
+ server_name = format (0, "http unit tests");
+ date = format (0, "Mon, 21 Oct 2013 20:13:21 GMT");
+
+ /* pre-allocate 128 bytes, then start with an empty vector */
+ vec_validate (buf, 127);
+ vec_reset_length (buf);
+ /* first response: no app headers, content length skipped */
+ resp_cd.sc = HTTP_STATUS_GATEWAY_TIMEOUT;
+ resp_cd.content_len = HPACK_ENCODER_SKIP_CONTENT_LEN;
+ resp_cd.server_name = server_name;
+ resp_cd.server_name_len = vec_len (server_name);
+ resp_cd.date = date;
+ resp_cd.date_len = vec_len (date);
+ u8 expected1[] =
+ "\x08\x03\x35\x30\x34\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94"
+ "\x7F\x0F\x12\x96\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B"
+ "\x81\x66\xE0\x82\xA6\x2D\x1B\xFF";
+ _hpack_serialize_response (0, 0, &resp_cd, &buf);
+ HTTP_TEST ((vec_len (buf) == (sizeof (expected1) - 1) &&
+ !memcmp (buf, expected1, sizeof (expected1) - 1)),
+ "response encoded as %U", format_hex_bytes, buf, vec_len (buf));
+ vec_reset_length (buf);
+
+ /* second response: content length and three app headers (two well-known,
+ * one custom) */
+ resp_cd.sc = HTTP_STATUS_OK;
+ resp_cd.content_len = 1024;
+ http_headers_ctx_t headers;
+ u8 *headers_buf = 0;
+ vec_validate (headers_buf, 127);
+ http_init_headers_ctx (&headers, headers_buf, vec_len (headers_buf));
+ http_add_header (&headers, HTTP_HEADER_CONTENT_TYPE,
+ http_token_lit ("text/plain"));
+ http_add_header (&headers, HTTP_HEADER_CACHE_STATUS,
+ http_token_lit ("ExampleCache; hit"));
+ http_add_custom_header (&headers, http_token_lit ("sandwich"),
+ http_token_lit ("spam"));
+ u8 expected2[] =
+ "\x88\x0F\x27\x8B\x9D\x29\xAD\x4B\x6A\x32\x54\x49\x50\x94\x7F\x0F\x12\x96"
+ "\xD0\x7A\xBE\x94\x10\x54\xD4\x44\xA8\x20\x05\x95\x04\x0B\x81\x66\xE0\x82"
+ "\xA6\x2D\x1B\xFF\x0F\x0D\x83\x08\x04\xD7\x0F\x10\x87\x49\x7C\xA5\x8A\xE8"
+ "\x19\xAA\x00\x88\x20\xC9\x39\x56\x42\x46\x9B\x51\x8D\xC1\xE4\x74\xD7\x41"
+ "\x6F\x0C\x93\x97\xED\x49\xCC\x9F\x00\x86\x40\xEA\x93\xC1\x89\x3F\x83\x45"
+ "\x63\xA7";
+ _hpack_serialize_response (headers_buf, headers.tail_offset, &resp_cd, &buf);
+ HTTP_TEST ((vec_len (buf) == (sizeof (expected2) - 1) &&
+ !memcmp (buf, expected2, sizeof (expected2) - 1)),
+ "response encoded as %U", format_hex_bytes, buf, vec_len (buf));
+ vec_free (buf);
+ vec_free (headers_buf);
+ vec_free (server_name);
+ vec_free (date);
+
+ return 0;
+}
+
+static int
+http_test_h2_frame (vlib_main_t *vm)
+{
+ static void (*_http2_frame_header_read) (u8 * src,
+ http2_frame_header_t * fh);
+ /* HTTP/2 frame reader/writer checks; every http2_* symbol is looked up in http_plugin.so (no NULL check) */
+ _http2_frame_header_read =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_header_read");
+
+ vlib_cli_output (vm, "http2_frame_read_settings");
+
+ static http2_error_t (*_http2_frame_read_settings) (
+ http2_conn_settings_t * settings, u8 * payload, u32 payload_len);
+
+ _http2_frame_read_settings =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_settings");
+
+ http2_error_t rv;
+ http2_frame_header_t fh = { 0 };
+ http2_conn_settings_t conn_settings = http2_default_conn_settings;
+ /* frame header, then three (id, value) entries: 3 -> 100, 4 -> 0x40000000, 2 -> 0 */
+ u8 settings[] = { 0x0, 0x0, 0x12, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x3, 0x0, 0x0, 0x0, 0x64, 0x0, 0x4, 0x40,
+ 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x0 };
+ _http2_frame_header_read (settings, &fh);
+ HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_SETTINGS &&
+ fh.stream_id == 0 && fh.length == 18),
+ "frame identified as SETTINGS");
+
+ rv = _http2_frame_read_settings (
+ &conn_settings, settings + HTTP2_FRAME_HEADER_SIZE, fh.length);
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR &&
+ conn_settings.max_concurrent_streams == 100 &&
+ conn_settings.initial_window_size == 1073741824 &&
+ conn_settings.enable_push == 0),
+ "SETTINGS frame payload parsed");
+ /* header only: ACK flag set, zero-length payload */
+ u8 settings_ack[] = { 0x0, 0x0, 0x0, 0x4, 0x1, 0x0, 0x0, 0x0, 0x0 };
+ _http2_frame_header_read (settings_ack, &fh);
+ HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_ACK &&
+ fh.type == HTTP2_FRAME_TYPE_SETTINGS && fh.stream_id == 0 &&
+ fh.length == 0),
+ "frame identified as SETTINGS ACK");
+
+ vlib_cli_output (vm, "http2_frame_write_settings_ack");
+
+ static void (*_http2_frame_write_settings_ack) (u8 * *dst);
+
+ _http2_frame_write_settings_ack = vlib_get_plugin_symbol (
+ "http_plugin.so", "http2_frame_write_settings_ack");
+
+ u8 *buf = 0;
+
+ _http2_frame_write_settings_ack (&buf);
+ HTTP_TEST ((vec_len (buf) == sizeof (settings_ack) &&
+ !memcmp (buf, settings_ack, sizeof (settings_ack))),
+ "SETTINGS ACK frame written");
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_write_settings");
+
+ static void (*_http2_frame_write_settings) (
+ http2_settings_entry_t * settings, u8 * *dst);
+
+ _http2_frame_write_settings =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_settings");
+
+ http2_settings_entry_t *settings_list = 0;
+ vec_validate (settings_list, 2);
+ settings_list[0].identifier = HTTP2_SETTINGS_MAX_CONCURRENT_STREAMS;
+ settings_list[0].value = 100;
+ settings_list[1].identifier = HTTP2_SETTINGS_INITIAL_WINDOW_SIZE;
+ settings_list[1].value = 1073741824;
+ settings_list[2].identifier = HTTP2_SETTINGS_ENABLE_PUSH;
+ settings_list[2].value = 0;
+ /* the writer has to reproduce the settings[] bytes parsed above */
+ _http2_frame_write_settings (settings_list, &buf);
+ HTTP_TEST ((vec_len (buf) == sizeof (settings) &&
+ !memcmp (buf, settings, sizeof (settings))),
+ "SETTINGS frame written");
+ vec_free (settings_list);
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_read_window_update");
+
+ static http2_error_t (*_http2_frame_read_window_update) (
+ u32 * increment, u8 * payload, u32 payload_len);
+
+ _http2_frame_read_window_update = vlib_get_plugin_symbol (
+ "http_plugin.so", "http2_frame_read_window_update");
+
+ u32 win_increment;
+ u8 win_update[] = { 0x0, 0x0, 0x4, 0x8, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x3f, 0xff, 0x0, 0x1 };
+ _http2_frame_header_read (win_update, &fh);
+ HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_WINDOW_UPDATE &&
+ fh.stream_id == 0 && fh.length == 4),
+ "frame identified as WINDOW_UPDATE");
+
+ rv = _http2_frame_read_window_update (
+ &win_increment, win_update + HTTP2_FRAME_HEADER_SIZE, fh.length);
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && win_increment == 1073676289),
+ "WINDOW_UPDATE frame payload parsed");
+
+ vlib_cli_output (vm, "http2_frame_write_window_update");
+
+ static void (*_http2_frame_write_window_update) (u32 increment,
+ u32 stream_id, u8 * *dst);
+
+ _http2_frame_write_window_update = vlib_get_plugin_symbol (
+ "http_plugin.so", "http2_frame_write_window_update");
+
+ _http2_frame_write_window_update (1073676289, 0, &buf);
+ HTTP_TEST ((vec_len (buf) == sizeof (win_update) &&
+ !memcmp (buf, win_update, sizeof (win_update))),
+ "WINDOW_UPDATE frame written");
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_read_rst_stream");
+
+ static http2_error_t (*_http2_frame_read_rst_stream) (
+ u32 * error_code, u8 * payload, u32 payload_len);
+
+ _http2_frame_read_rst_stream =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_rst_stream");
+
+ u32 error_code;
+ u8 rst_stream[] = { 0x0, 0x0, 0x4, 0x3, 0x0, 0x0, 0x0,
+ 0x0, 0x5, 0x0, 0x0, 0x0, 0x01 };
+ _http2_frame_header_read (rst_stream, &fh);
+ HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_RST_STREAM &&
+ fh.stream_id == 5 && fh.length == 4),
+ "frame identified as RST_STREAM");
+
+ rv = _http2_frame_read_rst_stream (
+ &error_code, rst_stream + HTTP2_FRAME_HEADER_SIZE, fh.length);
+ HTTP_TEST (
+ (rv == HTTP2_ERROR_NO_ERROR && error_code == HTTP2_ERROR_PROTOCOL_ERROR),
+ "RST_STREAM frame payload parsed");
+
+ vlib_cli_output (vm, "http2_frame_write_rst_stream");
+
+ static void (*_http2_frame_write_rst_stream) (u32 error_code, u32 stream_id,
+ u8 * *dst);
+
+ _http2_frame_write_rst_stream =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_rst_stream");
+
+ _http2_frame_write_rst_stream (HTTP2_ERROR_PROTOCOL_ERROR, 5, &buf);
+ HTTP_TEST ((vec_len (buf) == sizeof (rst_stream) &&
+ !memcmp (buf, rst_stream, sizeof (rst_stream))),
+ "RST_STREAM frame written");
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_read_goaway");
+
+ static http2_error_t (*_http2_frame_read_goaway) (
+ u32 * error_code, u32 * last_stream_id, u8 * payload, u32 payload_len);
+
+ _http2_frame_read_goaway =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_goaway");
+
+ u32 last_stream_id;
+ u8 goaway[] = { 0x0, 0x0, 0x8, 0x7, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2 };
+ /* payload carries last stream id 5 followed by error code 2 */
+ _http2_frame_header_read (goaway, &fh);
+ HTTP_TEST ((fh.flags == 0 && fh.type == HTTP2_FRAME_TYPE_GOAWAY &&
+ fh.stream_id == 0 && fh.length == 8),
+ "frame identified as GOAWAY");
+
+ rv = _http2_frame_read_goaway (&error_code, &last_stream_id,
+ goaway + HTTP2_FRAME_HEADER_SIZE, fh.length);
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR &&
+ error_code == HTTP2_ERROR_INTERNAL_ERROR && last_stream_id == 5),
+ "GOAWAY frame payload parsed");
+
+ vlib_cli_output (vm, "http2_frame_write_goaway");
+
+ static void (*_http2_frame_write_goaway) (http2_error_t error_code,
+ u32 last_stream_id, u8 * *dst);
+
+ _http2_frame_write_goaway =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_goaway");
+
+ _http2_frame_write_goaway (HTTP2_ERROR_INTERNAL_ERROR, 5, &buf);
+ HTTP_TEST ((vec_len (buf) == sizeof (goaway) &&
+ !memcmp (buf, goaway, sizeof (goaway))),
+ "GOAWAY frame written");
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_read_headers");
+
+ static http2_error_t (*_http2_frame_read_headers) (
+ u8 * *headers, u32 * headers_len, u8 * payload, u32 payload_len, u8 flags);
+
+ _http2_frame_read_headers =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_headers");
+
+ u8 *h;
+ u32 h_len;
+ u8 headers[] = { 0x0, 0x0, 0x28, 0x1, 0x5, 0x0, 0x0, 0x0, 0x3, 0x3f,
+ 0xe1, 0x1f, 0x82, 0x4, 0x88, 0x62, 0x7b, 0x69, 0x1d, 0x48,
+ 0x5d, 0x3e, 0x53, 0x86, 0x41, 0x88, 0xaa, 0x69, 0xd2, 0x9a,
+ 0xc4, 0xb9, 0xec, 0x9b, 0x7a, 0x88, 0x25, 0xb6, 0x50, 0xc3,
+ 0xab, 0xb8, 0x15, 0xc1, 0x53, 0x3, 0x2a, 0x2f, 0x2a };
+ /* 0x28 = 40 payload bytes, flags 0x5, stream 3 */
+ _http2_frame_header_read (headers, &fh);
+ HTTP_TEST ((fh.flags ==
+ (HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM) &&
+ fh.type == HTTP2_FRAME_TYPE_HEADERS && fh.stream_id == 3 &&
+ fh.length == 40),
+ "frame identified as HEADERS");
+
+ rv = _http2_frame_read_headers (
+ &h, &h_len, headers + HTTP2_FRAME_HEADER_SIZE, fh.length, fh.flags);
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && h_len == 40 &&
+ *h == headers[HTTP2_FRAME_HEADER_SIZE]),
+ "HEADERS frame payload parsed");
+
+ vlib_cli_output (vm, "http2_frame_write_headers_header");
+
+ static void (*_http2_frame_write_headers_header) (
+ u32 headers_len, u32 stream_id, u8 flags, u8 * dst);
+
+ _http2_frame_write_headers_header = vlib_get_plugin_symbol (
+ "http_plugin.so", "http2_frame_write_headers_header");
+ /* only the frame header is written and compared here */
+ u8 *p = http2_frame_header_alloc (&buf);
+ _http2_frame_write_headers_header (
+ 40, 3, HTTP2_FRAME_FLAG_END_HEADERS | HTTP2_FRAME_FLAG_END_STREAM, p);
+ HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE &&
+ !memcmp (buf, headers, HTTP2_FRAME_HEADER_SIZE)),
+ "HEADERS frame header written");
+ vec_free (buf);
+
+ vlib_cli_output (vm, "http2_frame_read_data");
+
+ static http2_error_t (*_http2_frame_read_data) (
+ u8 * *data, u32 * data_len, u8 * payload, u32 payload_len, u8 flags);
+
+ _http2_frame_read_data =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_read_data");
+
+ u8 *d;
+ u32 d_len;
+ u8 data[] = { 0x0, 0x0, 0x9, 0x0, 0x1, 0x0, 0x0, 0x0, 0x3,
+ 0x6e, 0x6f, 0x74, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64 };
+
+ _http2_frame_header_read (data, &fh);
+ HTTP_TEST ((fh.flags == HTTP2_FRAME_FLAG_END_STREAM &&
+ fh.type == HTTP2_FRAME_TYPE_DATA && fh.stream_id == 3 &&
+ fh.length == 9),
+ "frame identified as DATA");
+
+ rv = _http2_frame_read_data (&d, &d_len, data + HTTP2_FRAME_HEADER_SIZE,
+ fh.length, fh.flags);
+ HTTP_TEST ((rv == HTTP2_ERROR_NO_ERROR && d_len == 9 &&
+ *d == data[HTTP2_FRAME_HEADER_SIZE]),
+ "DATA frame payload parsed");
+
+ vlib_cli_output (vm, "http2_frame_write_data_header");
+
+ static void (*_http2_frame_write_data_header) (
+ u32 data_len, u32 stream_id, u8 flags, u8 * dst);
+
+ _http2_frame_write_data_header =
+ vlib_get_plugin_symbol ("http_plugin.so", "http2_frame_write_data_header");
+
+ p = http2_frame_header_alloc (&buf);
+ _http2_frame_write_data_header (9, 3, HTTP2_FRAME_FLAG_END_STREAM, p);
+ HTTP_TEST ((vec_len (buf) == HTTP2_FRAME_HEADER_SIZE &&
+ !memcmp (buf, data, HTTP2_FRAME_HEADER_SIZE)),
+ "DATA frame header written");
+ vec_free (buf);
+
+ return 0;
+}
+
static clib_error_t *
test_http_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
@@ -550,6 +1317,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input,
res = http_test_http_token_is_case (vm);
else if (unformat (input, "header-table"))
res = http_test_http_header_table (vm);
+ else if (unformat (input, "hpack"))
+ res = http_test_hpack (vm);
+ else if (unformat (input, "h2-frame"))
+ res = http_test_h2_frame (vm);
else if (unformat (input, "all"))
{
if ((res = http_test_parse_authority (vm)))
@@ -562,6 +1333,10 @@ test_http_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
if ((res = http_test_http_header_table (vm)))
goto done;
+ if ((res = http_test_hpack (vm)))
+ goto done;
+ if ((res = http_test_h2_frame (vm)))
+ goto done;
}
else
break;
diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c
index 2e63e335d47..61f1f50ea3b 100644
--- a/src/plugins/http_static/http_cache.c
+++ b/src/plugins/http_static/http_cache.c
@@ -400,6 +400,14 @@ hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level)
hc->first_index = hc->last_index = ~0;
}
+void
+hss_cache_free (hss_cache_t *hc) /* clears the cache, then frees its bihash and its lock */
+{
+ hss_cache_clear (hc); /* u32 return value is ignored here */
+ BV (clib_bihash_free) (&hc->name_to_data);
+ clib_spinlock_free (&hc->cache_lock);
+}
+
/** \brief format a file cache entry
*/
static u8 *
diff --git a/src/plugins/http_static/http_cache.h b/src/plugins/http_static/http_cache.h
index 21f71a924d5..c1e363443ee 100644
--- a/src/plugins/http_static/http_cache.h
+++ b/src/plugins/http_static/http_cache.h
@@ -67,6 +67,7 @@ u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index);
u32 hss_cache_clear (hss_cache_t *hc);
void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level);
+void hss_cache_free (hss_cache_t *hc);
u8 *format_hss_cache (u8 *s, va_list *args);
diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api
index bd0cebc45d2..5c1eaf7b9d2 100644
--- a/src/plugins/http_static/http_static.api
+++ b/src/plugins/http_static/http_static.api
@@ -3,41 +3,7 @@
This file defines static http server control-plane API messages
*/
-option version = "2.4.0";
-
-/** \brief Configure and enable the static http server
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param fifo_size - size (in bytes) of the session FIFOs
- @param cache_size_limit - size (in bytes) of the in-memory file data cache
- @param max_age - how long a response is considered fresh (in seconds)
- @param prealloc_fifos - number of preallocated fifos (usually 0)
- @param private_segment_size - fifo segment size (usually 0)
- @param www_root - html root path
- @param uri - bind URI, defaults to "tcp://0.0.0.0/80"
-*/
-
-autoreply define http_static_enable_v2 {
- option deprecated;
-
- /* Client identifier, set from api_main.my_client_index */
- u32 client_index;
-
- /* Arbitrary context, so client can match reply to request */
- u32 context;
- /* Typical options */
- u32 fifo_size;
- u32 cache_size_limit;
- u32 max_age [default=600];
- /* Unusual options */
- u32 prealloc_fifos;
- u32 private_segment_size;
-
- /* Root of the html path */
- string www_root[256];
- /* The bind URI */
- string uri[256];
-};
+option version = "2.5.0";
/** \brief Configure and enable the static http server
@param client_index - opaque cookie to identify the sender
@@ -45,6 +11,7 @@ autoreply define http_static_enable_v2 {
@param fifo_size - size (in bytes) of the session FIFOs
@param cache_size_limit - size (in bytes) of the in-memory file data cache
@param max_age - how long a response is considered fresh (in seconds)
+ @param max_body_size - maximum size of a request body (in bytes)
@param keepalive_timeout - timeout during which client connection will stay open (in seconds)
@param prealloc_fifos - number of preallocated fifos (usually 0)
@param private_segment_size - fifo segment size (usually 0)
@@ -52,7 +19,7 @@ autoreply define http_static_enable_v2 {
@param uri - bind URI, defaults to "tcp://0.0.0.0/80"
*/
-autoreply define http_static_enable_v3 {
+autoreply define http_static_enable_v4 {
option deprecated;
/* Client identifier, set from api_main.my_client_index */
@@ -65,6 +32,7 @@ autoreply define http_static_enable_v3 {
u32 cache_size_limit;
u32 max_age [default=600];
u32 keepalive_timeout [default=60];
+ u64 max_body_size [default=8000];
/* Unusual options */
u32 prealloc_fifos;
u32 private_segment_size;
@@ -76,12 +44,14 @@ autoreply define http_static_enable_v3 {
};
/** \brief Configure and enable the static http server
+
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param fifo_size - size (in bytes) of the session FIFOs
@param cache_size_limit - size (in bytes) of the in-memory file data cache
@param max_age - how long a response is considered fresh (in seconds)
@param max_body_size - maximum size of a request body (in bytes)
+ @param rx_buff_thresh - maximum size of a large memory allocation (in bytes)
@param keepalive_timeout - timeout during which client connection will stay open (in seconds)
@param prealloc_fifos - number of preallocated fifos (usually 0)
@param private_segment_size - fifo segment size (usually 0)
@@ -89,7 +59,7 @@ autoreply define http_static_enable_v3 {
@param uri - bind URI, defaults to "tcp://0.0.0.0/80"
*/
-autoreply define http_static_enable_v4 {
+autoreply define http_static_enable_v5 {
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
@@ -100,7 +70,8 @@ autoreply define http_static_enable_v4 {
u32 cache_size_limit;
u32 max_age [default=600];
u32 keepalive_timeout [default=60];
- u64 max_body_size [default=8000];
+ u64 max_body_size [default=8192];
+ u32 rx_buff_thresh [default=1048576];
/* Unusual options */
u32 prealloc_fifos;
u32 private_segment_size;
diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c
index 7a12f37b8d3..85b044fb860 100644
--- a/src/plugins/http_static/http_static.c
+++ b/src/plugins/http_static/http_static.c
@@ -67,22 +67,25 @@ hss_register_url_handler (hss_url_handler_fn fp, const char *url,
static int
hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age,
- u32 keepalive_timeout, u64 max_body_size)
+ u32 keepalive_timeout, u64 max_body_size, u32 rx_buff_thresh)
{
hss_main_t *hsm = &hss_main;
int rv;
hsm->fifo_size = fifo_size;
- hsm->cache_size = cache_limit;
hsm->prealloc_fifos = prealloc_fifos;
hsm->private_segment_size = private_segment_size;
- hsm->www_root = format (0, "%s%c", www_root, 0);
- hsm->uri = format (0, "%s%c", uri, 0);
- hsm->max_age = max_age;
- hsm->max_body_size = max_body_size;
- hsm->keepalive_timeout = keepalive_timeout;
-
- if (vec_len (hsm->www_root) < 2)
+ if (uri && parse_uri ((char *) uri, &hsm->default_listener.sep))
+ return VNET_API_ERROR_INVALID_VALUE;
+ hsm->default_listener.www_root = format (0, "%s%c", www_root, 0);
+ hsm->default_listener.cache_size = cache_limit;
+ hsm->default_listener.max_age = max_age;
+ hsm->default_listener.max_body_size = max_body_size;
+ hsm->default_listener.rx_buff_thresh = rx_buff_thresh;
+ hsm->default_listener.keepalive_timeout = keepalive_timeout;
+ hsm->have_default_listener = 1;
+
+ if (vec_len (hsm->default_listener.www_root) < 2)
return VNET_API_ERROR_INVALID_VALUE;
if (hsm->app_index != ~0)
@@ -99,8 +102,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
case 0:
break;
default:
- vec_free (hsm->www_root);
- vec_free (hsm->uri);
+ vec_free (hsm->default_listener.www_root);
return VNET_API_ERROR_INIT_FAILED;
}
return 0;
@@ -108,49 +110,29 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
/* API message handler */
static void
-vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp)
-{
- vl_api_http_static_enable_v2_reply_t *rmp;
- hss_main_t *hsm = &hss_main;
- int rv;
-
- mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
- mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
-
- rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
- ntohl (mp->prealloc_fifos),
- ntohl (mp->private_segment_size), mp->www_root, mp->uri,
- ntohl (mp->max_age), HSS_DEFAULT_KEEPALIVE_TIMEOUT,
- HSS_DEFAULT_MAX_BODY_SIZE);
-
- REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY);
-}
-
-/* API message handler */
-static void
-vl_api_http_static_enable_v3_t_handler (vl_api_http_static_enable_v3_t *mp)
+vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp)
{
- vl_api_http_static_enable_v3_reply_t *rmp;
+ vl_api_http_static_enable_v4_reply_t *rmp;
hss_main_t *hsm = &hss_main;
int rv;
mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
- rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
- ntohl (mp->prealloc_fifos),
- ntohl (mp->private_segment_size), mp->www_root, mp->uri,
- ntohl (mp->max_age), ntohl (mp->keepalive_timeout),
- HSS_DEFAULT_MAX_BODY_SIZE);
+ rv = hss_enable_api (
+ ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
+ ntohl (mp->prealloc_fifos), ntohl (mp->private_segment_size), mp->www_root,
+ mp->uri, ntohl (mp->max_age), ntohl (mp->keepalive_timeout),
+ /* max_body_size is u64 in the .api definition: needs the 64-bit swap */
+ clib_net_to_host_u64 (mp->max_body_size), HSS_DEFAULT_RX_BUFFER_THRESH);
- REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V3_REPLY);
+ REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V4_REPLY);
}
/* API message handler */
static void
-vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp)
+vl_api_http_static_enable_v5_t_handler (vl_api_http_static_enable_v5_t *mp)
{
- vl_api_http_static_enable_v4_reply_t *rmp;
+ vl_api_http_static_enable_v5_reply_t *rmp;
hss_main_t *hsm = &hss_main;
int rv;
@@ -161,9 +143,9 @@ vl_api_http_static_enable_v4_t_handler (vl_api_http_static_enable_v4_t *mp)
ntohl (mp->prealloc_fifos),
ntohl (mp->private_segment_size), mp->www_root, mp->uri,
ntohl (mp->max_age), ntohl (mp->keepalive_timeout),
- ntohl (mp->max_body_size));
+ /* max_body_size is u64 in the .api definition: needs the 64-bit swap */
+ clib_net_to_host_u64 (mp->max_body_size), ntohl (mp->rx_buff_thresh));
- REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V4_REPLY);
+ REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V5_REPLY);
}
#include <http_static/http_static.api.c>
diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h
index e158a32dbc9..2b5c065e287 100644
--- a/src/plugins/http_static/http_static.h
+++ b/src/plugins/http_static/http_static.h
@@ -25,6 +25,7 @@
#define HSS_DEFAULT_MAX_AGE 600
#define HSS_DEFAULT_MAX_BODY_SIZE 8192
+#define HSS_DEFAULT_RX_BUFFER_THRESH (1 << 20) /* parenthesized: safe inside larger expressions */
#define HSS_DEFAULT_KEEPALIVE_TIMEOUT 60
/** @file http_static.h
@@ -33,15 +34,20 @@
/** \brief Application session
*/
-typedef struct
+typedef struct hss_session_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 session_index;
/** rx thread index */
- u32 thread_index;
+ clib_thread_index_t thread_index;
/** vpp session index, handle */
u32 vpp_session_index;
session_handle_t vpp_session_handle;
+ /** Index of listener for which connection was accepted */
+ u32 listener_index;
+ u8 *target_path;
+ u8 *target_query;
+ http_req_method_t rt;
/** Fully-resolved file path */
u8 *path;
/** Data to send */
@@ -58,6 +64,15 @@ typedef struct
http_headers_ctx_t resp_headers;
/** Response header buffer */
u8 *headers_buf;
+ /** RX buffer (POST body) */
+ u8 *rx_buff;
+ /** Current RX buffer offset */
+ u64 rx_buff_offset;
+ /** POST body left to receive */
+ u64 left_recv;
+ /** threshold for switching to pointers */
+ u64 use_ptr_thresh;
+ int (*read_body_handler) (struct hss_session_ *hs, session_t *ts);
} hss_session_t;
typedef struct hss_session_handle_
@@ -67,7 +82,7 @@ typedef struct hss_session_handle_
struct
{
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
u64 as_u64;
};
@@ -113,6 +128,36 @@ typedef hss_url_handler_rc_t (*hss_url_handler_fn) (hss_url_handler_args_t *);
typedef void (*hss_register_url_fn) (hss_url_handler_fn, char *, int);
typedef void (*hss_session_send_fn) (hss_url_handler_args_t *args);
+typedef struct hss_listener_ /* per-listener configuration and state */
+{
+ /** File cache used by this listener */
+ hss_cache_t cache;
+ /** The bind session endpoint e.g., tcp://0.0.0.0:80 */
+ session_endpoint_cfg_t sep;
+ /** Root path to be served */
+ u8 *www_root;
+ /** Threshold for switching to ptr data in http msgs */
+ u64 use_ptr_thresh;
+ /** Max cache size before LRU occurs */
+ u64 cache_size;
+ /** Maximum size of a request body (in bytes) */
+ u64 max_body_size;
+ /** Maximum size of a large memory allocation */
+ u32 rx_buff_thresh;
+ /** Timeout during which client connection will stay open */
+ u32 keepalive_timeout;
+ /** How long a response is considered fresh (in seconds) */
+ u32 max_age;
+ /** Formatted max_age: "max-age=xyz" */
+ u8 *max_age_formatted;
+ /** Enable the use of built-in URL handlers */
+ u8 enable_url_handlers;
+ /** Index in listener pool */
+ u32 l_index;
+ /** Listener session handle */
+ session_handle_t session_handle;
+} hss_listener_t;
+
/** \brief Main data structure
*/
typedef struct
@@ -120,15 +165,13 @@ typedef struct
/** Per thread vector of session pools */
hss_session_t **sessions;
+ /** Listeners pool */
+ hss_listener_t *listeners;
+
/** Hash tables for built-in GET and POST handlers */
uword *get_url_handlers;
uword *post_url_handlers;
- hss_cache_t cache;
-
- /** root path to be served */
- u8 *www_root;
-
/** Application index */
u32 app_index;
@@ -144,6 +187,11 @@ typedef struct
* Config
*/
+ /** Listener configured with server, if any */
+ hss_listener_t default_listener;
+ u8 have_default_listener;
+ u8 is_init;
+
/** Enable debug messages */
int debug_level;
/** Number of preallocated fifos, usually 0 */
@@ -152,22 +200,6 @@ typedef struct
u64 private_segment_size;
/** Size of the allocated rx, tx fifos, roughly 8K or so */
u32 fifo_size;
- /** The bind URI, defaults to tcp://0.0.0.0/80 */
- u8 *uri;
- /** Threshold for switching to ptr data in http msgs */
- u64 use_ptr_thresh;
- /** Enable the use of builtinurls */
- u8 enable_url_handlers;
- /** Max cache size before LRU occurs */
- u64 cache_size;
- /** How long a response is considered fresh (in seconds) */
- u32 max_age;
- /** Maximum size of a request body (in bytes) **/
- u64 max_body_size;
- /** Formatted max_age: "max-age=xyz" */
- u8 *max_age_formatted;
- /** Timeout during which client connection will stay open */
- u32 keepalive_timeout;
/** hash table of file extensions to mime types string indices */
uword *mime_type_indices_by_file_extensions;
@@ -177,6 +209,16 @@ extern hss_main_t hss_main;
int hss_create (vlib_main_t *vm);
+static inline hss_listener_t *
+hss_listener_get (u32 l_index)
+{
+ /* Listener at pool index l_index, or 0 when that slot is free */
+ hss_listener_t *listeners = hss_main.listeners;
+
+ return pool_is_free_index (listeners, l_index) ?
+ 0 : pool_elt_at_index (listeners, l_index);
+}
+
/**
* Register a GET or POST URL handler
*/
@@ -184,7 +226,8 @@ void hss_register_url_handler (hss_url_handler_fn fp, const char *url,
http_req_method_t type);
void hss_session_send_data (hss_url_handler_args_t *args);
void hss_builtinurl_json_handlers_init (void);
-hss_session_t *hss_session_get (u32 thread_index, u32 hs_index);
+hss_session_t *hss_session_get (clib_thread_index_t thread_index,
+ u32 hs_index);
#endif /* __included_http_static_h__ */
diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c
index 56487893220..aba7bc4ffbf 100644
--- a/src/plugins/http_static/http_static_test.c
+++ b/src/plugins/http_static/http_static_test.c
@@ -39,100 +39,10 @@ http_static_test_main_t http_static_test_main;
#include <vlibapi/vat_helper_macros.h>
static int
-api_http_static_enable_v2 (vat_main_t *vam)
-{
- unformat_input_t *line_input = vam->input;
- vl_api_http_static_enable_v2_t *mp;
- u64 tmp;
- u8 *www_root = 0;
- u8 *uri = 0;
- u32 prealloc_fifos = 0;
- u32 private_segment_size = 0;
- u32 fifo_size = 8 << 10;
- u32 cache_size_limit = 1 << 20;
- u32 max_age = HSS_DEFAULT_MAX_AGE;
- int ret;
-
- /* Parse args required to build the message */
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "www-root %s", &www_root))
- ;
- else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos))
- ;
- else if (unformat (line_input, "private-segment-size %U",
- unformat_memory_size, &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- errmsg ("private segment size %llu, too large", tmp);
- return -99;
- }
- private_segment_size = (u32) tmp;
- }
- else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
- &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- errmsg ("fifo-size %llu, too large", tmp);
- return -99;
- }
- fifo_size = (u32) tmp;
- }
- else if (unformat (line_input, "cache-size %U", unformat_memory_size,
- &tmp))
- {
- if (tmp < (128ULL << 10))
- {
- errmsg ("cache-size must be at least 128kb");
- return -99;
- }
- cache_size_limit = (u32) tmp;
- }
- else if (unformat (line_input, "max-age %d", &max_age))
- ;
- else if (unformat (line_input, "uri %s", &uri))
- ;
- else
- {
- errmsg ("unknown input `%U'", format_unformat_error, line_input);
- return -99;
- }
- }
-
- if (www_root == 0)
- {
- errmsg ("Must specify www-root");
- return -99;
- }
-
- if (uri == 0)
- uri = format (0, "tcp://0.0.0.0/80%c", 0);
-
- /* Construct the API message */
- M (HTTP_STATIC_ENABLE_V2, mp);
- strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256);
- strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256);
- mp->fifo_size = ntohl (fifo_size);
- mp->cache_size_limit = ntohl (cache_size_limit);
- mp->prealloc_fifos = ntohl (prealloc_fifos);
- mp->private_segment_size = ntohl (private_segment_size);
- mp->max_age = ntohl (max_age);
-
- /* send it... */
- S (mp);
-
- /* Wait for a reply... */
- W (ret);
- return ret;
-}
-
-static int
-api_http_static_enable_v3 (vat_main_t *vam)
+api_http_static_enable_v4 (vat_main_t *vam)
{
unformat_input_t *line_input = vam->input;
- vl_api_http_static_enable_v3_t *mp;
+ vl_api_http_static_enable_v4_t *mp;
u64 tmp;
u8 *www_root = 0;
u8 *uri = 0;
@@ -142,6 +52,7 @@ api_http_static_enable_v3 (vat_main_t *vam)
u32 cache_size_limit = 1 << 20;
u32 max_age = HSS_DEFAULT_MAX_AGE;
u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
+ u64 max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
int ret;
/* Parse args required to build the message */
@@ -188,6 +99,8 @@ api_http_static_enable_v3 (vat_main_t *vam)
;
else if (unformat (line_input, "uri %s", &uri))
;
+ else if (unformat (line_input, "max-body-size %llu", &max_body_size))
+ ;
else
{
errmsg ("unknown input `%U'", format_unformat_error, line_input);
@@ -205,7 +118,7 @@ api_http_static_enable_v3 (vat_main_t *vam)
uri = format (0, "tcp://0.0.0.0/80%c", 0);
/* Construct the API message */
- M (HTTP_STATIC_ENABLE_V3, mp);
+ M (HTTP_STATIC_ENABLE_V4, mp);
strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256);
strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256);
mp->fifo_size = ntohl (fifo_size);
@@ -214,6 +127,8 @@ api_http_static_enable_v3 (vat_main_t *vam)
mp->private_segment_size = ntohl (private_segment_size);
mp->max_age = ntohl (max_age);
mp->keepalive_timeout = ntohl (keepalive_timeout);
+ mp->max_body_size = clib_host_to_net_u64 (max_body_size); /* u64 field: 64-bit swap */
+
/* send it... */
S (mp);
@@ -223,10 +138,10 @@ api_http_static_enable_v3 (vat_main_t *vam)
}
static int
-api_http_static_enable_v4 (vat_main_t *vam)
+api_http_static_enable_v5 (vat_main_t *vam)
{
unformat_input_t *line_input = vam->input;
- vl_api_http_static_enable_v4_t *mp;
+ vl_api_http_static_enable_v5_t *mp;
u64 tmp;
u8 *www_root = 0;
u8 *uri = 0;
@@ -237,6 +152,7 @@ api_http_static_enable_v4 (vat_main_t *vam)
u32 max_age = HSS_DEFAULT_MAX_AGE;
u32 keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
u64 max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
+ u32 rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH;
int ret;
/* Parse args required to build the message */
@@ -283,7 +199,11 @@ api_http_static_enable_v4 (vat_main_t *vam)
;
else if (unformat (line_input, "uri %s", &uri))
;
- else if (unformat (line_input, "max-body-size %llu", &max_body_size))
+ else if (unformat (line_input, "max-body-size %U", unformat_memory_size,
+ &max_body_size))
+ ;
+ else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size,
+ &tmp))
+ {
+ /* unformat_memory_size stores a uword: parse into the 64-bit tmp
+ * and narrow explicitly instead of writing through a u32 pointer */
+ if (tmp >= 0x100000000ULL)
+ {
+ errmsg ("rx-buff-thresh %llu, too large", tmp);
+ return -99;
+ }
+ rx_buff_thresh = (u32) tmp;
+ }
else
{
diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c
index 074416873e3..692cb53abe3 100644
--- a/src/plugins/http_static/static_server.c
+++ b/src/plugins/http_static/static_server.c
@@ -14,12 +14,14 @@
*/
#include <http_static/http_static.h>
+#include <vnet/session/application.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <http/http_content_types.h>
+#include <http/http_status_codes.h>
/** @file static_server.c
* Static http server, sufficient to serve .html / .css / .js content.
@@ -27,11 +29,59 @@
/*? %%clicmd:group_label Static HTTP Server %% ?*/
#define HSS_FIFO_THRESH (16 << 10)
-
+#define HSS_HEADER_BUF_MAX_SIZE 16192
hss_main_t hss_main;
+static int file_handler_discard_body (hss_session_t *hs, session_t *ts);
+static int url_handler_read_body (hss_session_t *hs, session_t *ts);
+
+/**
+ * Append a header to the session's response header list, enlarging the
+ * backing buffer once if the first attempt is rejected.
+ *
+ * @param hs         session whose resp_headers / headers_buf are updated
+ * @param name       header name identifier
+ * @param value      header value bytes
+ * @param value_len  length of value
+ * @return 0 when the header was added, -1 otherwise. On failure the header
+ *         list is truncated and hs->data_len is reset to 0.
+ */
+static int
+hss_add_header (hss_session_t *hs, http_header_name_t name, const char *value,
+                uword value_len)
+{
+  u32 grown_len;
+
+  /* Common case: the header is accepted with the buffer as it is */
+  if (http_add_header (&hs->resp_headers, name, value, value_len) != -1)
+    return 0;
+
+  grown_len =
+    hs->resp_headers.tail_offset + sizeof (http_app_header_t) + value_len;
+
+  /* Grow only while the total stays below the hard cap, then retry once */
+  if (grown_len < HSS_HEADER_BUF_MAX_SIZE)
+    {
+      vec_resize (hs->headers_buf, sizeof (http_app_header_t) + value_len);
+      hs->resp_headers.len = grown_len;
+      /* re-point the context at the resized vector */
+      hs->resp_headers.buf = hs->headers_buf;
+
+      if (http_add_header (&hs->resp_headers, name, value, value_len) != -1)
+        return 0;
+    }
+
+  /* Either over the cap or the retry was rejected as well */
+  http_truncate_headers_list (&hs->resp_headers);
+  hs->data_len = 0;
+  return -1;
+}
+
+/**
+ * Called after n_last_deq bytes were removed from the session's rx fifo.
+ * When the fifo reports that a dequeue notification is needed, clear that
+ * request and program an RX io event for the transport session.
+ */
+static_always_inline void
+hss_confirm_data_read (hss_session_t *hs, u32 n_last_deq)
+{
+  session_t *ts = session_get (hs->vpp_session_index, hs->thread_index);
+
+  if (!svm_fifo_needs_deq_ntf (ts->rx_fifo, n_last_deq))
+    return;
+
+  svm_fifo_clear_deq_ntf (ts->rx_fifo);
+  session_program_transport_io_evt (ts->handle, SESSION_IO_EVT_RX);
+}
+
static hss_session_t *
-hss_session_alloc (u32 thread_index)
+hss_session_alloc (clib_thread_index_t thread_index)
{
hss_main_t *hsm = &hss_main;
hss_session_t *hs;
@@ -46,7 +96,7 @@ hss_session_alloc (u32 thread_index)
}
__clib_export hss_session_t *
-hss_session_get (u32 thread_index, u32 hs_index)
+hss_session_get (clib_thread_index_t thread_index, u32 hs_index)
{
hss_main_t *hsm = &hss_main;
if (pool_is_free_index (hsm->sessions[thread_index], hs_index))
@@ -85,6 +135,7 @@ hss_session_disconnect_transport (hss_session_t *hs)
static void
start_send_data (hss_session_t *hs, http_status_code_t status)
{
+ hss_main_t *hsm = &hss_main;
http_msg_t msg;
session_t *ts;
u32 n_enq;
@@ -93,6 +144,9 @@ start_send_data (hss_session_t *hs, http_status_code_t status)
ts = session_get (hs->vpp_session_index, hs->thread_index);
+ if (hsm->debug_level > 0)
+ clib_warning ("status code: %U", format_http_status_code, status);
+
msg.type = HTTP_MSG_REPLY;
msg.code = status;
msg.data.body_len = hs->data_len;
@@ -100,7 +154,7 @@ start_send_data (hss_session_t *hs, http_status_code_t status)
msg.data.headers_len = hs->resp_headers.tail_offset;
msg.data.len = msg.data.body_len + msg.data.headers_len;
- if (msg.data.len > hss_main.use_ptr_thresh)
+ if (msg.data.len > hs->use_ptr_thresh)
{
msg.data.type = HTTP_MSG_DATA_PTR;
rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
@@ -175,8 +229,9 @@ hss_session_send_data (hss_url_handler_args_t *args)
/* Set content type only if we have some response data */
if (hs->data_len)
- http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE,
- http_content_type_token (args->ct));
+ if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE,
+ http_content_type_token (args->ct)))
+ args->sc = HTTP_STATUS_INTERNAL_ERROR;
start_send_data (hs, args->sc);
}
@@ -247,15 +302,20 @@ content_type_from_request (u8 *request)
}
static int
-try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
- u8 *target_path, u8 *target_query, u8 *data)
+try_url_handler (hss_session_t *hs)
{
+ hss_main_t *hsm = &hss_main;
http_status_code_t sc = HTTP_STATUS_OK;
hss_url_handler_args_t args = {};
uword *p, *url_table;
+ session_t *ts;
+ u32 max_deq;
+ u8 *target_path;
int rv;
- if (!hsm->enable_url_handlers || !target_path)
+ target_path = hs->target_path;
+
+ if (!target_path)
return -1;
/* zero-length? try "index.html" */
@@ -266,28 +326,69 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
/* Look for built-in GET / POST handlers */
url_table =
- (rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers;
+ (hs->rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers;
p = hash_get_mem (url_table, target_path);
if (!p)
return -1;
+ hs->rx_buff = 0;
+
+ /* Read request body */
+ if (hs->left_recv)
+ {
+ hss_listener_t *l = hss_listener_get (hs->listener_index);
+ if (hs->left_recv > l->rx_buff_thresh)
+ {
+ /* TODO: large body (not buffered in memory) */
+ clib_warning ("data length %u above threshold %u", hs->left_recv,
+ l->rx_buff_thresh);
+ hs->left_recv = 0;
+ start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR);
+ hss_session_disconnect_transport (hs);
+ return 0;
+ }
+ hs->rx_buff_offset = 0;
+ vec_validate (hs->rx_buff, hs->left_recv - 1);
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (max_deq < hs->left_recv)
+ {
+ hs->read_body_handler = url_handler_read_body;
+ if (max_deq == 0)
+ return 0;
+ rv = svm_fifo_dequeue (ts->rx_fifo, max_deq, hs->rx_buff);
+ ASSERT (rv == max_deq);
+ hs->rx_buff_offset = max_deq;
+ hs->left_recv -= max_deq;
+ hss_confirm_data_read (hs, max_deq);
+ return 0;
+ }
+ rv = svm_fifo_dequeue (ts->rx_fifo, hs->left_recv,
+ hs->rx_buff + hs->rx_buff_offset);
+ ASSERT (rv == hs->left_recv);
+ hss_confirm_data_read (hs, hs->left_recv);
+ hs->left_recv = 0;
+ }
+
hs->path = 0;
hs->data_offset = 0;
hs->cache_pool_index = ~0;
if (hsm->debug_level > 0)
- clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST",
+ clib_warning ("%s '%s'", (hs->rt == HTTP_REQ_GET) ? "GET" : "POST",
target_path);
- args.req_type = rt;
- args.query = target_query;
- args.req_data = data;
+ args.req_type = hs->rt;
+ args.query = hs->target_query;
+ args.req_data = hs->rx_buff;
args.sh.thread_index = hs->thread_index;
args.sh.session_index = hs->session_index;
rv = ((hss_url_handler_fn) p[0]) (&args);
+ vec_free (hs->rx_buff);
+
/* Wait for data from handler */
if (rv == HSS_URL_HANDLER_ASYNC)
return 0;
@@ -295,7 +396,7 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
if (rv == HSS_URL_HANDLER_ERROR)
{
clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0],
- (rt == HTTP_REQ_GET) ? "GET" : "POST", target_path);
+ (hs->rt == HTTP_REQ_GET) ? "GET" : "POST", target_path);
sc = HTTP_STATUS_BAD_GATEWAY;
}
@@ -305,8 +406,9 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
/* Set content type only if we have some response data */
if (hs->data_len)
- http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE,
- http_content_type_token (args.ct));
+ if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE,
+ http_content_type_token (args.ct)))
+ sc = HTTP_STATUS_INTERNAL_ERROR;
start_send_data (hs, sc);
@@ -329,8 +431,9 @@ file_path_is_valid (u8 *path)
}
static u32
-try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
+try_index_file (hss_listener_t *l, hss_session_t *hs, u8 *path)
{
+ hss_main_t *hsm = &hss_main;
u8 *port_str = 0, *redirect;
transport_endpoint_t endpt;
transport_proto_t proto;
@@ -358,7 +461,7 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
/*
* We found an index.html file, build a redirect
*/
- vec_delete (path, vec_len (hsm->www_root) - 1, 0);
+ vec_delete (path, vec_len (l->www_root) - 1, 0);
ts = session_get (hs->vpp_session_index, hs->thread_index);
session_get_endpoint (ts, &endpt, 1 /* is_local */);
@@ -383,8 +486,10 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
vec_free (port_str);
- http_add_header (&hs->resp_headers, HTTP_HEADER_LOCATION,
- (const char *) redirect, vec_len (redirect));
+ if (hss_add_header (hs, HTTP_HEADER_LOCATION, (const char *) redirect,
+ vec_len (redirect)))
+ return HTTP_STATUS_INTERNAL_ERROR;
+
vec_free (redirect);
hs->data_len = 0;
hs->free_data = 1;
@@ -393,39 +498,61 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
}
static int
-try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
- u8 *target)
+try_file_handler (hss_session_t *hs)
{
+ hss_main_t *hsm = &hss_main;
http_status_code_t sc = HTTP_STATUS_OK;
u8 *path, *sanitized_path;
- u32 ce_index;
+ u32 ce_index, max_dequeue;
http_content_type_t type;
u8 *last_modified;
+ hss_listener_t *l;
+ session_t *ts;
+
+ l = hss_listener_get (hs->listener_index);
/* Feature not enabled */
- if (!hsm->www_root)
+ if (!l->www_root)
return -1;
- /* Remove dot segments to prevent path traversal */
- sanitized_path = http_path_remove_dot_segments (target);
+ /* Discard request body */
+ if (hs->left_recv)
+ {
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ max_dequeue = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (max_dequeue < hs->left_recv)
+ {
+ svm_fifo_dequeue_drop (ts->rx_fifo, max_dequeue);
+ hs->left_recv -= max_dequeue;
+ hs->read_body_handler = file_handler_discard_body;
+ hss_confirm_data_read (hs, max_dequeue);
+ return 0;
+ }
+ svm_fifo_dequeue_drop (ts->rx_fifo, hs->left_recv);
+ hss_confirm_data_read (hs, hs->left_recv);
+ hs->left_recv = 0;
+ }
+
+ /* Sanitize received path */
+ sanitized_path = http_path_sanitize (hs->target_path);
/*
* Construct the file to open
*/
- if (!target)
- path = format (0, "%s%c", hsm->www_root, 0);
+ if (!sanitized_path)
+ path = format (0, "%s%c", l->www_root, 0);
else
- path = format (0, "%s/%s%c", hsm->www_root, sanitized_path, 0);
+ path = format (0, "%s/%s%c", l->www_root, sanitized_path, 0);
if (hsm->debug_level > 0)
- clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", path);
+ clib_warning ("%s '%s'", (hs->rt == HTTP_REQ_GET) ? "GET" : "POST", path);
if (hs->data && hs->free_data)
vec_free (hs->data);
hs->data_offset = 0;
- ce_index = hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data,
+ ce_index = hss_cache_lookup_and_attach (&l->cache, path, &hs->data,
&hs->data_len, &last_modified);
if (ce_index == ~0)
{
@@ -442,10 +569,10 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
sc = HTTP_STATUS_NOT_FOUND;
goto done;
}
- sc = try_index_file (hsm, hs, path);
+ sc = try_index_file (l, hs, path);
goto done;
}
- ce_index = hss_cache_add_and_attach (&hsm->cache, path, &hs->data,
+ ce_index = hss_cache_add_and_attach (&l->cache, path, &hs->data,
&hs->data_len, &last_modified);
if (ce_index == ~0)
{
@@ -462,14 +589,17 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
* Cache-Control max-age
* Last-Modified
*/
- type = content_type_from_request (target);
- http_add_header (&hs->resp_headers, HTTP_HEADER_CONTENT_TYPE,
- http_content_type_token (type));
- http_add_header (&hs->resp_headers, HTTP_HEADER_CACHE_CONTROL,
- (const char *) hsm->max_age_formatted,
- vec_len (hsm->max_age_formatted));
- http_add_header (&hs->resp_headers, HTTP_HEADER_LAST_MODIFIED,
- (const char *) last_modified, vec_len (last_modified));
+ type = content_type_from_request (sanitized_path);
+ if (hss_add_header (hs, HTTP_HEADER_CONTENT_TYPE,
+ http_content_type_token (type)) ||
+ hss_add_header (hs, HTTP_HEADER_CACHE_CONTROL,
+ (const char *) l->max_age_formatted,
+ vec_len (l->max_age_formatted)) ||
+ hss_add_header (hs, HTTP_HEADER_LAST_MODIFIED,
+ (const char *) last_modified, vec_len (last_modified)))
+ {
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ }
done:
vec_free (sanitized_path);
@@ -481,15 +611,23 @@ done:
}
static void
-handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path,
- u8 *target_query, u8 *data)
+handle_request (hss_session_t *hs)
{
- hss_main_t *hsm = &hss_main;
+ hss_listener_t *l;
+
+ l = hss_listener_get (hs->listener_index);
+
+ if (hs->left_recv > l->max_body_size)
+ {
+ start_send_data (hs, HTTP_STATUS_CONTENT_TOO_LARGE);
+ hss_session_disconnect_transport (hs);
+ return;
+ }
- if (!try_url_handler (hsm, hs, rt, target_path, target_query, data))
+ if (l->enable_url_handlers && !try_url_handler (hs))
return;
- if (!try_file_handler (hsm, hs, rt, target_path))
+ if (!try_file_handler (hs))
return;
/* Handler did not find anything return 404 */
@@ -498,18 +636,60 @@ handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path,
}
+/**
+ * Body handler installed by try_file_handler(): drop as much of the
+ * outstanding request body as the rx fifo currently holds and, once nothing
+ * is left to receive, resume try_file_handler().
+ *
+ * @return 0 while body bytes are still outstanding, otherwise the result of
+ *         try_file_handler()
+ */
static int
+file_handler_discard_body (hss_session_t *hs, session_t *ts)
+{
+  u32 avail = svm_fifo_max_dequeue (ts->rx_fifo);
+  u32 n_drop = clib_min (avail, hs->left_recv);
+
+  svm_fifo_dequeue_drop (ts->rx_fifo, n_drop);
+  hs->left_recv -= n_drop;
+  hss_confirm_data_read (hs, n_drop);
+
+  /* Keep waiting until the whole body has been thrown away */
+  if (hs->left_recv != 0)
+    return 0;
+
+  return try_file_handler (hs);
+}
+
+/**
+ * Body handler installed by try_url_handler(): copy the next part of the
+ * request body from the rx fifo into hs->rx_buff at hs->rx_buff_offset and,
+ * once nothing is left to receive, re-enter try_url_handler().
+ *
+ * @return 0 while body bytes are still outstanding, otherwise the result of
+ *         try_url_handler()
+ */
+static int
+url_handler_read_body (hss_session_t *hs, session_t *ts)
+{
+  u32 avail = svm_fifo_max_dequeue (ts->rx_fifo);
+  u32 n_read = clib_min (avail, hs->left_recv);
+  int rv;
+
+  rv =
+    svm_fifo_dequeue (ts->rx_fifo, n_read, hs->rx_buff + hs->rx_buff_offset);
+  ASSERT (rv == n_read);
+
+  hs->rx_buff_offset += n_read;
+  hs->left_recv -= n_read;
+  hss_confirm_data_read (hs, n_read);
+
+  if (hs->left_recv != 0)
+    return 0;
+
+  /* NOTE(review): try_url_handler() assigns hs->rx_buff = 0 before it uses
+   * rx_buff as req_data, so the body collected here looks like it is dropped
+   * (and leaked) on this re-entry - confirm. */
+  return try_url_handler (hs);
+}
+
+static int
hss_ts_rx_callback (session_t *ts)
{
- hss_main_t *hsm = &hss_main;
hss_session_t *hs;
- u8 *target_path = 0, *target_query = 0, *data = 0;
http_msg_t msg;
int rv;
hs = hss_session_get (ts->thread_index, ts->opaque);
+ if (hs->left_recv != 0)
+ {
+ ASSERT (hs->read_body_handler);
+ return hs->read_body_handler (hs, ts);
+ }
+
if (hs->free_data)
vec_free (hs->data);
+
hs->data = 0;
+ hs->data_len = 0;
+ vec_free (hs->target_path);
+ vec_free (hs->target_query);
http_init_headers_ctx (&hs->resp_headers, hs->headers_buf,
vec_len (hs->headers_buf));
@@ -520,72 +700,59 @@ hss_ts_rx_callback (session_t *ts)
if (msg.type != HTTP_MSG_REQUEST ||
(msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST))
{
- http_add_header (&hs->resp_headers, HTTP_HEADER_ALLOW,
- http_token_lit ("GET, POST"));
- start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ if (hss_add_header (hs, HTTP_HEADER_ALLOW, http_token_lit ("GET, POST")))
+ start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR);
+ else
+ start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
goto err_done;
}
+ hs->rt = msg.method_type;
+
/* Read target path */
if (msg.data.target_path_len)
{
- vec_validate (target_path, msg.data.target_path_len - 1);
+ vec_validate (hs->target_path, msg.data.target_path_len - 1);
rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset,
- msg.data.target_path_len, target_path);
+ msg.data.target_path_len, hs->target_path);
ASSERT (rv == msg.data.target_path_len);
- if (http_validate_abs_path_syntax (target_path, 0))
+ if (http_validate_abs_path_syntax (hs->target_path, 0))
{
start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
goto err_done;
}
/* Target path must be a proper C-string in addition to a vector */
- vec_add1 (target_path, 0);
+ vec_add1 (hs->target_path, 0);
}
/* Read target query */
if (msg.data.target_query_len)
{
- vec_validate (target_query, msg.data.target_query_len - 1);
+ vec_validate (hs->target_query, msg.data.target_query_len - 1);
rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset,
- msg.data.target_query_len, target_query);
+ msg.data.target_query_len, hs->target_query);
ASSERT (rv == msg.data.target_query_len);
- if (http_validate_query_syntax (target_query, 0))
+ if (http_validate_query_syntax (hs->target_query, 0))
{
start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
goto err_done;
}
}
- /* Read request body for POST requests */
if (msg.data.body_len && msg.method_type == HTTP_REQ_POST)
{
- if (msg.data.body_len > hsm->max_body_size)
- {
- start_send_data (hs, HTTP_STATUS_CONTENT_TOO_LARGE);
- goto err_done;
- }
- if (svm_fifo_max_dequeue (ts->rx_fifo) - msg.data.body_offset <
- msg.data.body_len)
- {
- start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR);
- goto err_done;
- }
- vec_validate (data, msg.data.body_len - 1);
- rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len,
- data);
- ASSERT (rv == msg.data.body_len);
+ hs->left_recv = msg.data.body_len;
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset);
}
/* Find and send data */
- handle_request (hs, msg.method_type, target_path, target_query, data);
+ handle_request (hs);
goto done;
err_done:
hss_session_disconnect_transport (hs);
done:
- vec_free (target_path);
- vec_free (target_query);
- vec_free (data);
svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len);
return 0;
}
@@ -631,6 +798,7 @@ static int
hss_ts_accept_callback (session_t *ts)
{
hss_session_t *hs;
+ session_t *ls;
u32 thresh;
hs = hss_session_alloc (ts->thread_index);
@@ -638,6 +806,11 @@ hss_ts_accept_callback (session_t *ts)
hs->vpp_session_index = ts->session_index;
hs->vpp_session_handle = session_handle (ts);
+ /* Link to listener context */
+ ls = listen_session_get_from_handle (ts->listener_handle);
+ hs->listener_index = ls->opaque;
+ hs->use_ptr_thresh = hss_listener_get (hs->listener_index)->use_ptr_thresh;
+
/* The application sets a threshold for it's fifo to get notified when
* additional data can be enqueued. We want to keep the TX fifo reasonably
* full, however avoid entering a state where the
@@ -693,7 +866,6 @@ hss_add_segment_callback (u32 client_index, u64 segment_handle)
static void
hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
{
- hss_main_t *hsm = &hss_main;
hss_session_t *hs;
if (ntf == SESSION_CLEANUP_TRANSPORT)
@@ -705,7 +877,9 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
if (hs->cache_pool_index != ~0)
{
- hss_cache_detach_entry (&hsm->cache, hs->cache_pool_index);
+ hss_listener_t *l = hss_listener_get (hs->listener_index);
+ if (l)
+ hss_cache_detach_entry (&l->cache, hs->cache_pool_index);
hs->cache_pool_index = ~0;
}
@@ -716,6 +890,8 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
hs->free_data = 0;
vec_free (hs->headers_buf);
vec_free (hs->path);
+ vec_free (hs->target_path);
+ vec_free (hs->target_query);
hss_session_free (hs);
}
@@ -788,30 +964,22 @@ hss_transport_needs_crypto (transport_proto_t proto)
}
static int
-hss_listen (void)
+hss_listen (hss_listener_t *l, session_handle_t *lh)
{
hss_main_t *hsm = &hss_main;
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
vnet_listen_args_t _a, *a = &_a;
- char *uri = "tcp://0.0.0.0/80";
u8 need_crypto;
transport_endpt_ext_cfg_t *ext_cfg;
int rv;
- transport_endpt_cfg_http_t http_cfg = { hsm->keepalive_timeout, 0 };
+ transport_endpt_cfg_http_t http_cfg = { l->keepalive_timeout, 0 };
clib_memset (a, 0, sizeof (*a));
a->app_index = hsm->app_index;
- if (hsm->uri)
- uri = (char *) hsm->uri;
-
- if (parse_uri (uri, &sep))
- return -1;
-
- need_crypto = hss_transport_needs_crypto (sep.transport_proto);
+ need_crypto = hss_transport_needs_crypto (l->sep.transport_proto);
- sep.transport_proto = TRANSPORT_PROTO_HTTP;
- clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
+ l->sep.transport_proto = TRANSPORT_PROTO_HTTP;
+ clib_memcpy (&a->sep_ext, &l->sep, sizeof (l->sep));
ext_cfg = session_endpoint_add_ext_cfg (
&a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (http_cfg));
@@ -825,7 +993,8 @@ hss_listen (void)
ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
}
- rv = vnet_listen (a);
+ if (!(rv = vnet_listen (a)))
+ *lh = a->handle;
session_endpoint_free_ext_cfgs (&a->sep_ext);
@@ -835,13 +1004,75 @@ hss_listen (void)
+/* One-time setup of the GET / POST url handler tables and the builtin json
+ * handlers. Does nothing when get_url_handlers already exists, so the
+ * builtin handlers are no longer registered on every call. */
static void
hss_url_handlers_init (hss_main_t *hsm)
{
- if (!hsm->get_url_handlers)
+ if (hsm->get_url_handlers)
+ return;
+
+ hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
+ hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
+ hss_builtinurl_json_handlers_init ();
+}
+
+/**
+ * Start listening with the given configuration and register it in the
+ * listener pool.
+ *
+ * The pool entry is a struct copy of *l_cfg, so pointer members such as
+ * www_root are shared with the caller's copy after a successful return.
+ *
+ * @param l_cfg listener configuration (sep must already be parsed)
+ * @return 0 on success, -1 if hss_listen() failed (nothing is added then)
+ */
+int
+hss_listener_add (hss_listener_t *l_cfg)
+{
+ hss_main_t *hsm = &hss_main;
+ session_handle_t lh;
+ app_listener_t *al;
+ hss_listener_t *l;
+ session_t *ls;
+
+ /* NOTE(review): hss_listen() rewrites l_cfg->sep.transport_proto to
+ * TRANSPORT_PROTO_HTTP, and that modified sep is what gets stored below;
+ * hss_listener_del() memcmp's it against a freshly parsed sep - confirm
+ * the two still match for non-http uris. */
+ if (hss_listen (l_cfg, &lh))
{
- hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
- hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
+ clib_warning ("failed to start listening");
+ return -1;
}
- hss_builtinurl_json_handlers_init ();
+ pool_get (hsm->listeners, l);
+ *l = *l_cfg;
+ l->l_index = l - hsm->listeners;
+ l->session_handle = lh;
+
+ /* Store the pool index in the listen session's opaque; the accept
+ * callback reads it back as hs->listener_index */
+ al = app_listener_get_w_handle (lh);
+ ls = app_listener_get_session (al);
+ ls->opaque = l->l_index;
+
+ /* The file cache is only set up for listeners that serve files */
+ if (l->www_root)
+ hss_cache_init (&l->cache, l->cache_size, hsm->debug_level);
+ if (l->enable_url_handlers)
+ hss_url_handlers_init (hsm);
+
+ /* Pre-formatted value used for the Cache-Control response header */
+ l->max_age_formatted = format (0, "max-age=%d", l->max_age);
+
+ return 0;
+}
+
+/**
+ * Remove the listener whose session endpoint is byte-for-byte equal to
+ * l_cfg->sep: release its www_root, max_age_formatted and cache, return the
+ * entry to the pool and stop listening.
+ *
+ * @return -1 when no listener matches, otherwise the vnet_unlisten() result
+ */
+int
+hss_listener_del (hss_listener_t *l_cfg)
+{
+  hss_main_t *hsm = &hss_main;
+  hss_listener_t *it, *match = 0;
+
+  pool_foreach (it, hsm->listeners)
+    {
+      if (clib_memcmp (&l_cfg->sep, &it->sep, sizeof (l_cfg->sep)) == 0)
+        {
+          match = it;
+          break;
+        }
+    }
+
+  if (match == 0)
+    return -1;
+
+  /* Capture the handle before the pool entry is released */
+  vnet_unlisten_args_t args = { .handle = match->session_handle,
+                                hsm->app_index };
+
+  vec_free (match->www_root);
+  vec_free (match->max_age_formatted);
+  hss_cache_free (&match->cache);
+  pool_put (hsm->listeners, match);
+
+  return vnet_unlisten (&args);
}
int
@@ -854,24 +1085,25 @@ hss_create (vlib_main_t *vm)
num_threads = 1 /* main thread */ + vtm->n_threads;
vec_validate (hsm->sessions, num_threads - 1);
+ /* Make sure session layer is enabled */
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
+
if (hss_attach ())
{
clib_warning ("failed to attach server");
return -1;
}
- if (hss_listen ())
+
+ if (hsm->have_default_listener && hss_listener_add (&hsm->default_listener))
{
clib_warning ("failed to start listening");
return -1;
}
- if (hsm->www_root)
- hss_cache_init (&hsm->cache, hsm->cache_size, hsm->debug_level);
-
- if (hsm->enable_url_handlers)
- hss_url_handlers_init (hsm);
-
- hsm->max_age_formatted = format (0, "max-age=%d", hsm->max_age);
+ hsm->is_init = 1;
return 0;
}
@@ -882,20 +1114,24 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
hss_main_t *hsm = &hss_main;
+ hss_listener_t *l = &hsm->default_listener;
clib_error_t *error = 0;
+ char *uri = 0;
u64 seg_size;
int rv;
if (hsm->app_index != (u32) ~0)
- return clib_error_return (0, "http server already running...");
+ return clib_error_return (0, "http static server already initialized...");
hsm->prealloc_fifos = 0;
hsm->private_segment_size = 0;
hsm->fifo_size = 0;
- hsm->cache_size = 10 << 20;
- hsm->max_age = HSS_DEFAULT_MAX_AGE;
- hsm->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
- hsm->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
+
+ l->cache_size = 10 << 20;
+ l->max_age = HSS_DEFAULT_MAX_AGE;
+ l->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
+ l->rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH;
+ l->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -903,37 +1139,43 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "www-root %s", &hsm->www_root))
- ;
- else
- if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos))
- ;
- else if (unformat (line_input, "private-segment-size %U",
- unformat_memory_size, &seg_size))
+ /* Server config */
+ if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &seg_size))
hsm->private_segment_size = seg_size;
- else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size))
- hsm->fifo_size <<= 10;
- else if (unformat (line_input, "cache-size %U", unformat_memory_size,
- &hsm->cache_size))
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &hsm->fifo_size))
;
- else if (unformat (line_input, "uri %s", &hsm->uri))
+ else if (unformat (line_input, "prealloc-fifos %d",
+ &hsm->prealloc_fifos))
;
else if (unformat (line_input, "debug %d", &hsm->debug_level))
;
- else if (unformat (line_input, "keepalive-timeout %d",
- &hsm->keepalive_timeout))
- ;
else if (unformat (line_input, "debug"))
hsm->debug_level = 1;
- else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
- &hsm->use_ptr_thresh))
+ /* Default listener parameters */
+ else if (unformat (line_input, "uri %s", &uri))
+ ;
+ else if (unformat (line_input, "www-root %s", &l->www_root))
;
else if (unformat (line_input, "url-handlers"))
- hsm->enable_url_handlers = 1;
- else if (unformat (line_input, "max-age %d", &hsm->max_age))
+ l->enable_url_handlers = 1;
+ else if (unformat (line_input, "cache-size %U", unformat_memory_size,
+ &l->cache_size))
+ ;
+ else if (unformat (line_input, "max-age %d", &l->max_age))
;
else if (unformat (line_input, "max-body-size %U", unformat_memory_size,
- &hsm->max_body_size))
+ &l->max_body_size))
+ ;
+ else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size,
+ &l->rx_buff_thresh))
+ ;
+ else if (unformat (line_input, "keepalive-timeout %d",
+ &l->keepalive_timeout))
+ ;
+ else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
+ &l->use_ptr_thresh))
;
else
{
@@ -950,28 +1192,33 @@ no_input:
if (error)
goto done;
- if (hsm->www_root == 0 && !hsm->enable_url_handlers)
+ if (l->www_root)
{
- error = clib_error_return (0, "Must set www-root or url-handlers");
- goto done;
+ /* Maintain legacy default uri behavior */
+ if (!uri)
+ uri = "tcp://0.0.0.0:80";
+ if (l->cache_size < (128 << 10))
+ {
+ error = clib_error_return (0, "cache-size must be at least 128kb");
+ vec_free (l->www_root);
+ goto done;
+ }
}
- if (hsm->cache_size < (128 << 10))
+ if (uri)
{
- error = clib_error_return (0, "cache-size must be at least 128kb");
- vec_free (hsm->www_root);
- goto done;
+ if (parse_uri (uri, &l->sep))
+ {
+ error = clib_error_return (0, "failed to parse uri %s", uri);
+ goto done;
+ }
+ hsm->have_default_listener = 1;
}
- session_enable_disable_args_t args = { .is_en = 1,
- .rt_engine_type =
- RT_BACKEND_ENGINE_RULE_TABLE };
- vnet_session_enable_disable (vm, &args);
-
if ((rv = hss_create (vm)))
{
error = clib_error_return (0, "server_create returned %d", rv);
- vec_free (hsm->www_root);
+ vec_free (l->www_root);
}
done:
@@ -995,13 +1242,123 @@ done:
VLIB_CLI_COMMAND (hss_create_command, static) = {
.path = "http static server",
.short_help =
- "http static server www-root <path> [prealloc-fifos <nn>]\n"
+ "http static server [www-root <path>] [url-handlers]\n"
"[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n"
- "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n"
+ "[uri <uri>] [ptr-thresh <nn>] [prealloc-fifos <nn>] [debug [nn]]\n"
"[keepalive-timeout <nn>] [max-body-size <nn>]\n",
.function = hss_create_command_fn,
};
+/**
+ * CLI handler for "http static listener": add or delete a listener
+ * identified by its uri.
+ *
+ * Parses the listener options into a stack-local configuration, then either
+ * removes the matching listener (del) or validates the configuration and
+ * registers it (add, the default).
+ *
+ * @return 0 on success, otherwise a clib error describing the failure
+ */
+static clib_error_t *
+hss_add_del_listener_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                 vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  hss_main_t *hsm = &hss_main;
+  clib_error_t *error = 0;
+  hss_listener_t _l = {}, *l = &_l;
+  u8 is_add = 1;
+  char *uri = 0;
+
+  if (!hsm->is_init)
+    return clib_error_return (0, "Static server not initialized");
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "No input provided");
+
+  l->cache_size = 10 << 20;
+  l->max_age = HSS_DEFAULT_MAX_AGE;
+  l->max_body_size = HSS_DEFAULT_MAX_BODY_SIZE;
+  l->rx_buff_thresh = HSS_DEFAULT_RX_BUFFER_THRESH;
+  l->keepalive_timeout = HSS_DEFAULT_KEEPALIVE_TIMEOUT;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "add"))
+        is_add = 1;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else if (unformat (line_input, "uri %s", &uri))
+        ;
+      else if (unformat (line_input, "www-root %s", &l->www_root))
+        ;
+      else if (unformat (line_input, "url-handlers"))
+        l->enable_url_handlers = 1;
+      else if (unformat (line_input, "cache-size %U", unformat_memory_size,
+                         &l->cache_size))
+        ;
+      else if (unformat (line_input, "keepalive-timeout %d",
+                         &l->keepalive_timeout))
+        ;
+      else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
+                         &l->use_ptr_thresh))
+        ;
+      else if (unformat (line_input, "max-age %d", &l->max_age))
+        ;
+      else if (unformat (line_input, "max-body-size %U", unformat_memory_size,
+                         &l->max_body_size))
+        ;
+      else if (unformat (line_input, "rx-buff-thresh %U", unformat_memory_size,
+                         &l->rx_buff_thresh))
+        ;
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  /* Do not act on a command line that failed to parse; previously the
+   * listener was still added/deleted and the error could be overwritten */
+  if (error)
+    goto done;
+
+  if (!uri)
+    {
+      error = clib_error_return (0, "Must set uri");
+      goto done;
+    }
+
+  if (parse_uri (uri, &l->sep))
+    {
+      error = clib_error_return (0, "failed to parse uri %s", uri);
+      goto done;
+    }
+
+  if (!is_add)
+    {
+      /* Surface lookup / unlisten failures instead of dropping them */
+      if (hss_listener_del (l))
+        error = clib_error_return (0, "failed to delete listener");
+      goto done;
+    }
+
+  if (l->www_root == 0 && !l->enable_url_handlers)
+    {
+      error = clib_error_return (0, "Must set www-root or url-handlers");
+      goto done;
+    }
+
+  if (l->cache_size < (128 << 10))
+    {
+      error = clib_error_return (0, "cache-size must be at least 128kb");
+      goto done;
+    }
+
+  if (hss_listener_add (l))
+    {
+      error = clib_error_return (0, "failed to create listener");
+      goto done;
+    }
+
+  /* hss_listener_add() struct-copied the config, so the pool entry now
+   * holds the www_root vector; drop our reference so it is not freed */
+  l->www_root = 0;
+
+done:
+
+  /* Still set only when ownership was not handed over (errors, del) */
+  vec_free (l->www_root);
+  vec_free (uri);
+  return error;
+}
+
+/* CLI registration for adding/removing listeners at runtime. The help text
+ * lists every option hss_add_del_listener_command_fn parses. */
+VLIB_CLI_COMMAND (hss_add_del_listener_command, static) = {
+  .path = "http static listener",
+  .short_help =
+    "http static listener [add|del] uri <uri>\n"
+    "[www-root <path>] [url-handlers] [cache-size <nn>]\n"
+    "[max-age <nseconds>] [max-body-size <nn>] [rx-buff-thresh <nn>]\n"
+    "[keepalive-timeout <nn>] [ptr-thresh <nn>]\n",
+  .function = hss_add_del_listener_command_fn,
+};
+
static u8 *
format_hss_session (u8 *s, va_list *args)
{
@@ -1014,14 +1371,29 @@ format_hss_session (u8 *s, va_list *args)
return s;
}
+/**
+ * format() callback printing a one-line summary of a listener: index,
+ * address and port, www-root, cache size and whether url handlers are on.
+ *
+ * va_args: hss_listener_t *listener, int verbose (read but unused)
+ */
+static u8 *
+format_hss_listener (u8 *s, va_list *args)
+{
+  hss_listener_t *l = va_arg (*args, hss_listener_t *);
+  int __clib_unused verbose = va_arg (*args, int);
+  u16 port = clib_net_to_host_u16 (l->sep.port);
+
+  return format (s,
+                 "listener %d, uri %U:%u, www-root %s, cache-size %U "
+                 "url-handlers %d",
+                 l->l_index, format_ip46_address, &l->sep.ip, l->sep.is_ip4,
+                 port, l->www_root, format_memory_size, l->cache_size,
+                 l->enable_url_handlers);
+}
+
static clib_error_t *
hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- int verbose = 0, show_cache = 0, show_sessions = 0;
+ int verbose = 0, show_cache = 0, show_sessions = 0, show_listeners = 0;
+ u32 l_index = 0;
hss_main_t *hsm = &hss_main;
- if (hsm->www_root == 0)
+ if (!hsm->is_init)
return clib_error_return (0, "Static server disabled");
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -1032,17 +1404,26 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
verbose = 1;
else if (unformat (input, "cache"))
show_cache = 1;
+ else if (unformat (input, "cache %u", &l_index))
+ show_cache = 1;
else if (unformat (input, "sessions"))
show_sessions = 1;
+ else if (unformat (input, "listeners"))
+ show_listeners = 1;
else
break;
}
- if ((show_cache + show_sessions) == 0)
+ if ((show_cache + show_sessions + show_listeners) == 0)
return clib_error_return (0, "specify one or more of cache, sessions");
if (show_cache)
- vlib_cli_output (vm, "%U", format_hss_cache, &hsm->cache, verbose);
+ {
+ hss_listener_t *l = hss_listener_get (l_index);
+ if (l == 0)
+ return clib_error_return (0, "listener %d not found", l_index);
+ vlib_cli_output (vm, "%U", format_hss_cache, &l->cache, verbose);
+ }
if (show_sessions)
{
@@ -1067,6 +1448,15 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
vec_free (session_indices);
}
+
+ if (show_listeners)
+ {
+ hss_listener_t *l;
+ pool_foreach (l, hsm->listeners)
+ {
+ vlib_cli_output (vm, "%U", format_hss_listener, l, verbose);
+ }
+ }
return 0;
}
@@ -1082,7 +1472,8 @@ hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
?*/
VLIB_CLI_COMMAND (hss_show_command, static) = {
.path = "show http static server",
- .short_help = "show http static server sessions cache [verbose [<nn>]]",
+ .short_help = "show http static server [sessions] [cache] [listeners] "
+ "[verbose [<nn>]]",
.function = hss_show_command_fn,
};
@@ -1091,12 +1482,28 @@ hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
hss_main_t *hsm = &hss_main;
- u32 busy_items = 0;
+ u32 busy_items = 0, l_index = 0;
+ hss_listener_t *l;
- if (hsm->www_root == 0)
+ if (!hsm->is_init)
return clib_error_return (0, "Static server disabled");
- busy_items = hss_cache_clear (&hsm->cache);
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "index %u", &l_index))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ }
+
+ l = hss_listener_get (l_index);
+ if (l == 0)
+ return clib_error_return (0, "listener %d not found", l_index);
+
+ busy_items = hss_cache_clear (&l->cache);
if (busy_items > 0)
vlib_cli_output (vm, "Note: %d busy items still in cache...", busy_items);
@@ -1118,7 +1525,7 @@ hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input,
?*/
VLIB_CLI_COMMAND (clear_hss_cache_command, static) = {
.path = "clear http static cache",
- .short_help = "clear http static cache",
+ .short_help = "clear http static cache [index <index>]",
.function = hss_clear_cache_command_fn,
};
diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c
index 0e6751ce851..c8183feddfd 100644
--- a/src/plugins/ikev2/ikev2.c
+++ b/src/plugins/ikev2/ikev2.c
@@ -2223,7 +2223,7 @@ ikev2_create_tunnel_interface (vlib_main_t *vm, ikev2_sa_t *sa,
ikev2_child_sa_t *child, u32 sa_index,
u32 child_index, u8 is_rekey, u8 kex)
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
ikev2_main_t *km = &ikev2_main;
ipsec_crypto_alg_t encr_type;
ipsec_integ_alg_t integ_type;
@@ -3190,7 +3190,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data ();
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
ikev2_stats_t _stats, *stats = &_stats;
int res;
@@ -5551,6 +5551,7 @@ static uword
ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
{
+ ipsec_main_t *im = &ipsec_main;
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
ikev2_child_sa_t *c;
@@ -5631,10 +5632,10 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* process ipsec sas */
ipsec_sa_t *sa;
- pool_foreach (sa, ipsec_sa_pool)
- {
- ikev2_mngr_process_ipsec_sa (sa);
- }
+ pool_foreach (sa, im->sa_pool)
+ {
+ ikev2_mngr_process_ipsec_sa (sa);
+ }
ikev2_process_pending_sa_init (vm, km);
}
diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h
index 2751657bff9..58da36d9d59 100644
--- a/src/plugins/ikev2/ikev2_priv.h
+++ b/src/plugins/ikev2/ikev2_priv.h
@@ -661,7 +661,7 @@ clib_error_t *ikev2_profile_natt_disable (u8 * name);
static_always_inline ikev2_main_per_thread_data_t *
ikev2_get_per_thread_data ()
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
return vec_elt_at_index (ikev2_main.per_thread_data, thread_index);
}
#endif /* __included_ikev2_priv_h__ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
index 1606f72224f..ca6483b3329 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
@@ -21,7 +21,7 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <ioam/export-common/ioam_export.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
index 839fd80b443..17084767c1e 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
@@ -16,8 +16,8 @@
#include <vnet/vnet.h>
#include <vppinfra/error.h>
#include <vnet/ip/ip.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <ioam/export-common/ioam_export.h>
typedef struct
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index 61476ebd85c..a4deae2ca60 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -646,7 +646,7 @@ vlib_node_registration_t ioam_cache_ts_timer_tick_node;
typedef struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
} ioam_cache_ts_timer_tick_trace_t;
/* packet trace format function */
@@ -696,7 +696,7 @@ expired_cache_ts_timer_callback (u32 * expired_timers)
ioam_cache_main_t *cm = &ioam_cache_main;
int i;
u32 pool_index;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 count = 0;
for (i = 0; i < vec_len (expired_timers); i++)
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
index 801faa98066..d8d52e9f0a1 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
@@ -17,8 +17,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
@@ -68,8 +67,8 @@ vxlan_gpe_decap_ioam (vlib_main_t * vm,
vlib_frame_t * from_frame, u8 is_ipv6)
{
u32 n_left_from, next_index, *from, *to_next;
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_main_t *ngm = hm->gpe_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
index de375df4f7c..9c742d8c293 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
@@ -17,7 +17,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
@@ -71,7 +71,8 @@ vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm,
vlib_frame_t * from_frame)
{
u32 n_left_from, next_index, *from, *to_next;
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_main_t *ngm = sm->gpe_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
index 2fa0aa29450..a80662b9d12 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
@@ -17,7 +17,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
/* Statistics (not really errors) */
@@ -231,7 +231,8 @@ vxlan_gpe_pop_ioam (vlib_main_t * vm,
vlib_frame_t * from_frame, u8 is_ipv6)
{
u32 n_left_from, next_index, *from, *to_next;
- vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_main_t *ngm = sm->gpe_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
index e3c82725e26..02233cf9841 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
@@ -18,7 +18,7 @@
#include <vnet/ip/ip.h>
#include <vnet/udp/udp_local.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
index d61832d975a..6de1760b6b7 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
@@ -80,9 +80,9 @@ static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler
clib_error_t *error;
vxlan4_gpe_tunnel_key_t key4;
uword *p = NULL;
- vxlan_gpe_main_t *gm = &vxlan_gpe_main;
vxlan_gpe_tunnel_t *t = 0;
vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_main_t *gm = hm->gpe_main;
u32 vni;
@@ -130,7 +130,8 @@ static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler
clib_error_t *error;
vxlan4_gpe_tunnel_key_t key4;
uword *p = NULL;
- vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_main_t *gm = hm->gpe_main;
vxlan_gpe_tunnel_t *t = 0;
u32 vni;
@@ -214,6 +215,13 @@ ioam_vxlan_gpe_init (vlib_main_t * vm)
vlib_node_t *vxlan_gpe_decap_node = NULL;
uword next_node = 0;
+ sm->gpe_main =
+ vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main");
+ if (sm->gpe_main == 0)
+ {
+ return clib_error_return (0, "vxlan-gpe_plugin.so is not loaded");
+ }
+
sm->vlib_main = vm;
sm->vnet_main = vnet_get_main ();
sm->unix_time_0 = (u32) time (0); /* Store starting time */
@@ -231,7 +239,7 @@ ioam_vxlan_gpe_init (vlib_main_t * vm)
vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input");
next_node =
vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index);
- vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node);
+ sm->gpe_main->register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node);
vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces));
sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword));
@@ -243,7 +251,9 @@ ioam_vxlan_gpe_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init);
+VLIB_INIT_FUNCTION (ioam_vxlan_gpe_init) = {
+ .runs_after = VLIB_INITS ("vxlan_gpe_init"),
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
index 327afc3fb61..f83c6e1ecc3 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
@@ -12,8 +12,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <vnet/ip/format.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
#include <vnet/dpo/load_balance.h>
@@ -423,7 +423,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
vxlan4_gpe_tunnel_key_t key4;
vxlan6_gpe_tunnel_key_t key6;
uword *p;
- vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_main_t *gm = hm->gpe_main;
vxlan_gpe_tunnel_t *t = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
index 0711b87abbe..f9374c9bb95 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
@@ -15,12 +15,11 @@
#ifndef __included_vxlan_gpe_ioam_h__
#define __included_vxlan_gpe_ioam_h__
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
#include <vnet/ip/ip.h>
-
typedef struct vxlan_gpe_sw_interface_
{
u32 sw_if_index;
@@ -100,7 +99,8 @@ typedef struct vxlan_gpe_ioam_main_
vlib_main_t *vlib_main;
/** State convenience vnet_main_t */
vnet_main_t *vnet_main;
-
+ /** State convenience vxlan_gpe_main_t */
+ vxlan_gpe_main_t *gpe_main;
} vxlan_gpe_ioam_main_t;
extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
index a7ef859ec58..515529ce794 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
@@ -15,8 +15,8 @@
#ifndef __included_vxlan_gpe_ioam_packet_h__
#define __included_vxlan_gpe_ioam_packet_h__
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <vnet/ip/ip.h>
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
index 9c783c747d0..9b1b8b824ff 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
@@ -16,8 +16,8 @@
#include <vnet/vnet.h>
#include <vppinfra/error.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
index c0ad8d9d03a..db7fd5651b1 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
@@ -15,8 +15,8 @@
#ifndef __included_vxlan_gpe_ioam_util_h__
#define __included_vxlan_gpe_ioam_util_h__
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe_packet.h>
#include <vnet/ip/ip.h>
diff --git a/src/plugins/l2tp/l2tp.c b/src/plugins/l2tp/l2tp.c
index 907468b5900..cada9dc2656 100644
--- a/src/plugins/l2tp/l2tp.c
+++ b/src/plugins/l2tp/l2tp.c
@@ -151,7 +151,7 @@ test_counters_command_fn (vlib_main_t * vm,
u32 session_index;
u32 counter_index;
u32 nincr = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
pool_foreach (session, lm->sessions)
{
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index 7ae1884ff31..0c4f21a4a78 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -108,7 +108,7 @@ u8 *format_lb_main (u8 * s, va_list * args)
s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
- u32 thread_index;
+ clib_thread_index_t thread_index;
for(thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++ ) {
lb_hash_t *h = lbm->per_cpu[thread_index].sticky_ht;
if (h) {
@@ -764,7 +764,7 @@ next:
int
lb_flush_vip_as (u32 vip_index, u32 as_index)
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
vlib_thread_main_t *tm = vlib_get_thread_main();
lb_main_t *lbm = &lb_main;
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index a37fe11a9b4..1ddc556a8bf 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -124,7 +124,7 @@ format_lb_nat_trace (u8 * s, va_list * args)
}
lb_hash_t *
-lb_get_sticky_table (u32 thread_index)
+lb_get_sticky_table (clib_thread_index_t thread_index)
{
lb_main_t *lbm = &lb_main;
lb_hash_t *sticky_ht = lbm->per_cpu[thread_index].sticky_ht;
@@ -282,7 +282,7 @@ lb_node_fn (vlib_main_t * vm,
{
lb_main_t *lbm = &lb_main;
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 lb_time = lb_hash_time_now (vm);
lb_hash_t *sticky_ht = lb_get_sticky_table (thread_index);
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
index e7eaa5a3669..8b0fdb5eb53 100644
--- a/src/plugins/linux-cp/lcp.api
+++ b/src/plugins/linux-cp/lcp.api
@@ -177,6 +177,42 @@ autoendian define lcp_itf_pair_details
option in_progress;
};
+/** \brief Enable linux-cp-punt-xc for a given ethertype
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ethertype - the ethertype to enable
+*/
+autoreply define lcp_ethertype_enable
+{
+ u32 client_index;
+ u32 context;
+ u16 ethertype;
+};
+
+/** \brief Get the enabled ethertypes for linux-cp-punt-xc
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define lcp_ethertype_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to get the enabled ethertypes for linux-cp-punt-xc
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param count - number of enabled ethertypes
+ @param ethertypes - array of enabled ethertypes
+*/
+define lcp_ethertype_get_reply
+{
+ u32 context;
+ i32 retval;
+ u16 count;
+ u16 ethertypes[count];
+};
+
service {
rpc lcp_itf_pair_get returns lcp_itf_pair_get_reply
stream lcp_itf_pair_details;
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
index 74421230e9d..0db502988d7 100644
--- a/src/plugins/linux-cp/lcp_api.c
+++ b/src/plugins/linux-cp/lcp_api.c
@@ -280,6 +280,40 @@ vl_api_lcp_itf_pair_replace_end_t_handler (
REPLY_MACRO (VL_API_LCP_ITF_PAIR_REPLACE_END_REPLY);
}
+/* Handler for lcp_ethertype_enable: enables linux-cp-punt-xc for the
+ * requested ethertype and sends the automatic reply carrying rv. */
+static void
+vl_api_lcp_ethertype_enable_t_handler (vl_api_lcp_ethertype_enable_t *mp)
+{
+ vl_api_lcp_ethertype_enable_reply_t *rmp;
+ int rv;
+
+ /* The message is not declared autoendian, so the u16 field is converted
+ * to host order here, mirroring the htons () applied to ethertypes in
+ * the lcp_ethertype_get reply. */
+ rv = lcp_ethertype_enable (ntohs (mp->ethertype));
+
+ REPLY_MACRO (VL_API_LCP_ETHERTYPE_ENABLE_REPLY);
+}
+
+/* Handler for lcp_ethertype_get: collects the ethertypes reported by
+ * lcp_ethertype_get_enabled () and returns them, converted with htons (),
+ * in the variable-length reply. */
+static void
+vl_api_lcp_ethertype_get_t_handler (vl_api_lcp_ethertype_get_t *mp)
+{
+ vl_api_lcp_ethertype_get_reply_t *rmp;
+ ethernet_type_t *ethertypes = vec_new (ethernet_type_t, 0);
+ u16 count = 0;
+ int rv = 0;
+
+ rv = lcp_ethertype_get_enabled (&ethertypes);
+ /* On failure count stays 0, so no array entries are filled in below. */
+ if (!rv)
+ count = vec_len (ethertypes);
+
+ /* NOTE(review): the second argument looks like the number of extra bytes
+ * needed for the trailing ethertypes[] array - confirm against the
+ * REPLY_MACRO3 definition. */
+ REPLY_MACRO3 (VL_API_LCP_ETHERTYPE_GET_REPLY, sizeof (u16) * count, ({
+ rmp->count = htons (count);
+ for (int i = 0; i < count; i++)
+ {
+ rmp->ethertypes[i] = htons (ethertypes[i]);
+ }
+ }));
+
+ /* The temporary vector is released once the reply has been built. */
+ vec_free (ethertypes);
+}
+
/*
* Set up the API message handling tables
*/
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index 0dcf600b301..e89afd2a753 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -337,6 +337,62 @@ VLIB_CLI_COMMAND (lcp_itf_pair_show_cmd_node, static) = {
.is_mp_safe = 1,
};
+/* CLI handler for "lcp ethertype enable": parses one ethertype (number or
+ * name, host byte order) and enables linux-cp-punt-xc for it.
+ * Returns 0 on success or a clib error describing the failure. */
+static clib_error_t *
+lcp_ethertype_enable_cmd (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ /* unformat_ethernet_type_host_byte_order stores its result through a
+ * u16 pointer; parsing straight into the wider ethernet_type_t would
+ * leave the remaining bytes of the enum uninitialised. */
+ u16 ethertype = 0;
+ int rv;
+
+ if (!unformat (input, "%U", unformat_ethernet_type_host_byte_order,
+ &ethertype))
+ return clib_error_return (0, "Invalid ethertype");
+
+ rv = lcp_ethertype_enable ((ethernet_type_t) ethertype);
+ if (rv)
+ return clib_error_return (0, "Failed to enable ethertype (%d)", rv);
+
+ return 0;
+}
+
+/* Registers the "lcp ethertype enable" CLI path; input is handled by
+ * lcp_ethertype_enable_cmd. */
+VLIB_CLI_COMMAND (lcp_ethertype_enable_command, static) = {
+ .path = "lcp ethertype enable",
+ .short_help =
+ "lcp ethertype enable (<hex_ethertype_num>|<uc_ethertype_name>)",
+ .function = lcp_ethertype_enable_cmd,
+};
+
+/* CLI handler for "show lcp ethertype": prints every ethertype returned by
+ * lcp_ethertype_get_enabled (), one per line, as a 4-digit hex value.
+ * Returns 0 on success or a clib error carrying the failure code. */
+static clib_error_t *
+lcp_ethertype_show_cmd (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *err = 0;
+ ethernet_type_t *enabled = vec_new (ethernet_type_t, 0);
+ int rc = lcp_ethertype_get_enabled (&enabled);
+
+ if (rc != 0)
+ {
+ err = clib_error_return (0, "Failed to get enabled ethertypes (%d)", rc);
+ }
+ else
+ {
+ for (u32 i = 0; i < vec_len (enabled); i++)
+ vlib_cli_output (vm, "0x%04x", enabled[i]);
+ }
+
+ /* Single exit: the scratch vector is freed on both paths. */
+ vec_free (enabled);
+ return err;
+}
+
+/* Registers the "show lcp ethertype" CLI path; output is produced by
+ * lcp_ethertype_show_cmd. */
+VLIB_CLI_COMMAND (lcp_ethertype_show_command, static) = {
+ .path = "show lcp ethertype",
+ .short_help = "show lcp ethertype",
+ .function = lcp_ethertype_show_cmd,
+};
+
clib_error_t *
lcp_cli_init (vlib_main_t *vm)
{
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index 61665ad4146..31864f791af 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -162,6 +162,22 @@ lcp_itf_pair_get (u32 index)
return pool_elt_at_index (lcp_itf_pair_pool, index);
}
+/* Binary-direct API, intended to be called from other plugins without
+ * going through VAPI. Parameters and return type are deliberately plain C
+ * types rather than structures; src/plugins/sflow/sflow_dlapi.h shows an
+ * example of such use.
+ *
+ * Returns the lip_vif_index of the interface pair whose phy is
+ * phy_sw_if_index, or INDEX_INVALID when there is no such pair.
+ */
+u32
+lcp_itf_pair_get_vif_index_by_phy (u32 phy_sw_if_index)
+{
+ lcp_itf_pair_t *pair;
+
+ /* Outside the by-phy table: nothing to look up. */
+ if (phy_sw_if_index >= vec_len (lip_db_by_phy))
+ return INDEX_INVALID;
+
+ pair = lcp_itf_pair_get (lip_db_by_phy[phy_sw_if_index]);
+ if (!pair)
+ return INDEX_INVALID;
+
+ return pair->lip_vif_index;
+}
+
index_t
lcp_itf_pair_find_by_vif (u32 vif_index)
{
@@ -1214,6 +1230,53 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
return 0;
}
+/* Register the "linux-cp-punt-xc" node as the input handler for ethertype.
+ *
+ * Returns 0 on success, VNET_API_ERROR_UNIMPLEMENTED when the node does not
+ * exist, VNET_API_ERROR_INVALID_VALUE when the ethertype has no type info,
+ * and VNET_API_ERROR_INVALID_REGISTRATION when the ethertype is already
+ * bound to a different node.
+ */
+int
+lcp_ethertype_enable (ethernet_type_t ethertype)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_t *xc_node = vlib_get_node_by_name (vm, (u8 *) "linux-cp-punt-xc");
+ ethernet_type_info_t *info;
+
+ if (xc_node == 0)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ info = ethernet_get_type_info (&ethernet_main, ethertype);
+ if (info == 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /* Only an unclaimed ethertype (~0) or one already pointing at this node
+ * may be (re-)registered. */
+ if (!(info->node_index == ~0 || info->node_index == xc_node->index))
+ return VNET_API_ERROR_INVALID_REGISTRATION;
+
+ ethernet_register_input_type (vm, ethertype, xc_node->index);
+ return 0;
+}
+
+/* Append to *ethertypes_vec every ethertype whose registered input node is
+ * "linux-cp-punt-xc".
+ *
+ * Returns 0 on success, VNET_API_ERROR_INVALID_ARGUMENT when ethertypes_vec
+ * is NULL, and VNET_API_ERROR_UNIMPLEMENTED when the node does not exist.
+ * The caller owns (and frees) the vector.
+ */
+int
+lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec)
+{
+ ethernet_main_t *em = &ethernet_main;
+ vlib_node_t *xc_node =
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "linux-cp-punt-xc");
+
+ if (ethertypes_vec == 0)
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
+ if (xc_node == 0)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ for (u32 i = 0; i < vec_len (em->type_infos); i++)
+ {
+ ethernet_type_info_t *info = &em->type_infos[i];
+
+ if (info->node_index != xc_node->index)
+ continue;
+ vec_add1 (*ethertypes_vec, info->type);
+ }
+
+ return 0;
+}
+
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
static clib_error_t *
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index cfcd3925a15..8cf6d3f4da1 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -18,6 +18,7 @@
#include <vnet/dpo/dpo.h>
#include <vnet/adj/adj.h>
#include <vnet/ip/ip_types.h>
+#include <vnet/ethernet/ethernet.h>
#include <plugins/linux-cp/lcp.h>
@@ -198,6 +199,18 @@ void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
void lcp_itf_pair_sync_state_all ();
+/**
+ * Enable linux-cp-punt-xc for a given ethertype.
+ * @param ethertype - ethertype to enable
+ */
+int lcp_ethertype_enable (ethernet_type_t ethertype);
+
+/**
+ * Get the list of ethertypes enabled for linux-cp-punt-xc.
+ * @param ethertypes_vec - pointer to a vector to store the list of ethertypes
+ */
+int lcp_ethertype_get_enabled (ethernet_type_t **ethertypes_vec);
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c
index 916877939f0..55d2ea54245 100644
--- a/src/plugins/linux-cp/lcp_nl.c
+++ b/src/plugins/linux-cp/lcp_nl.c
@@ -29,7 +29,7 @@
#include <netlink/route/addr.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vppinfra/error.h>
#include <vppinfra/linux/netns.h>
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index 241cc5e4bff..9fa1aa5bd66 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -39,40 +39,51 @@
typedef enum
{
-#define _(sym, str) LIP_PUNT_NEXT_##sym,
+#define _(sym, str) LIP_PUNT_XC_NEXT_##sym,
foreach_lip_punt
#undef _
- LIP_PUNT_N_NEXT,
-} lip_punt_next_t;
+ LIP_PUNT_XC_N_NEXT,
+} lip_punt_xc_next_t;
-typedef struct lip_punt_trace_t_
+typedef struct lip_punt_xc_trace_t_
{
+ bool is_xc;
u32 phy_sw_if_index;
u32 host_sw_if_index;
-} lip_punt_trace_t;
+} lip_punt_xc_trace_t;
/* packet trace format function */
static u8 *
-format_lip_punt_trace (u8 *s, va_list *args)
+format_lip_punt_xc_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- lip_punt_trace_t *t = va_arg (*args, lip_punt_trace_t *);
+ lip_punt_xc_trace_t *t = va_arg (*args, lip_punt_xc_trace_t *);
- s =
- format (s, "lip-punt: %u -> %u", t->phy_sw_if_index, t->host_sw_if_index);
+ if (t->is_xc)
+ {
+ s = format (s, "lip-xc: %u -> %u", t->host_sw_if_index,
+ t->phy_sw_if_index);
+ }
+ else
+ {
+ s = format (s, "lip-punt: %u -> %u", t->phy_sw_if_index,
+ t->host_sw_if_index);
+ }
return s;
}
/**
* Pass punted packets from the PHY to the HOST.
+ * Conditionally x-connect packets from the HOST to the PHY.
*/
-VLIB_NODE_FN (lip_punt_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+static_always_inline u32
+lip_punt_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool check_xc)
{
u32 n_left_from, *from, *to_next, n_left_to_next;
- lip_punt_next_t next_index;
+ lip_punt_xc_next_t next_index;
next_index = node->cached_next_index;
n_left_from = frame->n_vectors;
@@ -89,6 +100,7 @@ VLIB_NODE_FN (lip_punt_node)
u32 next0 = ~0;
u32 bi0, lipi0;
u32 sw_if_index0;
+ bool is_xc0 = 0;
u8 len0;
bi0 = to_next[0] = from[0];
@@ -97,18 +109,33 @@ VLIB_NODE_FN (lip_punt_node)
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;
- next0 = LIP_PUNT_NEXT_DROP;
+ next0 = LIP_PUNT_XC_NEXT_DROP;
b0 = vlib_get_buffer (vm, bi0);
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
lipi0 = lcp_itf_pair_find_by_phy (sw_if_index0);
- if (PREDICT_FALSE (lipi0 == INDEX_INVALID))
- goto trace0;
+
+ /*
+ * lip_punt_node: expect sw_if_index0 is phy in an itf pair
+ * lip_punt_xc_node: if sw_if_index0 is not phy, expect it is host
+ */
+ if (!check_xc && (PREDICT_FALSE (lipi0 == INDEX_INVALID)))
+ {
+ goto trace0;
+ }
+ else if (check_xc && (lipi0 == INDEX_INVALID))
+ {
+ is_xc0 = 1;
+ lipi0 = lcp_itf_pair_find_by_host (sw_if_index0);
+ if (PREDICT_FALSE (lipi0 == INDEX_INVALID))
+ goto trace0;
+ }
lip0 = lcp_itf_pair_get (lipi0);
- next0 = LIP_PUNT_NEXT_IO;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip0->lip_host_sw_if_index;
+ next0 = LIP_PUNT_XC_NEXT_IO;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ is_xc0 ? lip0->lip_phy_sw_if_index : lip0->lip_host_sw_if_index;
if (PREDICT_TRUE (lip0->lip_host_type == LCP_ITF_HOST_TAP))
{
@@ -129,10 +156,22 @@ VLIB_NODE_FN (lip_punt_node)
trace0:
if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
{
- lip_punt_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->phy_sw_if_index = sw_if_index0;
- t->host_sw_if_index =
- (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index;
+ lip_punt_xc_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+
+ t->is_xc = is_xc0;
+ if (is_xc0)
+ {
+ t->phy_sw_if_index =
+ (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_phy_sw_if_index;
+ t->host_sw_if_index = sw_if_index0;
+ }
+ else
+ {
+ t->phy_sw_if_index = sw_if_index0;
+ t->host_sw_if_index =
+ (lipi0 == INDEX_INVALID) ? ~0 : lip0->lip_host_sw_if_index;
+ }
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
@@ -145,16 +184,41 @@ VLIB_NODE_FN (lip_punt_node)
return frame->n_vectors;
}
+/* Node function for lip_punt_node: runs the shared inline with check_xc
+ * disabled, i.e. the RX interface is only looked up as the phy of a pair. */
+VLIB_NODE_FN (lip_punt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lip_punt_xc_inline (vm, node, frame, false /* check_xc */));
+}
+
+/* Node function for lip_punt_xc_node: runs the shared inline with check_xc
+ * enabled, so an RX interface that is not a phy is also looked up as the
+ * host side of a pair and cross-connected to its phy. */
+VLIB_NODE_FN (lip_punt_xc_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lip_punt_xc_inline (vm, node, frame, true /* check_xc */));
+}
+
VLIB_REGISTER_NODE (lip_punt_node) = {
.name = "linux-cp-punt",
.vector_size = sizeof (u32),
- .format_trace = format_lip_punt_trace,
+ .format_trace = format_lip_punt_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = LIP_PUNT_XC_N_NEXT,
+ .next_nodes = {
+ [LIP_PUNT_XC_NEXT_DROP] = "error-drop",
+ [LIP_PUNT_XC_NEXT_IO] = "interface-output",
+ },
+};
+
+VLIB_REGISTER_NODE (lip_punt_xc_node) = {
+ .name = "linux-cp-punt-xc",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lip_punt_xc_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_next_nodes = LIP_PUNT_N_NEXT,
+ .n_next_nodes = LIP_PUNT_XC_N_NEXT,
.next_nodes = {
- [LIP_PUNT_NEXT_DROP] = "error-drop",
- [LIP_PUNT_NEXT_IO] = "interface-output",
+ [LIP_PUNT_XC_NEXT_DROP] = "error-drop",
+ [LIP_PUNT_XC_NEXT_IO] = "interface-output",
},
};
@@ -190,7 +254,7 @@ VLIB_NODE_FN (lcp_punt_l3_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
u32 n_left_from, *from, *to_next, n_left_to_next;
- lip_punt_next_t next_index;
+ lip_punt_xc_next_t next_index;
next_index = node->cached_next_index;
n_left_from = frame->n_vectors;
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
index 0efd53e64ef..27f53357a4d 100644
--- a/src/plugins/linux-cp/lcp_router.c
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -17,7 +17,6 @@
#include <linux/if.h>
#include <linux/mpls.h>
-//#include <vlib/vlib.h>
#include <vlib/unix/plugin.h>
#include <linux-cp/lcp_nl.h>
#include <linux-cp/lcp_interface.h>
@@ -1543,6 +1542,12 @@ const nl_vft_t lcp_router_vft = {
.cb = lcp_router_route_sync_end },
};
+/* Interface-pair deletion callback (registered as .pair_del_fn in
+ * lcp_router_init): calls lcp_router_ip6_mroutes_add_del () for the pair's
+ * phy interface with a second argument of 0.
+ * NOTE(review): 0 presumably selects "delete" - confirm against the
+ * helper's signature. */
+static void
+lcp_lcp_router_interface_del_cb (lcp_itf_pair_t *lip)
+{
+ lcp_router_ip6_mroutes_add_del (lip->lip_phy_sw_if_index, 0);
+}
+
static clib_error_t *
lcp_router_init (vlib_main_t *vm)
{
@@ -1550,6 +1555,12 @@ lcp_router_init (vlib_main_t *vm)
nl_register_vft (&lcp_router_vft);
+ lcp_itf_pair_vft_t lcp_router_interface_del_vft = {
+ .pair_del_fn = lcp_lcp_router_interface_del_cb,
+ };
+
+ lcp_itf_pair_register_vft (&lcp_router_interface_del_vft);
+
/*
* allocate 2 route sources. The low priority source will be for
* dynamic routes. If a dynamic route daemon (FRR) tries to remove its
diff --git a/src/plugins/lisp/lisp-gpe/decap.c b/src/plugins/lisp/lisp-gpe/decap.c
index 18e32675a32..b568fef24fa 100644
--- a/src/plugins/lisp/lisp-gpe/decap.c
+++ b/src/plugins/lisp/lisp-gpe/decap.c
@@ -102,9 +102,9 @@ next_index_to_iface (lisp_gpe_main_t * lgm, u32 next_index)
}
static_always_inline void
-incr_decap_stats (vnet_main_t * vnm, u32 thread_index, u32 length,
- u32 sw_if_index, u32 * last_sw_if_index, u32 * n_packets,
- u32 * n_bytes)
+incr_decap_stats (vnet_main_t *vnm, clib_thread_index_t thread_index,
+ u32 length, u32 sw_if_index, u32 *last_sw_if_index,
+ u32 *n_packets, u32 *n_bytes)
{
vnet_interface_main_t *im;
diff --git a/src/plugins/lisp/lisp-gpe/interface.c b/src/plugins/lisp/lisp-gpe/interface.c
index ed2b08f9aaf..5d3ad9463ea 100644
--- a/src/plugins/lisp/lisp-gpe/interface.c
+++ b/src/plugins/lisp/lisp-gpe/interface.c
@@ -233,7 +233,7 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 n_left_from, next_index, *from, *to_next;
lisp_gpe_main_t *lgm = &lisp_gpe_main;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
from = vlib_frame_vector_args (from_frame);
diff --git a/src/plugins/mactime/node.c b/src/plugins/mactime/node.c
index fad487e666e..dfe7a26c2af 100644
--- a/src/plugins/mactime/node.c
+++ b/src/plugins/mactime/node.c
@@ -87,7 +87,7 @@ mactime_node_inline (vlib_main_t * vm,
clib_bihash_8_8_t *lut = &mm->lookup_table;
u32 packets_ok = 0;
f64 now;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
u8 arc = im->output_feature_arc_index;
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index 652808e6d37..1ff585ceb3a 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -154,7 +154,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
next_index = node->cached_next_index;
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = mm->domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 *buffer0 = 0;
while (n_left_from > 0)
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index fe29af458a2..ec89056e0cc 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -117,7 +117,7 @@ ip4_map_t_icmp (vlib_main_t * vm,
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
vlib_combined_counter_main_t *cm = map_main.domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
@@ -549,7 +549,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
vlib_combined_counter_main_t *cm = map_main.domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 3d9b21dfcd9..33d5a0ebbd3 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -166,7 +166,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_node_get_runtime (vm, ip6_map_node.index);
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = mm->domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -559,7 +559,7 @@ ip6_map_post_ip4_reass (vlib_main_t * vm,
vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = mm->domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -651,7 +651,7 @@ ip6_map_icmp_relay (vlib_main_t * vm,
vlib_node_runtime_t *error_node =
vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
map_main_t *mm = &map_main;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u16 *fragment_ids, *fid;
from = vlib_frame_vector_args (frame);
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index f8d894a013a..854410eb462 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -118,7 +118,7 @@ ip6_map_t_icmp (vlib_main_t * vm,
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
vlib_combined_counter_main_t *cm = map_main.domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
@@ -494,7 +494,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_node_get_runtime (vm, ip6_map_t_node.index);
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = map_main.domain_counters;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index 7e3dd44db2c..ad8512ac81e 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -31,7 +31,7 @@
#include <limits.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
@@ -379,6 +379,12 @@ memif_connect (memif_if_t * mif)
CLIB_CACHE_LINE_BYTES);
vec_foreach (dma_info, mq->dma_info)
{
+ vlib_buffer_t *bt = &dma_info->data.buffer_template;
+
+ clib_memset (bt, 0, sizeof (*bt));
+ bt->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ bt->total_length_not_including_first_buffer = 0;
+ vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~0;
vec_validate_aligned (dma_info->data.desc_data,
pow2_mask (max_log2_ring_sz),
CLIB_CACHE_LINE_BYTES);
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index d483f92b2fe..08b248df534 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -504,7 +504,7 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_left_to_next;
u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
vlib_buffer_t *buffer_ptrs[MEMIF_RX_VECTOR_SZ];
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
memif_per_thread_data_t *ptd =
vec_elt_at_index (mm->per_thread_data, thread_index);
u16 cur_slot, ring_size, n_slots, mask;
@@ -763,7 +763,7 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 slot, s0;
memif_desc_t *d0;
vlib_buffer_t *b0, *b1, *b2, *b3;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data,
thread_index);
u16 cur_slot, last_slot, ring_size, n_slots, mask, head;
@@ -1061,7 +1061,7 @@ CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm,
{
memif_main_t *mm = &memif_main;
memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_left_to_next = 0;
u16 nexts[MEMIF_RX_VECTOR_SZ], *next;
u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
index 43455d00522..af82a8bfaa3 100644
--- a/src/plugins/memif/private.h
+++ b/src/plugins/memif/private.h
@@ -76,7 +76,7 @@
#define memif_file_del(a) \
do \
{ \
- memif_log_debug (0, "clib_file_del idx %u", a - file_main.file_pool); \
+ memif_log_debug (0, "clib_file_del idx %u", (a)->index); \
clib_file_del (&file_main, a); \
} \
while (0)
diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c
index 001f26f13ef..c2b11fc2ecb 100644
--- a/src/plugins/memif/socket.c
+++ b/src/plugins/memif/socket.c
@@ -30,7 +30,7 @@
#include <limits.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vpp/app/version.h>
diff --git a/src/plugins/nat/det44/det44.h b/src/plugins/nat/det44/det44.h
index e576bfb65e8..683f554f03c 100644
--- a/src/plugins/nat/det44/det44.h
+++ b/src/plugins/nat/det44/det44.h
@@ -38,7 +38,6 @@
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/lib/nat_proto.h>
diff --git a/src/plugins/nat/det44/det44_in2out.c b/src/plugins/nat/det44/det44_in2out.c
index 3f5e05a064c..39a9ecabac7 100644
--- a/src/plugins/nat/det44/det44_in2out.c
+++ b/src/plugins/nat/det44/det44_in2out.c
@@ -21,6 +21,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
@@ -29,7 +30,6 @@
#include <nat/det44/det44_inlines.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/nat_inlines.h>
typedef enum
diff --git a/src/plugins/nat/det44/det44_out2in.c b/src/plugins/nat/det44/det44_out2in.c
index ab6acd4f8e9..dd89606ff10 100644
--- a/src/plugins/nat/det44/det44_out2in.c
+++ b/src/plugins/nat/det44/det44_out2in.c
@@ -21,6 +21,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
@@ -29,7 +30,6 @@
#include <nat/det44/det44_inlines.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/nat_inlines.h>
typedef enum
diff --git a/src/plugins/nat/dslite/dslite.h b/src/plugins/nat/dslite/dslite.h
index f05670c9bf5..979afb476b7 100644
--- a/src/plugins/nat/dslite/dslite.h
+++ b/src/plugins/nat/dslite/dslite.h
@@ -22,7 +22,6 @@
#include <nat/lib/lib.h>
#include <nat/lib/alloc.h>
-#include <nat/lib/inlines.h>
typedef struct
{
diff --git a/src/plugins/nat/dslite/dslite_in2out.c b/src/plugins/nat/dslite/dslite_in2out.c
index 522c3cf4123..806969f5f4d 100644
--- a/src/plugins/nat/dslite/dslite_in2out.c
+++ b/src/plugins/nat/dslite/dslite_in2out.c
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <vnet/ip/ip4_to_ip6.h>
#include <nat/dslite/dslite.h>
#include <nat/lib/nat_syslog.h>
diff --git a/src/plugins/nat/dslite/dslite_out2in.c b/src/plugins/nat/dslite/dslite_out2in.c
index 531bbb468bb..9ec48d458e5 100644
--- a/src/plugins/nat/dslite/dslite_out2in.c
+++ b/src/plugins/nat/dslite/dslite_out2in.c
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <vnet/ip/ip4_to_ip6.h>
#include <nat/dslite/dslite.h>
typedef enum
diff --git a/src/plugins/nat/lib/ipfix_logging.c b/src/plugins/nat/lib/ipfix_logging.c
index 593fa09f7e2..f569ccd1918 100644
--- a/src/plugins/nat/lib/ipfix_logging.c
+++ b/src/plugins/nat/lib/ipfix_logging.c
@@ -22,7 +22,6 @@
#include <vlibmemory/api.h>
#include <vppinfra/atomics.h>
#include <nat/lib/ipfix_logging.h>
-#include <nat/lib/inlines.h>
vlib_node_registration_t nat_ipfix_flush_node;
nat_ipfix_logging_main_t nat_ipfix_logging_main;
diff --git a/src/plugins/nat/lib/nat_syslog.c b/src/plugins/nat/lib/nat_syslog.c
index 98777ebf280..93756a561bc 100644
--- a/src/plugins/nat/lib/nat_syslog.c
+++ b/src/plugins/nat/lib/nat_syslog.c
@@ -21,7 +21,6 @@
#include <vnet/syslog/syslog.h>
#include <nat/lib/nat_syslog.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/nat_syslog_constants.h>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h
index 706511475cf..c3a959b0635 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed.h
@@ -31,7 +31,6 @@
#include <vlibapi/api.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
/* default number of worker handoff frame queue elements */
#define NAT_FQ_NELTS_DEFAULT 64
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
index 04e5236b7f9..8cd93f263c6 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
@@ -27,6 +27,7 @@
#include <nat/lib/log.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
+#include <vnet/ip/ip4_to_ip6.h>
always_inline void
init_ed_k (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.c b/src/plugins/nat/nat44-ei/nat44_ei.c
index e16625a2946..d1959f72ae7 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei.c
@@ -21,6 +21,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/ip/ip_table.h>
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/fib/fib_table.h>
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.h b/src/plugins/nat/nat44-ei/nat44_ei.h
index b4aa0f26c0b..786fb0cfc2c 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.h
+++ b/src/plugins/nat/nat44-ei/nat44_ei.h
@@ -35,7 +35,6 @@
#include <vppinfra/hash.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/nat_proto.h>
/* default number of worker handoff frame queue elements */
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
index 3b981d69986..2fbf2832d5e 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
@@ -21,6 +21,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/fib/ip4_fib.h>
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
index 5d91cb04f7c..805a6962868 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
@@ -21,6 +21,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/fib/ip4_fib.h>
diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c
index 950eea60e5e..c59cfbbbd55 100644
--- a/src/plugins/nat/nat64/nat64.c
+++ b/src/plugins/nat/nat64/nat64.c
@@ -15,6 +15,7 @@
#include <vppinfra/crc32.h>
#include <vnet/fib/ip4_fib.h>
+#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
diff --git a/src/plugins/nat/nat64/nat64.h b/src/plugins/nat/nat64/nat64.h
index 9eb8d915390..2577880c7a4 100644
--- a/src/plugins/nat/nat64/nat64.h
+++ b/src/plugins/nat/nat64/nat64.h
@@ -30,7 +30,6 @@
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <nat/lib/lib.h>
-#include <nat/lib/inlines.h>
#include <nat/lib/nat_inlines.h>
#include <nat/nat64/nat64_db.h>
diff --git a/src/plugins/nat/nat64/nat64_db.c b/src/plugins/nat/nat64/nat64_db.c
index e4e9febcb12..6ba77c58965 100644
--- a/src/plugins/nat/nat64/nat64_db.c
+++ b/src/plugins/nat/nat64/nat64_db.c
@@ -16,7 +16,6 @@
#include <vnet/fib/fib_table.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/lib/nat_syslog.h>
-#include <nat/lib/inlines.h>
#include <nat/nat64/nat64_db.h>
int
diff --git a/src/plugins/nat/pnat/pnat.api b/src/plugins/nat/pnat/pnat.api
index de555c41412..82c2de49682 100644
--- a/src/plugins/nat/pnat/pnat.api
+++ b/src/plugins/nat/pnat/pnat.api
@@ -165,6 +165,23 @@ define pnat_interfaces_details
vl_api_pnat_mask_t lookup_mask[2]; /* PNAT_ATTACHMENT_POINT_MAX */
};
+
+autoendian define pnat_flow_lookup
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_pnat_attachment_point_t attachment;
+ vl_api_pnat_match_tuple_t match;
+};
+
+autoendian define pnat_flow_lookup_reply
+{
+ u32 context;
+ i32 retval;
+ u32 binding_index;
+};
+
counters pnat {
none {
severity info;
diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c
index a4e7ff192bf..f627307628d 100644
--- a/src/plugins/nat/pnat/pnat_api.c
+++ b/src/plugins/nat/pnat/pnat_api.c
@@ -93,6 +93,20 @@ static void vl_api_pnat_binding_del_t_handler(vl_api_pnat_binding_del_t *mp) {
REPLY_MACRO_END(VL_API_PNAT_BINDING_DEL_REPLY);
}
+static void vl_api_pnat_flow_lookup_t_handler(vl_api_pnat_flow_lookup_t *mp) {
+ pnat_main_t *pm = &pnat_main;
+ vl_api_pnat_flow_lookup_reply_t *rmp;
+ u32 binding_index;
+ int rv = 0;
+ binding_index =
+ pnat_flow_lookup(mp->sw_if_index, mp->attachment, &mp->match);
+ if (binding_index == ~0) {
+ rv = -1;
+ }
+ REPLY_MACRO2_END(VL_API_PNAT_FLOW_LOOKUP_REPLY,
+ ({ rmp->binding_index = binding_index; }));
+}
+
/*
* Workaround for a bug in vppapigen that doesn't register the endian handler
* for _details messages. When that's fixed it should be possible to use
diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c
index ebef215eb3b..0d92d03151c 100644
--- a/src/plugins/netmap/netmap.c
+++ b/src/plugins/netmap/netmap.c
@@ -22,7 +22,7 @@
#include <fcntl.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ethernet/ethernet.h>
#include <netmap/net_netmap.h>
@@ -53,7 +53,7 @@ close_netmap_if (netmap_main_t * nm, netmap_if_t * nif)
{
if (nif->clib_file_index != ~0)
{
- clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index);
+ clib_file_del_by_index (&file_main, nif->clib_file_index);
nif->clib_file_index = ~0;
}
else if (nif->fd > -1)
diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c
index 6169847fa79..85e7db5808b 100644
--- a/src/plugins/netmap/node.c
+++ b/src/plugins/netmap/node.c
@@ -98,7 +98,7 @@ netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_free_bufs;
struct netmap_ring *ring;
int cur_ring;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
if (nif->per_interface_next_index != ~0)
@@ -255,7 +255,7 @@ VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm,
{
int i;
u32 n_rx_packets = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
netmap_main_t *nm = &netmap_main;
netmap_if_t *nmi;
diff --git a/src/plugins/nsh/nsh.c b/src/plugins/nsh/nsh.c
index a2c24e27b26..06dd45be944 100644
--- a/src/plugins/nsh/nsh.c
+++ b/src/plugins/nsh/nsh.c
@@ -20,7 +20,7 @@
#include <nsh/nsh.h>
#include <gre/gre.h>
#include <vxlan/vxlan.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
#include <vnet/adj/adj.h>
#include <vpp/app/version.h>
@@ -182,7 +182,8 @@ nsh_md2_set_next_ioam_export_override (uword next)
clib_error_t *
nsh_init (vlib_main_t * vm)
{
- vlib_node_t *node, *gre4_input, *gre6_input;
+ vlib_node_t *node, *gre4_input, *gre6_input, *vxlan4_gpe_input,
+ *vxlan6_gpe_input;
nsh_main_t *nm = &nsh_main;
clib_error_t *error = 0;
uword next_node;
@@ -222,20 +223,24 @@ nsh_init (vlib_main_t * vm)
/* Add dispositions to nodes that feed nsh-input */
//alagalah - validate we don't really need to use the node value
+ vxlan4_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input");
+ vxlan6_gpe_input = vlib_get_node_by_name (vm, (u8 *) "vxlan6-gpe-input");
+ nm->vgm = vlib_get_plugin_symbol ("vxlan-gpe_plugin.so", "vxlan_gpe_main");
+ if (vxlan4_gpe_input == 0 || vxlan6_gpe_input == 0 || nm->vgm == 0)
+ {
+ error = clib_error_return (0, "vxlan_gpe_plugin.so is not loaded");
+ return error;
+ }
next_node =
- vlib_node_add_next (vm, vxlan4_gpe_input_node.index,
- nm->nsh_input_node_index);
- vlib_node_add_next (vm, vxlan4_gpe_input_node.index,
- nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, vxlan4_gpe_input_node.index,
+ vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, vxlan4_gpe_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, vxlan4_gpe_input->index,
nsh_aware_vnf_proxy_node.index);
- vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node);
+ nm->vgm->register_decap_protocol (VXLAN_GPE_PROTOCOL_NSH, next_node);
- vlib_node_add_next (vm, vxlan6_gpe_input_node.index,
- nm->nsh_input_node_index);
- vlib_node_add_next (vm, vxlan6_gpe_input_node.index,
- nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, vxlan6_gpe_input_node.index,
+ vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, vxlan6_gpe_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, vxlan6_gpe_input->index,
nsh_aware_vnf_proxy_node.index);
gre4_input = vlib_get_node_by_name (vm, (u8 *) "gre4-input");
@@ -280,7 +285,9 @@ nsh_init (vlib_main_t * vm)
return error;
}
-VLIB_INIT_FUNCTION (nsh_init);
+VLIB_INIT_FUNCTION (nsh_init) = {
+ .runs_after = VLIB_INITS ("vxlan_gpe_init"),
+};
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
diff --git a/src/plugins/nsh/nsh.h b/src/plugins/nsh/nsh.h
index 86a9a7e95c3..c408ddb99a2 100644
--- a/src/plugins/nsh/nsh.h
+++ b/src/plugins/nsh/nsh.h
@@ -18,6 +18,7 @@
#include <vnet/vnet.h>
#include <nsh/nsh_packet.h>
#include <vnet/ip/ip4_packet.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
typedef struct {
u16 class;
@@ -166,6 +167,10 @@ typedef struct {
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
+
+ /* vxlan gpe plugin */
+ vxlan_gpe_main_t *vgm;
+
} nsh_main_t;
extern nsh_main_t nsh_main;
diff --git a/src/plugins/nsh/nsh_pop.c b/src/plugins/nsh/nsh_pop.c
index 8de319e158b..d66cfc9de27 100644
--- a/src/plugins/nsh/nsh_pop.c
+++ b/src/plugins/nsh/nsh_pop.c
@@ -19,7 +19,7 @@
#include <vnet/plugin/plugin.h>
#include <nsh/nsh.h>
#include <vnet/gre/packet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <plugins/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
#include <vlibapi/api.h>
diff --git a/src/plugins/ping/ping_api.c b/src/plugins/ping/ping_api.c
index 5578fa560f2..a5af1033d0e 100644
--- a/src/plugins/ping/ping_api.c
+++ b/src/plugins/ping/ping_api.c
@@ -122,16 +122,22 @@ vl_api_want_ping_finished_events_t_handler (
while ((sleep_interval =
time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0)
{
- uword event_type;
+ uword event_count;
vlib_process_wait_for_event_or_clock (vm, sleep_interval);
- event_type = vlib_process_get_events (vm, 0);
- if (event_type == ~0)
+ if (dst_addr.version == AF_IP4)
+ event_count =
+ vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP4);
+ else if (dst_addr.version == AF_IP6)
+ event_count =
+ vlib_process_get_events_with_type (vm, 0, PING_RESPONSE_IP6);
+ else
break;
- if (event_type == PING_RESPONSE_IP4 ||
- event_type == PING_RESPONSE_IP6)
- reply_count += 1;
+ if (event_count == 0)
+ break;
+
+ reply_count += 1;
}
}
diff --git a/src/plugins/pppoe/pppoe_cp_node.c b/src/plugins/pppoe/pppoe_cp_node.c
index c96559679f0..efafc448f98 100644
--- a/src/plugins/pppoe/pppoe_cp_node.c
+++ b/src/plugins/pppoe/pppoe_cp_node.c
@@ -73,7 +73,7 @@ VLIB_NODE_FN (pppoe_cp_dispatch_node) (vlib_main_t * vm,
vnet_main_t * vnm = pem->vnet_main;
vnet_interface_main_t * im = &vnm->interface_main;
u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
pppoe_entry_key_t cached_key;
pppoe_entry_result_t cached_result;
diff --git a/src/plugins/pppoe/pppoe_decap.c b/src/plugins/pppoe/pppoe_decap.c
index 854364b1aca..17fbeaabb43 100644
--- a/src/plugins/pppoe/pppoe_decap.c
+++ b/src/plugins/pppoe/pppoe_decap.c
@@ -54,7 +54,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vnet_main_t * vnm = pem->vnet_main;
vnet_interface_main_t * im = &vnm->interface_main;
u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
pppoe_entry_key_t cached_key;
pppoe_entry_result_t cached_result;
diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c
index 475e98b1038..0ddc96f7474 100644
--- a/src/plugins/prom/prom.c
+++ b/src/plugins/prom/prom.c
@@ -382,13 +382,16 @@ prom_stat_segment_client_init (void)
stat_segment_adjust (scm, (void *) scm->shared_header->directory_vector);
}
-void
+clib_error_t *
prom_enable (vlib_main_t *vm)
{
prom_main_t *pm = &prom_main;
pm->register_url = vlib_get_plugin_symbol ("http_static_plugin.so",
"hss_register_url_handler");
+ if (pm->register_url == 0)
+ return clib_error_return (0, "http_static_plugin.so not loaded");
+
pm->send_data =
vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data");
pm->register_url (prom_stats_dump, "stats.prom", HTTP_REQ_GET);
@@ -400,6 +403,8 @@ prom_enable (vlib_main_t *vm)
prom_scraper_process_enable (vm);
prom_stat_segment_client_init ();
+
+ return 0;
}
static clib_error_t *
diff --git a/src/plugins/prom/prom.h b/src/plugins/prom/prom.h
index 898e4c209d1..a06302c1ff9 100644
--- a/src/plugins/prom/prom.h
+++ b/src/plugins/prom/prom.h
@@ -44,7 +44,7 @@ typedef enum prom_process_evt_codes_
PROM_SCRAPER_EVT_RUN,
} prom_process_evt_codes_t;
-void prom_enable (vlib_main_t *vm);
+clib_error_t *prom_enable (vlib_main_t *vm);
prom_main_t *prom_get_main (void);
void prom_stat_patterns_set (u8 **patterns);
diff --git a/src/plugins/prom/prom_cli.c b/src/plugins/prom/prom_cli.c
index 705e54ac1b8..09407d46235 100644
--- a/src/plugins/prom/prom_cli.c
+++ b/src/plugins/prom/prom_cli.c
@@ -131,7 +131,7 @@ prom_command_fn (vlib_main_t *vm, unformat_input_t *input,
no_input:
if (is_enable && !pm->is_enabled)
- prom_enable (vm);
+ return prom_enable (vm);
return 0;
}
diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c
index 6a8806e2795..d7727153aa7 100644
--- a/src/plugins/pvti/input.c
+++ b/src/plugins/pvti/input.c
@@ -124,7 +124,7 @@ pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
pvti_main_t *pvm = &pvti_main;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
pvti_per_thread_data_t *ptd =
vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c
index 1939c6f585a..5fb6263795e 100644
--- a/src/plugins/pvti/output.c
+++ b/src/plugins/pvti/output.c
@@ -340,7 +340,7 @@ pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
u8 stream_index = pvti_get_stream_index (is_ip6);
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
pvti_per_thread_data_t *ptd =
vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h
index ac097c5ecca..608610362d7 100644
--- a/src/plugins/pvti/pvti.h
+++ b/src/plugins/pvti/pvti.h
@@ -223,7 +223,7 @@ extern vlib_node_registration_t pvti_periodic_node;
always_inline u8
pvti_get_stream_index (int is_ip6)
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
ASSERT ((thread_index & 0xffffff80) == 0);
diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c
index 10651f10e7e..77d35634fa9 100644
--- a/src/plugins/quic/quic.c
+++ b/src/plugins/quic/quic.c
@@ -51,7 +51,8 @@ static void quic_update_timer (quic_ctx_t * ctx);
static void quic_check_quic_session_connected (quic_ctx_t * ctx);
static int quic_reset_connection (u64 udp_session_handle,
quic_rx_packet_ctx_t * pctx);
-static void quic_proto_on_close (u32 ctx_index, u32 thread_index);
+static void quic_proto_on_close (u32 ctx_index,
+ clib_thread_index_t thread_index);
static quicly_stream_open_t on_stream_open;
static quicly_closed_by_remote_t on_closed_by_remote;
@@ -133,7 +134,7 @@ quic_crypto_context_alloc (u8 thread_index)
}
static crypto_context_t *
-quic_crypto_context_get (u32 cr_index, u32 thread_index)
+quic_crypto_context_get (u32 cr_index, clib_thread_index_t thread_index)
{
quic_main_t *qm = &quic_main;
ASSERT (cr_index >> 24 == thread_index);
@@ -381,7 +382,7 @@ error:
/* Helper functions */
static u32
-quic_ctx_alloc (u32 thread_index)
+quic_ctx_alloc (clib_thread_index_t thread_index)
{
quic_main_t *qm = &quic_main;
quic_ctx_t *ctx;
@@ -401,7 +402,7 @@ static void
quic_ctx_free (quic_ctx_t * ctx)
{
QUIC_DBG (2, "Free ctx %u %x", ctx->c_thread_index, ctx->c_c_index);
- u32 thread_index = ctx->c_thread_index;
+ clib_thread_index_t thread_index = ctx->c_thread_index;
QUIC_ASSERT (ctx->timer_handle == QUIC_TIMER_HANDLE_INVALID);
if (CLIB_DEBUG)
clib_memset (ctx, 0xfb, sizeof (*ctx));
@@ -409,13 +410,13 @@ quic_ctx_free (quic_ctx_t * ctx)
}
static quic_ctx_t *
-quic_ctx_get (u32 ctx_index, u32 thread_index)
+quic_ctx_get (u32 ctx_index, clib_thread_index_t thread_index)
{
return pool_elt_at_index (quic_main.ctx_pool[thread_index], ctx_index);
}
static quic_ctx_t *
-quic_ctx_get_if_valid (u32 ctx_index, u32 thread_index)
+quic_ctx_get_if_valid (u32 ctx_index, clib_thread_index_t thread_index)
{
if (pool_is_free_index (quic_main.ctx_pool[thread_index], ctx_index))
return 0;
@@ -1100,7 +1101,7 @@ quic_get_time (quicly_now_t * self)
}
static u32
-quic_set_time_now (u32 thread_index)
+quic_set_time_now (clib_thread_index_t thread_index)
{
vlib_main_t *vlib_main = vlib_get_main ();
f64 time = vlib_time_now (vlib_main);
@@ -1396,7 +1397,7 @@ quic_connect (transport_endpoint_cfg_t * tep)
}
static void
-quic_proto_on_close (u32 ctx_index, u32 thread_index)
+quic_proto_on_close (u32 ctx_index, clib_thread_index_t thread_index)
{
int err;
quic_ctx_t *ctx = quic_ctx_get_if_valid (ctx_index, thread_index);
@@ -1548,7 +1549,7 @@ quic_stop_listen (u32 lctx_index)
}
static transport_connection_t *
-quic_connection_get (u32 ctx_index, u32 thread_index)
+quic_connection_get (u32 ctx_index, clib_thread_index_t thread_index)
{
quic_ctx_t *ctx;
ctx = quic_ctx_get (ctx_index, thread_index);
@@ -1600,7 +1601,7 @@ static u8 *
format_quic_connection (u8 * s, va_list * args)
{
u32 qc_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
quic_ctx_t *ctx = quic_ctx_get (qc_index, thread_index);
s = format (s, "%U", format_quic_ctx, ctx, verbose);
@@ -1611,7 +1612,7 @@ static u8 *
format_quic_half_open (u8 * s, va_list * args)
{
u32 qc_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
quic_ctx_t *ctx = quic_ctx_get (qc_index, thread_index);
s = format (s, "[#%d][Q] half-open app %u", thread_index,
ctx->parent_app_id);
@@ -1623,7 +1624,7 @@ static u8 *
format_quic_listener (u8 * s, va_list * args)
{
u32 tci = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
quic_ctx_t *ctx = quic_ctx_get (tci, thread_index);
s = format (s, "%U", format_quic_ctx, ctx, verbose);
@@ -1660,7 +1661,7 @@ quic_on_quic_session_connected (quic_ctx_t * ctx)
session_t *quic_session;
app_worker_t *app_wrk;
u32 ctx_id = ctx->c_c_index;
- u32 thread_index = ctx->c_thread_index;
+ clib_thread_index_t thread_index = ctx->c_thread_index;
int rv;
quic_session = session_alloc (thread_index);
@@ -1775,7 +1776,7 @@ static void
quic_transfer_connection (u32 ctx_index, u32 dest_thread)
{
quic_ctx_t *ctx, *temp_ctx;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
QUIC_DBG (2, "Transferring conn %u to thread %u", ctx_index, dest_thread);
@@ -1811,7 +1812,7 @@ quic_udp_session_connected_callback (u32 quic_app_index, u32 ctx_index,
app_worker_t *app_wrk;
quicly_conn_t *conn;
quic_ctx_t *ctx;
- u32 thread_index;
+ clib_thread_index_t thread_index;
int ret;
quicly_context_t *quicly_ctx;
@@ -1918,7 +1919,7 @@ quic_udp_session_accepted_callback (session_t * udp_session)
u32 ctx_index;
quic_ctx_t *ctx, *lctx;
session_t *udp_listen_session;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
udp_listen_session =
listen_session_get_from_handle (udp_session->listener_handle);
@@ -2199,7 +2200,7 @@ quic_process_one_rx_packet (u64 udp_session_handle, svm_fifo_t * f,
{
size_t plen;
u32 full_len, ret;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 cur_deq = svm_fifo_max_dequeue (f) - fifo_offset;
quicly_context_t *quicly_ctx;
session_t *udp_session;
@@ -2281,7 +2282,7 @@ quic_udp_session_rx_callback (session_t * udp_session)
u32 max_deq;
u64 udp_session_handle = session_handle (udp_session);
int rv = 0;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 cur_deq, fifo_offset, max_packets, i;
quic_rx_packet_ctx_t packets_ctx[QUIC_RCV_MAX_PACKETS];
@@ -2306,7 +2307,7 @@ rx_start:
#endif
for (i = 0; i < max_packets; i++)
{
- packets_ctx[i].thread_index = UINT32_MAX;
+ packets_ctx[i].thread_index = CLIB_INVALID_THREAD_INDEX;
packets_ctx[i].ctx_index = UINT32_MAX;
packets_ctx[i].ptype = QUIC_PACKET_TYPE_DROP;
@@ -2421,8 +2422,8 @@ quic_get_transport_listener_endpoint (u32 listener_index,
}
static void
-quic_get_transport_endpoint (u32 ctx_index, u32 thread_index,
- transport_endpoint_t * tep, u8 is_lcl)
+quic_get_transport_endpoint (u32 ctx_index, clib_thread_index_t thread_index,
+ transport_endpoint_t *tep, u8 is_lcl)
{
quic_ctx_t *ctx;
ctx = quic_ctx_get (ctx_index, thread_index);
diff --git a/src/plugins/quic/quic.h b/src/plugins/quic/quic.h
index 081bcb120e9..4474aa15e75 100644
--- a/src/plugins/quic/quic.h
+++ b/src/plugins/quic/quic.h
@@ -205,7 +205,7 @@ typedef struct quic_session_cache_
typedef struct quic_stream_data_
{
u32 ctx_id;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 app_rx_data_len; /**< bytes received, to be read by external app */
u32 app_tx_data_len; /**< bytes sent */
} quic_stream_data_t;
@@ -232,7 +232,7 @@ typedef struct quic_rx_packet_ctx_
quicly_decoded_packet_t packet;
u8 data[QUIC_MAX_PACKET_SIZE];
u32 ctx_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
union
{
struct sockaddr sa;
diff --git a/src/plugins/quic/quic_crypto.c b/src/plugins/quic/quic_crypto.c
index 9e2c915daaa..4e11eff2431 100644
--- a/src/plugins/quic/quic_crypto.c
+++ b/src/plugins/quic/quic_crypto.c
@@ -248,8 +248,7 @@ quic_crypto_decrypt_packet (quic_ctx_t *qctx, quic_rx_packet_ctx_t *pctx)
pctx->packet.octets.len - aead_off, pn, pctx->packet.octets.base,
aead_off)) == SIZE_MAX)
{
- fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __FUNCTION__,
- pn);
+ fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __func__, pn);
return;
}
@@ -349,8 +348,7 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc,
}
else
{
- QUIC_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__,
- _ctx->algo->name);
+ QUIC_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name);
assert (0);
}
@@ -405,8 +403,7 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
}
else
{
- QUIC_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__,
- _ctx->algo->name);
+ QUIC_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name);
assert (0);
}
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index 8aeb586a42d..a4dbdb02831 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -23,7 +23,7 @@
#include <vppinfra/linux/sysfs.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vlib/pci/pci.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
diff --git a/src/plugins/sflow/CMakeLists.txt b/src/plugins/sflow/CMakeLists.txt
index 35433bd24df..c966fcc4480 100644
--- a/src/plugins/sflow/CMakeLists.txt
+++ b/src/plugins/sflow/CMakeLists.txt
@@ -12,39 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-vpp_find_path(NETLINK_INCLUDE_DIR NAMES linux/netlink.h)
-if (NOT NETLINK_INCLUDE_DIR)
- message(WARNING "netlink headers not found - sflow plugin disabled")
- return()
-endif()
-
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
message(WARNING "sflow is not supported on FreeBSD - sflow plugin disabled")
return()
endif()
-LIST(FIND excluded_plugins linux-cp exc_index)
-if(${exc_index} EQUAL "-1")
- message(WARNING "sflow plugin - linux-cp plugin included: compiling VAPI calls")
- add_compile_definitions(SFLOW_USE_VAPI)
-else()
- message(WARNING "sflow plugin - linux-cp plugin excluded: not compiling VAPI calls")
-endif()
-
-include_directories(${CMAKE_SOURCE_DIR}/vpp-api ${CMAKE_CURRENT_BINARY_DIR}/../../vpp-api)
add_vpp_plugin(sflow
SOURCES
sflow.c
node.c
sflow_common.h
sflow.h
+ sflow_dlapi.h
sflow_psample.c
sflow_psample.h
sflow_psample_fields.h
sflow_usersock.c
sflow_usersock.h
- sflow_vapi.c
- sflow_vapi.h
MULTIARCH_SOURCES
node.c
diff --git a/src/plugins/sflow/sflow.c b/src/plugins/sflow/sflow.c
index 5aa65062330..14d07d69233 100644
--- a/src/plugins/sflow/sflow.c
+++ b/src/plugins/sflow/sflow.c
@@ -25,6 +25,7 @@
#include <sflow/sflow.api_enum.h>
#include <sflow/sflow.api_types.h>
#include <sflow/sflow_psample.h>
+#include <sflow/sflow_dlapi.h>
#include <vpp-api/client/stat_client.h>
#include <vlib/stats/stats.h>
@@ -181,8 +182,15 @@ retry:
SFLOWUSSpec_setMsgType (&spec, SFLOW_VPP_MSG_IF_COUNTERS);
SFLOWUSSpec_setAttr (&spec, SFLOW_VPP_ATTR_PORTNAME, hw->name,
vec_len (hw->name));
- SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFINDEX, sfif->hw_if_index);
- if (sfif->linux_if_index)
+ SFLOWUSSpec_setAttrInt (&spec, SFLOW_VPP_ATTR_IFINDEX, sfif->sw_if_index);
+
+ if (smp->lcp_itf_pair_get_vif_index_by_phy)
+ {
+ sfif->linux_if_index =
+ (*smp->lcp_itf_pair_get_vif_index_by_phy) (sfif->sw_if_index);
+ }
+
+ if (sfif->linux_if_index != INDEX_INVALID)
{
// We know the corresponding Linux ifIndex for this interface, so include
// that here.
@@ -238,7 +246,8 @@ total_drops (sflow_main_t *smp)
{
// sum sendmsg and worker-fifo drops
u32 all_drops = smp->psample_send_drops;
- for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++)
+ for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads;
+ thread_index++)
{
sflow_per_thread_data_t *sfwk =
vec_elt_at_index (smp->per_thread_data, thread_index);
@@ -313,8 +322,8 @@ read_worker_fifos (sflow_main_t *smp)
for (; batch < SFLOW_READ_BATCH; batch++)
{
u32 psample_send = 0, psample_send_fail = 0;
- for (u32 thread_index = 0; thread_index < smp->total_threads;
- thread_index++)
+ for (clib_thread_index_t thread_index = 0;
+ thread_index < smp->total_threads; thread_index++)
{
sflow_per_thread_data_t *sfwk =
vec_elt_at_index (smp->per_thread_data, thread_index);
@@ -380,7 +389,8 @@ read_node_counters (sflow_main_t *smp, sflow_err_ctrs_t *ctrs)
{
for (u32 ec = 0; ec < SFLOW_N_ERROR; ec++)
ctrs->counters[ec] = 0;
- for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++)
+ for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads;
+ thread_index++)
{
sflow_per_thread_data_t *sfwk =
vec_elt_at_index (smp->per_thread_data, thread_index);
@@ -433,15 +443,6 @@ sflow_process_samples (vlib_main_t *vm, vlib_node_runtime_t *node,
continue;
}
-#ifdef SFLOW_USE_VAPI
-#ifdef SFLOW_TEST_HAMMER_VAPI
- sflow_vapi_check_for_linux_if_index_results (&smp->vac,
- smp->per_interface_data);
- sflow_vapi_read_linux_if_index_numbers (&smp->vac,
- smp->per_interface_data);
-#endif
-#endif
-
// PSAMPLE channel may need extra step (e.g. to learn family_id)
// before it is ready to send
EnumSFLOWPSState psState = SFLOWPS_state (&smp->sflow_psample);
@@ -458,23 +459,6 @@ sflow_process_samples (vlib_main_t *vm, vlib_node_runtime_t *node,
{
// second rollover
smp->now_mono_S = tnow_S;
-#ifdef SFLOW_USE_VAPI
- if (!smp->vac.vapi_unavailable)
- {
- // look up linux if_index numbers
- sflow_vapi_check_for_linux_if_index_results (
- &smp->vac, smp->per_interface_data);
- if (smp->vapi_requests == 0 ||
- (tnow_S % SFLOW_VAPI_POLL_INTERVAL) == 0)
- {
- if (sflow_vapi_read_linux_if_index_numbers (
- &smp->vac, smp->per_interface_data))
- {
- smp->vapi_requests++;
- }
- }
- }
-#endif
// send status info
send_sampling_status_info (smp);
// poll counters for interfaces that are due
@@ -505,7 +489,8 @@ sflow_set_worker_sampling_state (sflow_main_t *smp)
vlib_thread_main_t *tm = &vlib_thread_main;
smp->total_threads = 1 + tm->n_threads;
vec_validate (smp->per_thread_data, smp->total_threads);
- for (u32 thread_index = 0; thread_index < smp->total_threads; thread_index++)
+ for (clib_thread_index_t thread_index = 0; thread_index < smp->total_threads;
+ thread_index++)
{
sflow_per_thread_data_t *sfwk =
vec_elt_at_index (smp->per_thread_data, thread_index);
@@ -539,11 +524,6 @@ sflow_sampling_start (sflow_main_t *smp)
smp->psample_seq_egress = 0;
smp->psample_send_drops = 0;
-#ifdef SFLOW_USE_VAPI
- // reset vapi request count so that we make a request the first time
- smp->vapi_requests = 0;
-#endif
-
/* open PSAMPLE netlink channel for writing packet samples */
SFLOWPS_open (&smp->sflow_psample);
/* open USERSOCK netlink channel for writing counters */
@@ -1027,6 +1007,18 @@ sflow_init (vlib_main_t *vm)
/* access to counters - TODO: should this only happen on sflow enable? */
sflow_stat_segment_client_init ();
+
+ smp->lcp_itf_pair_get_vif_index_by_phy =
+ vlib_get_plugin_symbol (SFLOW_LCP_LIB, SFLOW_LCP_SYM_GET_VIF_BY_PHY);
+ if (smp->lcp_itf_pair_get_vif_index_by_phy)
+ {
+ SFLOW_NOTICE ("linux-cp found - using LIP vif_index, where available");
+ }
+ else
+ {
+ SFLOW_NOTICE ("linux-cp not found - using VPP sw_if_index");
+ }
+
return error;
}
diff --git a/src/plugins/sflow/sflow.h b/src/plugins/sflow/sflow.h
index 609ff723816..0ec5ac90688 100644
--- a/src/plugins/sflow/sflow.h
+++ b/src/plugins/sflow/sflow.h
@@ -22,7 +22,6 @@
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
#include <sflow/sflow_common.h>
-#include <sflow/sflow_vapi.h>
#include <sflow/sflow_psample.h>
#include <sflow/sflow_usersock.h>
@@ -124,6 +123,8 @@ typedef struct
sflow_fifo_t fifo;
} sflow_per_thread_data_t;
+typedef u32 (*IfIndexLookupFn) (u32);
+
typedef struct
{
/* API message ID base */
@@ -164,12 +165,7 @@ typedef struct
u32 csample_send;
u32 csample_send_drops;
u32 unixsock_seq;
-#ifdef SFLOW_USE_VAPI
- /* vapi query helper thread (transient) */
- CLIB_CACHE_LINE_ALIGN_MARK (_vapi);
- sflow_vapi_client_t vac;
- int vapi_requests;
-#endif
+ IfIndexLookupFn lcp_itf_pair_get_vif_index_by_phy;
} sflow_main_t;
extern sflow_main_t sflow_main;
diff --git a/src/plugins/sflow/sflow_common.h b/src/plugins/sflow/sflow_common.h
index 29784638bb9..26f306b5741 100644
--- a/src/plugins/sflow/sflow_common.h
+++ b/src/plugins/sflow/sflow_common.h
@@ -15,8 +15,6 @@
#ifndef __included_sflow_common_h__
#define __included_sflow_common_h__
-// #define SFLOW_USE_VAPI (set by CMakeLists.txt)
-
extern vlib_log_class_t sflow_logger;
#define SFLOW_DBG(...) vlib_log_debug (sflow_logger, __VA_ARGS__);
#define SFLOW_INFO(...) vlib_log_info (sflow_logger, __VA_ARGS__);
diff --git a/src/plugins/nat/lib/inlines.h b/src/plugins/sflow/sflow_dlapi.h
index 24e3ba83a5b..e983bc8f6fe 100644
--- a/src/plugins/nat/lib/inlines.h
+++ b/src/plugins/sflow/sflow_dlapi.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Copyright (c) 2025 InMon Corp.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -12,29 +12,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @brief Common NAT inline functions
+#ifndef __included_sflow_dlapi_h__
+#define __included_sflow_dlapi_h__
+/* Dynamic-link API
+ * If present, linux-cp plugin will be queried to learn the
+ * Linux if_index for each VPP if_index. If that plugin is not
+ * compiled and loaded, or if the function symbol is not found,
+ * then the interfaces will be reported to NETLINK_USERSOCK
+ * without this extra mapping.
*/
-#ifndef included_nat_inlines_h__
-#define included_nat_inlines_h__
-
-#include <vnet/ip/icmp46_packet.h>
-
-static_always_inline u64
-icmp_type_is_error_message (u8 icmp_type)
-{
- int bmp = 0;
- bmp |= 1 << ICMP4_destination_unreachable;
- bmp |= 1 << ICMP4_time_exceeded;
- bmp |= 1 << ICMP4_parameter_problem;
- bmp |= 1 << ICMP4_source_quench;
- bmp |= 1 << ICMP4_redirect;
- bmp |= 1 << ICMP4_alternate_host_address;
-
- return (1ULL << icmp_type) & bmp;
-}
-
-#endif /* included_nat_inlines_h__ */
+#define SFLOW_LCP_LIB "linux_cp_plugin.so"
+#define SFLOW_LCP_SYM_GET_VIF_BY_PHY "lcp_itf_pair_get_vif_index_by_phy"
+#endif /* __included_sflow_dlapi_h__ */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/sflow/sflow_psample.c b/src/plugins/sflow/sflow_psample.c
index 0e4fcfbe790..41df454d999 100644
--- a/src/plugins/sflow/sflow_psample.c
+++ b/src/plugins/sflow/sflow_psample.c
@@ -13,11 +13,6 @@
* limitations under the License.
*/
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
diff --git a/src/plugins/sflow/sflow_vapi.c b/src/plugins/sflow/sflow_vapi.c
deleted file mode 100644
index cdc89a54c80..00000000000
--- a/src/plugins/sflow/sflow_vapi.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2024 InMon Corp.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <sflow/sflow_vapi.h>
-
-#ifdef SFLOW_USE_VAPI
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vpp/app/version.h>
-#include <stdbool.h>
-
-#include <vapi/vapi.h>
-#include <vapi/memclnt.api.vapi.h>
-#include <vapi/vlib.api.vapi.h>
-
-#ifdef included_interface_types_api_types_h
-#define defined_vapi_enum_if_status_flags
-#define defined_vapi_enum_mtu_proto
-#define defined_vapi_enum_link_duplex
-#define defined_vapi_enum_sub_if_flags
-#define defined_vapi_enum_rx_mode
-#define defined_vapi_enum_if_type
-#define defined_vapi_enum_direction
-#endif
-#include <vapi/lcp.api.vapi.h>
-
-DEFINE_VAPI_MSG_IDS_LCP_API_JSON;
-
-static vapi_error_e
-my_pair_get_cb (struct vapi_ctx_s *ctx, void *callback_ctx, vapi_error_e rv,
- bool is_last, vapi_payload_lcp_itf_pair_get_v2_reply *reply)
-{
- // this is a no-op, but it seems like it's presence is still required. For
- // example, it is called if the pair lookup does not find anything.
- return VAPI_OK;
-}
-
-static vapi_error_e
-my_pair_details_cb (struct vapi_ctx_s *ctx, void *callback_ctx,
- vapi_error_e rv, bool is_last,
- vapi_payload_lcp_itf_pair_details *details)
-{
- sflow_per_interface_data_t *sfif =
- (sflow_per_interface_data_t *) callback_ctx;
- // Setting this here will mean it is sent to hsflowd with the interface
- // counters.
- sfif->linux_if_index = details->vif_index;
- return VAPI_OK;
-}
-
-static vapi_error_e
-sflow_vapi_connect (sflow_vapi_client_t *vac)
-{
- vapi_error_e rv = VAPI_OK;
- vapi_ctx_t ctx = vac->vapi_ctx;
- if (ctx == NULL)
- {
- // first time - open and connect.
- if ((rv = vapi_ctx_alloc (&ctx)) != VAPI_OK)
- {
- SFLOW_ERR ("vap_ctx_alloc() returned %d", rv);
- }
- else
- {
- vac->vapi_ctx = ctx;
- if ((rv = vapi_connect_from_vpp (
- ctx, "api_from_sflow_plugin", SFLOW_VAPI_MAX_REQUEST_Q,
- SFLOW_VAPI_MAX_RESPONSE_Q, VAPI_MODE_BLOCKING, true)) !=
- VAPI_OK)
- {
- SFLOW_ERR ("vapi_connect_from_vpp() returned %d", rv);
- }
- else
- {
- // Connected - but is there a handler for the request we want to
- // send?
- if (!vapi_is_msg_available (ctx,
- vapi_msg_id_lcp_itf_pair_add_del_v2))
- {
- SFLOW_WARN ("vapi_is_msg_available() returned false => "
- "linux-cp plugin not loaded");
- rv = VAPI_EUSER;
- }
- }
- }
- }
- return rv;
-}
-
-// in forked thread
-static void *
-get_lcp_itf_pairs (void *magic)
-{
- sflow_vapi_client_t *vac = magic;
- vapi_error_e rv = VAPI_OK;
-
- sflow_per_interface_data_t *intfs = vac->vapi_itfs;
- vlib_set_thread_name (SFLOW_VAPI_THREAD_NAME);
- if ((rv = sflow_vapi_connect (vac)) != VAPI_OK)
- {
- vac->vapi_unavailable = true;
- }
- else
- {
- vapi_ctx_t ctx = vac->vapi_ctx;
-
- for (int ii = 1; ii < vec_len (intfs); ii++)
- {
- sflow_per_interface_data_t *sfif = vec_elt_at_index (intfs, ii);
- if (sfif && sfif->sflow_enabled)
- {
- // TODO: if we try non-blocking we might not be able to just pour
- // all the requests in here. Might be better to do them one at a
- // time - e.g. when we poll for counters.
- vapi_msg_lcp_itf_pair_get_v2 *msg =
- vapi_alloc_lcp_itf_pair_get_v2 (ctx);
- if (msg)
- {
- msg->payload.sw_if_index = sfif->sw_if_index;
- if ((rv = vapi_lcp_itf_pair_get_v2 (ctx, msg, my_pair_get_cb,
- sfif, my_pair_details_cb,
- sfif)) != VAPI_OK)
- {
- SFLOW_ERR ("vapi_lcp_itf_pair_get_v2 returned %d", rv);
- // vapi.h: "message must be freed by vapi_msg_free if not
- // consumed by vapi_send"
- vapi_msg_free (ctx, msg);
- }
- }
- }
- }
- // We no longer disconnect or free the client structures
- // vapi_disconnect_from_vpp (ctx);
- // vapi_ctx_free (ctx);
- }
- // indicate that we are done - more portable that using pthread_tryjoin_np()
- vac->vapi_request_status = (int) rv;
- clib_atomic_store_rel_n (&vac->vapi_request_active, false);
- // TODO: how to tell if heap-allocated data is stored separately per thread?
- // And if so, how to tell the allocator to GC all data for the thread when it
- // exits?
- return (void *) rv;
-}
-
-int
-sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac,
- sflow_per_interface_data_t *itfs)
-{
-
-#ifdef SFLOW_VAPI_TEST_PLUGIN_SYMBOL
- // don't even fork the query thread if the symbol is not there
- if (!vlib_get_plugin_symbol ("linux_cp_plugin.so", "lcp_itf_pair_get"))
- {
- return false;
- }
-#endif
- // previous query is done and results extracted?
- int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active);
- if (req_active == false && vac->vapi_itfs == NULL)
- {
- // make a copy of the current interfaces vector for the lookup thread to
- // write into
- vac->vapi_itfs = vec_dup (itfs);
- pthread_attr_t attr;
- pthread_attr_init (&attr);
- pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
- pthread_attr_setstacksize (&attr, VLIB_THREAD_STACK_SIZE);
- vac->vapi_request_active = true;
- pthread_create (&vac->vapi_thread, &attr, get_lcp_itf_pairs, vac);
- pthread_attr_destroy (&attr);
- return true;
- }
- return false;
-}
-
-int
-sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac,
- sflow_per_interface_data_t *itfs)
-{
- // request completed?
- // TODO: if we use non-blocking mode do we have to call something here to
- // receive results?
- int req_active = clib_atomic_load_acq_n (&vac->vapi_request_active);
- if (req_active == false && vac->vapi_itfs != NULL)
- {
- // yes, extract what we learned
- // TODO: would not have to do this if vector were array of pointers
- // to sflow_per_interface_data_t rather than an actual array, but
- // it does mean we have very clear separation between the threads.
- for (int ii = 1; ii < vec_len (vac->vapi_itfs); ii++)
- {
- sflow_per_interface_data_t *sfif1 =
- vec_elt_at_index (vac->vapi_itfs, ii);
- sflow_per_interface_data_t *sfif2 = vec_elt_at_index (itfs, ii);
- if (sfif1 && sfif2 && sfif1->sflow_enabled && sfif2->sflow_enabled)
- sfif2->linux_if_index = sfif1->linux_if_index;
- }
- vec_free (vac->vapi_itfs);
- vac->vapi_itfs = NULL;
- return true;
- }
- return false;
-}
-
-#endif /* SFLOW_USE_VAPI */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/sflow/sflow_vapi.h b/src/plugins/sflow/sflow_vapi.h
deleted file mode 100644
index 640fe997684..00000000000
--- a/src/plugins/sflow/sflow_vapi.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2024 InMon Corp.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_sflow_vapi_h__
-#define __included_sflow_vapi_h__
-
-#include <vnet/vnet.h>
-#include <sflow/sflow_common.h>
-
-#ifdef SFLOW_USE_VAPI
-
-#define SFLOW_VAPI_POLL_INTERVAL 5
-#define SFLOW_VAPI_MAX_REQUEST_Q 8
-#define SFLOW_VAPI_MAX_RESPONSE_Q 16
-#define SFLOW_VAPI_THREAD_NAME "sflow_vapi" // must be <= 15 characters
-
-// #define SFLOW_VAPI_TEST_PLUGIN_SYMBOL
-
-typedef struct
-{
- volatile int vapi_request_active; // to sync main <-> vapi_thread
- pthread_t vapi_thread;
- sflow_per_interface_data_t *vapi_itfs;
- int vapi_unavailable;
- int vapi_request_status; // written by vapi_thread
- void *vapi_ctx;
-} sflow_vapi_client_t;
-
-int sflow_vapi_read_linux_if_index_numbers (sflow_vapi_client_t *vac,
- sflow_per_interface_data_t *itfs);
-int
-sflow_vapi_check_for_linux_if_index_results (sflow_vapi_client_t *vac,
- sflow_per_interface_data_t *itfs);
-
-#endif /* SFLOW_USE_VAPI */
-#endif /* __included_sflow_vapi_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c
index 84efb4d432f..ae04c58bba0 100644
--- a/src/plugins/snort/enqueue.c
+++ b/src/plugins/snort/enqueue.c
@@ -93,7 +93,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
snort_main_t *sm = &snort_main;
snort_instance_t *si = 0;
snort_qpair_t *qp = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_left = frame->n_vectors;
u32 n_trace = 0;
u32 total_enq = 0, n_unprocessed = 0;
diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c
index 9bab1185b60..c87ecfd7ebd 100644
--- a/src/plugins/snort/main.c
+++ b/src/plugins/snort/main.c
@@ -392,6 +392,18 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
u8 align = CLIB_CACHE_LINE_BYTES;
int rv = 0;
+ if (sm->listener == 0)
+ {
+ clib_error_t *err;
+ err = snort_listener_init (vm);
+ if (err)
+ {
+ log_err ("listener init failed: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_API_ERROR_INIT_FAILED;
+ }
+ }
+
if (snort_get_instance_by_name (name))
return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
@@ -831,7 +843,7 @@ snort_init (vlib_main_t *vm)
if (!sm->socket_name)
snort_set_default_socket (sm, 0);
- return snort_listener_init (vm);
+ return 0;
}
VLIB_INIT_FUNCTION (snort_init);
diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c
index 5426b7aa03f..f86b7be980e 100644
--- a/src/plugins/srtp/srtp.c
+++ b/src/plugins/srtp/srtp.c
@@ -19,11 +19,11 @@
static srtp_main_t srtp_main;
-static void srtp_disconnect (u32 ctx_handle, u32 thread_index);
+static void srtp_disconnect (u32 ctx_handle, clib_thread_index_t thread_index);
static void srtp_disconnect_transport (srtp_tc_t *ctx);
static inline u32
-srtp_ctx_alloc_w_thread (u32 thread_index)
+srtp_ctx_alloc_w_thread (clib_thread_index_t thread_index)
{
srtp_tc_t *ctx;
pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx,
@@ -36,7 +36,7 @@ srtp_ctx_alloc_w_thread (u32 thread_index)
}
static inline srtp_tc_t *
-srtp_ctx_get_w_thread (u32 ctx_index, u32 thread_index)
+srtp_ctx_get_w_thread (u32 ctx_index, clib_thread_index_t thread_index)
{
return pool_elt_at_index (srtp_main.ctx_pool[thread_index], ctx_index);
}
@@ -82,7 +82,7 @@ srtp_ctx_free (srtp_tc_t *ctx)
}
static inline u32
-srtp_ctx_attach (u32 thread_index, void *ctx_ptr)
+srtp_ctx_attach (clib_thread_index_t thread_index, void *ctx_ptr)
{
srtp_tc_t *ctx;
@@ -688,7 +688,7 @@ srtp_disconnect_transport (srtp_tc_t *ctx)
}
static void
-srtp_disconnect (u32 ctx_handle, u32 thread_index)
+srtp_disconnect (u32 ctx_handle, clib_thread_index_t thread_index)
{
session_t *app_session;
srtp_tc_t *ctx;
@@ -801,7 +801,7 @@ srtp_stop_listen (u32 lctx_index)
}
transport_connection_t *
-srtp_connection_get (u32 ctx_index, u32 thread_index)
+srtp_connection_get (u32 ctx_index, clib_thread_index_t thread_index)
{
srtp_tc_t *ctx;
ctx = srtp_ctx_get_w_thread (ctx_index, thread_index);
@@ -895,7 +895,7 @@ u8 *
format_srtp_connection (u8 *s, va_list *args)
{
u32 ctx_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
srtp_tc_t *ctx;
@@ -935,7 +935,7 @@ format_srtp_half_open (u8 *s, va_list *args)
}
static void
-srtp_transport_endpoint_get (u32 ctx_handle, u32 thread_index,
+srtp_transport_endpoint_get (u32 ctx_handle, clib_thread_index_t thread_index,
transport_endpoint_t *tep, u8 is_lcl)
{
srtp_tc_t *ctx = srtp_ctx_get_w_thread (ctx_handle, thread_index);
diff --git a/src/plugins/srv6-ad-flow/node.c b/src/plugins/srv6-ad-flow/node.c
index 66be2dc7972..28fbc105d84 100644
--- a/src/plugins/srv6-ad-flow/node.c
+++ b/src/plugins/srv6-ad-flow/node.c
@@ -583,7 +583,7 @@ srv6_ad_flow_localsid_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
ip6_sr_main_t *srm = &sr_main;
f64 now = vlib_time_now (vm);
u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/plugins/srv6-am/node.c b/src/plugins/srv6-am/node.c
index beef6a30910..012afda581b 100644
--- a/src/plugins/srv6-am/node.c
+++ b/src/plugins/srv6-am/node.c
@@ -147,7 +147,7 @@ srv6_am_localsid_fn (vlib_main_t * vm,
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
diff --git a/src/plugins/srv6-mobile/node.c b/src/plugins/srv6-mobile/node.c
index ed0697a8009..c8f619cf044 100644
--- a/src/plugins/srv6-mobile/node.c
+++ b/src/plugins/srv6-mobile/node.c
@@ -325,7 +325,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e)
srv6_end_main_v4_t *sm = &srv6_end_main_v4;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 good_n = 0, bad_n = 0;
@@ -1327,7 +1327,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e)
srv6_end_main_v6_t *sm = &srv6_end_main_v6;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 good_n = 0, bad_n = 0;
@@ -2088,7 +2088,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d)
srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
ip6_sr_localsid_t *ls0;
srv6_end_gtp6_d_param_t *ls_param;
@@ -2238,7 +2238,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di)
srv6_end_main_v6_decap_di_t *sm = &srv6_end_main_v6_decap_di;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
srv6_end_gtp6_d_param_t *ls_param;
u32 good_n = 0, bad_n = 0;
@@ -2686,7 +2686,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt)
srv6_end_main_v6_dt_t *sm = &srv6_end_main_v6_dt;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 good_n = 0, bad_n = 0;
diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c
index 2f4757e28a1..44d48b1edb4 100644
--- a/src/plugins/tlsmbedtls/tls_mbedtls.c
+++ b/src/plugins/tlsmbedtls/tls_mbedtls.c
@@ -127,7 +127,7 @@ mbedtls_ctx_get_w_thread (u32 ctx_index, u8 thread_index)
static int
tls_init_ctr_seed_drbgs (void)
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
mbedtls_main_t *tm = &mbedtls_main;
u8 *pers;
int rv;
diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c
index cd08da5d9ea..e28d730e307 100644
--- a/src/plugins/tlsopenssl/tls_async.c
+++ b/src/plugins/tlsopenssl/tls_async.c
@@ -114,8 +114,8 @@ openssl_async_t openssl_async_main;
static vlib_node_registration_t tls_async_process_node;
/* to avoid build warning */
-void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
- void *rpc_args);
+void session_send_rpc_evt_to_thread (clib_thread_index_t thread_index,
+ void *fp, void *rpc_args);
void
evt_pool_init (vlib_main_t * vm)
@@ -528,7 +528,7 @@ openssl_async_node_enable_disable (u8 is_en)
}
int
-tls_async_do_job (int eidx, u32 thread_index)
+tls_async_do_job (int eidx, clib_thread_index_t thread_index)
{
tls_ctx_t *ctx;
openssl_evt_t *event;
diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c
index f0be025a207..5b57e706586 100644
--- a/src/plugins/tlsopenssl/tls_openssl.c
+++ b/src/plugins/tlsopenssl/tls_openssl.c
@@ -35,7 +35,7 @@
openssl_main_t openssl_main;
static u32
-openssl_ctx_alloc_w_thread (u32 thread_index)
+openssl_ctx_alloc_w_thread (clib_thread_index_t thread_index)
{
openssl_main_t *om = &openssl_main;
openssl_ctx_t **ctx;
@@ -102,7 +102,7 @@ openssl_ctx_detach (tls_ctx_t *ctx)
}
static u32
-openssl_ctx_attach (u32 thread_index, void *ctx_ptr)
+openssl_ctx_attach (clib_thread_index_t thread_index, void *ctx_ptr)
{
openssl_main_t *om = &openssl_main;
session_handle_t sh;
diff --git a/src/plugins/tlspicotls/pico_vpp_crypto.c b/src/plugins/tlspicotls/pico_vpp_crypto.c
index 3d28d50b352..e8e4a875e33 100644
--- a/src/plugins/tlspicotls/pico_vpp_crypto.c
+++ b/src/plugins/tlspicotls/pico_vpp_crypto.c
@@ -107,8 +107,7 @@ ptls_vpp_crypto_cipher_setup_crypto (ptls_cipher_context_t * _ctx, int is_enc,
}
else
{
- TLS_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__,
- _ctx->algo->name);
+ TLS_DBG (1, "%s, Invalid crypto cipher : ", __func__, _ctx->algo->name);
assert (0);
}
@@ -226,8 +225,7 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
}
else
{
- TLS_DBG (1, "%s, invalied aead cipher %s", __FUNCTION__,
- _ctx->algo->name);
+ TLS_DBG (1, "%s, invalied aead cipher %s", __func__, _ctx->algo->name);
return -1;
}
diff --git a/src/plugins/unittest/ipsec_test.c b/src/plugins/unittest/ipsec_test.c
index 98253eeb12a..869d53367b6 100644
--- a/src/plugins/unittest/ipsec_test.c
+++ b/src/plugins/unittest/ipsec_test.c
@@ -40,19 +40,26 @@ test_ipsec_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (~0 != sa_id)
{
ipsec_sa_t *sa;
+ ipsec_sa_inb_rt_t *irt;
+ ipsec_sa_outb_rt_t *ort;
u32 sa_index;
sa_index = ipsec_sa_find_and_lock (sa_id);
sa = ipsec_sa_get (sa_index);
+ irt = ipsec_sa_get_inb_rt (sa);
+ ort = ipsec_sa_get_outb_rt (sa);
- sa->seq = seq_num & 0xffffffff;
- sa->seq_hi = seq_num >> 32;
+ if (ort)
+ ort->seq64 = seq_num;
- /* clear the window */
- if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- clib_bitmap_zero (sa->replay_window_huge);
- else
- sa->replay_window = 0;
+ if (irt)
+ {
+ irt->seq64 = seq_num;
+
+ /* clear the window */
+ uword_bitmap_clear (irt->replay_window,
+ irt->anti_replay_window_size / uword_bits);
+ }
ipsec_sa_unlock (sa_index);
}
diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c
index 993f1be41a9..667851901c4 100644
--- a/src/plugins/unittest/session_test.c
+++ b/src/plugins/unittest/session_test.c
@@ -16,6 +16,7 @@
#include <arpa/inet.h>
#include <vnet/session/application.h>
#include <vnet/session/session.h>
+#include <vnet/session/transport.h>
#include <sys/epoll.h>
#include <vnet/session/session_rules_table.h>
@@ -50,6 +51,11 @@ placeholder_session_reset_callback (session_t * s)
volatile u32 connected_session_index = ~0;
volatile u32 connected_session_thread = ~0;
+static u32 placeholder_accept;
+volatile u32 accepted_session_index;
+volatile u32 accepted_session_thread;
+volatile int app_session_error = 0;
+
int
placeholder_session_connected_callback (u32 app_index, u32 api_context,
session_t * s, session_error_t err)
@@ -81,13 +87,22 @@ placeholder_del_segment_callback (u32 client_index, u64 segment_handle)
+/**
+ * Disconnect callback for the placeholder test app.
+ *
+ * If the session matches neither the tracked connected session nor the
+ * tracked accepted session (compared by session_index and thread_index),
+ * a warning is logged and app_session_error is set. In all cases the
+ * session is then disconnected via vnet_disconnect_session ().
+ */
void
placeholder_session_disconnect_callback (session_t * s)
{
- clib_warning ("called...");
+  if (!(s->session_index == connected_session_index &&
+	s->thread_index == connected_session_thread) &&
+      !(s->session_index == accepted_session_index &&
+	s->thread_index == accepted_session_thread))
+    {
+      /* clib_warning's first argument is the format string itself */
+      clib_warning ("unexpected disconnect s %u thread %u",
+		    s->session_index, s->thread_index);
+      app_session_error = 1;
+    }
+  vnet_disconnect_args_t da = {
+    .handle = session_handle (s),
+    .app_index = app_worker_get (s->app_wrk_index)->app_index
+  };
+  vnet_disconnect_session (&da);
}
-static u32 placeholder_accept;
-volatile u32 accepted_session_index;
-volatile u32 accepted_session_thread;
-
int
placeholder_session_accept_callback (session_t * s)
{
@@ -105,12 +120,39 @@ placeholder_server_rx_callback (session_t * s)
return -1;
}
+/*
+ * Session cleanup notification for the test app.
+ *
+ * SESSION_CLEANUP_TRANSPORT notifications are ignored. For any other
+ * notification, whichever tracked pair (connected or accepted session
+ * index/thread) matches s is reset to ~0. A session matching neither pair
+ * is logged and recorded in app_session_error.
+ */
+void
+placeholder_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+  if (ntf == SESSION_CLEANUP_TRANSPORT)
+    return;
+
+  if (s->session_index == connected_session_index &&
+      s->thread_index == connected_session_thread)
+    {
+      connected_session_index = ~0;
+      connected_session_thread = ~0;
+    }
+  else if (s->session_index == accepted_session_index &&
+           s->thread_index == accepted_session_thread)
+    {
+      accepted_session_index = ~0;
+      accepted_session_thread = ~0;
+    }
+  else
+    {
+      /* clib_warning takes the format string as its first argument */
+      clib_warning ("unexpected cleanup s %u thread %u", s->session_index,
+                    s->thread_index);
+      app_session_error = 1;
+    }
+}
+
+/* Callback table wiring the placeholder_* handlers of this test file into a
+ * session_cb_vft_t, including the cleanup callback. */
static session_cb_vft_t placeholder_session_cbs = {
.session_reset_callback = placeholder_session_reset_callback,
.session_connected_callback = placeholder_session_connected_callback,
.session_accept_callback = placeholder_session_accept_callback,
.session_disconnect_callback = placeholder_session_disconnect_callback,
.builtin_app_rx_callback = placeholder_server_rx_callback,
+ .session_cleanup_callback = placeholder_cleanup_callback,
.add_segment_callback = placeholder_add_segment_callback,
.del_segment_callback = placeholder_del_segment_callback,
};
@@ -278,6 +320,7 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
u64 options[APP_OPTIONS_N_OPTIONS], placeholder_secret = 1234;
u16 placeholder_server_port = 1234, placeholder_client_port = 5678;
session_endpoint_cfg_t server_sep = SESSION_ENDPOINT_CFG_NULL;
+ u32 client_vrf = 0, server_vrf = 1;
ip4_address_t intf_addr[3];
transport_connection_t *tc;
session_t *s;
@@ -288,25 +331,25 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
* Create the loopbacks
*/
intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101);
- session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]);
+ session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]);
intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202);
- session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]);
+ session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]);
- session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32,
- 1 /* is_add */ );
- session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32,
- 1 /* is_add */ );
+ session_add_del_route_via_lookup_in_table (
+ client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */);
+ session_add_del_route_via_lookup_in_table (
+ server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */);
/*
* Insert namespace
*/
- appns_id = format (0, "appns1");
+ appns_id = format (0, "appns_server");
vnet_app_namespace_add_del_args_t ns_args = {
.ns_id = appns_id,
.secret = placeholder_secret,
- .sw_if_index = sw_if_index[1],
- .ip4_fib_id = 0,
+ .sw_if_index = sw_if_index[1], /* server interface*/
+ .ip4_fib_id = 0, /* sw_if_index takes precedence */
.is_add = 1
};
error = vnet_app_namespace_add_del (&ns_args);
@@ -357,10 +400,10 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
* Connect and force lcl ip
*/
client_sep.is_ip4 = 1;
- client_sep.ip.ip4.as_u32 = clib_host_to_net_u32 (0x02020202);
+ client_sep.ip.ip4.as_u32 = intf_addr[1].as_u32;
client_sep.port = placeholder_server_port;
client_sep.peer.is_ip4 = 1;
- client_sep.peer.ip.ip4.as_u32 = clib_host_to_net_u32 (0x01010101);
+ client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32;
client_sep.peer.port = placeholder_client_port;
client_sep.transport_proto = TRANSPORT_PROTO_TCP;
@@ -401,6 +444,35 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((tc->lcl_port == placeholder_client_port),
"ports should be equal");
+ /* Disconnect server session, should lead to faster port cleanup on client */
+ vnet_disconnect_args_t disconnect_args = {
+ .handle =
+ session_make_handle (accepted_session_index, accepted_session_thread),
+ .app_index = server_index,
+ };
+
+ error = vnet_disconnect_session (&disconnect_args);
+ SESSION_TEST ((error == 0), "disconnect should work");
+
+ /* wait for stuff to happen */
+ tries = 0;
+ while (connected_session_index != ~0 && ++tries < 100)
+ {
+ vlib_worker_thread_barrier_release (vm);
+ vlib_process_suspend (vm, 100e-3);
+ vlib_worker_thread_barrier_sync (vm);
+ }
+
+ /* Active closes take longer to cleanup, don't wait */
+
+ clib_warning ("waited %.1f seconds for disconnect", tries / 10.0);
+ SESSION_TEST ((connected_session_index == ~0), "session should not exist");
+ SESSION_TEST ((connected_session_thread == ~0), "thread should not exist");
+ SESSION_TEST (transport_port_local_in_use () == 0,
+ "port should be cleaned up");
+ SESSION_TEST ((app_session_error == 0), "no app session errors");
+
+ /* Start cleanup by detaching apps */
vnet_app_detach_args_t detach_args = {
.app_index = server_index,
.api_client_index = ~0,
@@ -416,13 +488,167 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
/* Allow the disconnects to finish before removing the routes. */
vlib_process_suspend (vm, 10e-3);
- session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32,
- 0 /* is_add */ );
- session_add_del_route_via_lookup_in_table (1, 0, &intf_addr[0], 32,
- 0 /* is_add */ );
+ session_add_del_route_via_lookup_in_table (
+ client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */);
+ session_add_del_route_via_lookup_in_table (
+ server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */);
+
+ session_delete_loopback (sw_if_index[0]);
+ session_delete_loopback (sw_if_index[1]);
+
+ /*
+ * Redo the test but with client in the non-default namespace
+ */
+
+ /* Create the loopbacks */
+ client_vrf = 1;
+ server_vrf = 0;
+ session_create_lookpback (client_vrf, &sw_if_index[0], &intf_addr[0]);
+ session_create_lookpback (server_vrf, &sw_if_index[1], &intf_addr[1]);
+
+ session_add_del_route_via_lookup_in_table (
+ client_vrf, server_vrf, &intf_addr[1], 32, 1 /* is_add */);
+ session_add_del_route_via_lookup_in_table (
+ server_vrf, client_vrf, &intf_addr[0], 32, 1 /* is_add */);
+
+ /* Insert new client namespace */
+ vec_free (appns_id);
+ appns_id = format (0, "appns_client");
+ ns_args.ns_id = appns_id;
+ ns_args.sw_if_index = sw_if_index[0]; /* client interface*/
+ ns_args.is_add = 1;
+
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns insertion should succeed: %U",
+ format_session_error, error);
+
+ /* Attach client */
+ attach_args.name = format (0, "session_test_client");
+ attach_args.namespace_id = appns_id;
+ attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 0;
+ attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = placeholder_secret;
+ attach_args.api_client_index = ~0;
+
+ error = vnet_application_attach (&attach_args);
+ SESSION_TEST ((error == 0), "client app attached: %U", format_session_error,
+ error);
+ client_index = attach_args.app_index;
+ vec_free (attach_args.name);
+
+ /* Attach server */
+ attach_args.name = format (0, "session_test_server");
+ attach_args.namespace_id = 0;
+ attach_args.options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 32 << 20;
+ attach_args.options[APP_OPTIONS_NAMESPACE_SECRET] = 0;
+ attach_args.api_client_index = ~0;
+ error = vnet_application_attach (&attach_args);
+ SESSION_TEST ((error == 0), "server app attached: %U", format_session_error,
+ error);
+ vec_free (attach_args.name);
+ server_index = attach_args.app_index;
+
+ /* Bind server */
+ clib_memset (&server_sep, 0, sizeof (server_sep));
+ server_sep.is_ip4 = 1;
+ server_sep.port = placeholder_server_port;
+ bind_args.sep_ext = server_sep;
+ bind_args.app_index = server_index;
+ error = vnet_listen (&bind_args);
+ SESSION_TEST ((error == 0), "server bind should work: %U",
+ format_session_error, error);
+
+ /* Connect client */
+ connected_session_index = connected_session_thread = ~0;
+ accepted_session_index = accepted_session_thread = ~0;
+ clib_memset (&client_sep, 0, sizeof (client_sep));
+ client_sep.is_ip4 = 1;
+ client_sep.ip.ip4.as_u32 = intf_addr[1].as_u32;
+ client_sep.port = placeholder_server_port;
+ client_sep.peer.is_ip4 = 1;
+ client_sep.peer.ip.ip4.as_u32 = intf_addr[0].as_u32;
+ client_sep.peer.port = placeholder_client_port;
+ client_sep.transport_proto = TRANSPORT_PROTO_TCP;
+
+ connect_args.sep_ext = client_sep;
+ connect_args.app_index = client_index;
+ error = vnet_connect (&connect_args);
+ SESSION_TEST ((error == 0), "connect should work");
+
+ /* wait for stuff to happen */
+ while (connected_session_index == ~0 && ++tries < 100)
+ {
+ vlib_worker_thread_barrier_release (vm);
+ vlib_process_suspend (vm, 100e-3);
+ vlib_worker_thread_barrier_sync (vm);
+ }
+ while (accepted_session_index == ~0 && ++tries < 100)
+ {
+ vlib_worker_thread_barrier_release (vm);
+ vlib_process_suspend (vm, 100e-3);
+ vlib_worker_thread_barrier_sync (vm);
+ }
+
+ clib_warning ("waited %.1f seconds for connections", tries / 10.0);
+ SESSION_TEST ((connected_session_index != ~0), "session should exist");
+ SESSION_TEST ((connected_session_thread != ~0), "thread should exist");
+ SESSION_TEST ((accepted_session_index != ~0), "session should exist");
+ SESSION_TEST ((accepted_session_thread != ~0), "thread should exist");
+ s = session_get (connected_session_index, connected_session_thread);
+ tc = session_get_transport (s);
+ SESSION_TEST ((tc != 0), "transport should exist");
+ SESSION_TEST (
+ (memcmp (&tc->lcl_ip, &client_sep.peer.ip, sizeof (tc->lcl_ip)) == 0),
+ "ips should be equal");
+ SESSION_TEST ((tc->lcl_port == placeholder_client_port),
+ "ports should be equal");
+
+ /* Disconnect server session, for faster port cleanup on client */
+ disconnect_args.app_index = server_index;
+ disconnect_args.handle =
+ session_make_handle (accepted_session_index, accepted_session_thread);
+
+ error = vnet_disconnect_session (&disconnect_args);
+ SESSION_TEST ((error == 0), "disconnect should work");
+
+ /* wait for stuff to happen */
+ tries = 0;
+ while (connected_session_index != ~0 && ++tries < 100)
+ {
+ vlib_worker_thread_barrier_release (vm);
+ vlib_process_suspend (vm, 100e-3);
+ vlib_worker_thread_barrier_sync (vm);
+ }
+
+ /* Active closes take longer to cleanup, don't wait */
+
+ clib_warning ("waited %.1f seconds for disconnect", tries / 10.0);
+ SESSION_TEST ((connected_session_index == ~0), "session should not exist");
+ SESSION_TEST ((connected_session_thread == ~0), "thread should not exist");
+ SESSION_TEST ((app_session_error == 0), "no app session errors");
+ SESSION_TEST (transport_port_local_in_use () == 0,
+ "port should be cleaned up");
+
+ /* Start cleanup by detaching apps */
+ detach_args.app_index = server_index;
+ vnet_application_detach (&detach_args);
+ detach_args.app_index = client_index;
+ vnet_application_detach (&detach_args);
+
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
+ /* Allow the disconnects to finish before removing the routes. */
+ vlib_process_suspend (vm, 10e-3);
+
+ session_add_del_route_via_lookup_in_table (
+ client_vrf, server_vrf, &intf_addr[1], 32, 0 /* is_add */);
+ session_add_del_route_via_lookup_in_table (
+ server_vrf, client_vrf, &intf_addr[0], 32, 0 /* is_add */);
session_delete_loopback (sw_if_index[0]);
session_delete_loopback (sw_if_index[1]);
+
return 0;
}
@@ -1781,6 +2007,11 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input)
unformat_free (&tmp_input);
vec_free (attach_args.name);
session_delete_loopback (sw_if_index);
+
+ /* Revert default appns sw_if_index */
+ app_ns = app_namespace_get_default ();
+ app_ns->sw_if_index = ~0;
+
return 0;
}
@@ -2131,7 +2362,10 @@ session_get_memory_usage (void)
s = format (s, "%U\n", format_clib_mem_heap, heap, 0);
ss = strstr ((char *) s, "used:");
if (ss)
- sscanf (ss, "used: %f", &used);
+ {
+ if (sscanf (ss, "used: %f", &used) != 1)
+ clib_warning ("invalid 'used' value");
+ }
else
clib_warning ("substring 'used:' not found from show memory");
vec_free (s);
@@ -2501,6 +2735,8 @@ session_test (vlib_main_t * vm,
done:
if (res)
return clib_error_return (0, "Session unit test failed");
+
+ vlib_cli_output (vm, "SUCCESS");
return 0;
}
diff --git a/src/plugins/unittest/svm_fifo_test.c b/src/plugins/unittest/svm_fifo_test.c
index 9feb37cbc25..c6031c59987 100644
--- a/src/plugins/unittest/svm_fifo_test.c
+++ b/src/plugins/unittest/svm_fifo_test.c
@@ -2856,6 +2856,8 @@ svm_fifo_test (vlib_main_t * vm, unformat_input_t * input,
done:
if (res)
return clib_error_return (0, "svm fifo unit test failed");
+
+ vlib_cli_output (vm, "SUCCESS");
return 0;
}
diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c
index bd39474ce93..6236ccdfe08 100644
--- a/src/plugins/unittest/tcp_test.c
+++ b/src/plugins/unittest/tcp_test.c
@@ -1002,16 +1002,16 @@ tbt_seq_lt (u32 a, u32 b)
}
+/* Test helper: stores val as last_vlib_time of the session worker at
+ * thread_index and passes val to tcp_set_time_now () for the TCP worker at
+ * the same index. */
static void
-tcp_test_set_time (u32 thread_index, u32 val)
+tcp_test_set_time (clib_thread_index_t thread_index, u32 val)
{
session_main.wrk[thread_index].last_vlib_time = val;
- tcp_set_time_now (&tcp_main.wrk_ctx[thread_index], val);
+ tcp_set_time_now (&tcp_main.wrk[thread_index], val);
}
static int
tcp_test_delivery (vlib_main_t * vm, unformat_input_t * input)
{
- u32 thread_index = 0, snd_una, *min_seqs = 0;
+ clib_thread_index_t thread_index = 0, snd_una, *min_seqs = 0;
tcp_rate_sample_t _rs = { 0 }, *rs = &_rs;
tcp_connection_t _tc, *tc = &_tc;
sack_scoreboard_t *sb = &tc->sack_sb;
@@ -1337,7 +1337,7 @@ tcp_test_delivery (vlib_main_t * vm, unformat_input_t * input)
static int
tcp_test_bt (vlib_main_t * vm, unformat_input_t * input)
{
- u32 thread_index = 0;
+ clib_thread_index_t thread_index = 0;
tcp_rate_sample_t _rs = { 0 }, *rs = &_rs;
tcp_connection_t _tc, *tc = &_tc;
int __clib_unused verbose = 0, i;
@@ -1594,6 +1594,8 @@ tcp_test (vlib_main_t * vm,
done:
if (res)
return clib_error_return (0, "TCP unit test failed");
+
+ vlib_cli_output (vm, "SUCCESS");
return 0;
}
diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h
index b17fed7e04b..edb4ec79171 100644
--- a/src/plugins/urpf/urpf_dp.h
+++ b/src/plugins/urpf/urpf_dp.h
@@ -98,8 +98,8 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir,
lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address);
/* Pass multicast. */
- lpass = (ip4_address_is_multicast (&ip->src_address) ||
- ip4_address_is_global_broadcast (&ip->src_address));
+ lpass = (ip4_address_is_multicast (&ip->dst_address) ||
+ ip4_address_is_global_broadcast (&ip->dst_address));
}
else
{
@@ -108,7 +108,7 @@ urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir,
ip = (ip6_header_t *) h;
lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address);
- lpass = ip6_address_is_multicast (&ip->src_address);
+ lpass = ip6_address_is_multicast (&ip->dst_address);
}
llb = load_balance_get (lb_index);
@@ -157,10 +157,10 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir,
ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address,
&ip1->src_address, &lb_index0, &lb_index1);
/* Pass multicast. */
- lpass0 = (ip4_address_is_multicast (&ip0->src_address) ||
- ip4_address_is_global_broadcast (&ip0->src_address));
- lpass1 = (ip4_address_is_multicast (&ip1->src_address) ||
- ip4_address_is_global_broadcast (&ip1->src_address));
+ lpass0 = (ip4_address_is_multicast (&ip0->dst_address) ||
+ ip4_address_is_global_broadcast (&ip0->dst_address));
+ lpass1 = (ip4_address_is_multicast (&ip1->dst_address) ||
+ ip4_address_is_global_broadcast (&ip1->dst_address));
}
else
{
@@ -171,8 +171,8 @@ urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir,
lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address);
lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address);
- lpass0 = ip6_address_is_multicast (&ip0->src_address);
- lpass1 = ip6_address_is_multicast (&ip1->src_address);
+ lpass0 = ip6_address_is_multicast (&ip0->dst_address);
+ lpass1 = ip6_address_is_multicast (&ip1->dst_address);
}
llb0 = load_balance_get (lb_index0);
diff --git a/src/plugins/vhost/vhost_user.c b/src/plugins/vhost/vhost_user.c
index fdee984f97b..592a126c683 100644
--- a/src/plugins/vhost/vhost_user.c
+++ b/src/plugins/vhost/vhost_user.c
@@ -31,7 +31,7 @@
#include <linux/if_tun.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
@@ -325,15 +325,13 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
if (vring->kickfd_idx != ~0)
{
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->kickfd_idx);
+ clib_file_t *uf = clib_file_get (&file_main, vring->kickfd_idx);
clib_file_del (&file_main, uf);
vring->kickfd_idx = ~0;
}
if (vring->callfd_idx != ~0)
{
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vring->callfd_idx);
+ clib_file_t *uf = clib_file_get (&file_main, vring->callfd_idx);
clib_file_del (&file_main, uf);
vring->callfd_idx = ~0;
}
@@ -349,7 +347,7 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
u16 q = vui->vrings[qid].qid;
u32 queue_index = vui->vrings[qid].queue_index;
u32 mode = vui->vrings[qid].mode;
- u32 thread_index = vui->vrings[qid].thread_index;
+ clib_thread_index_t thread_index = vui->vrings[qid].thread_index;
vhost_user_vring_init (vui, qid);
vui->vrings[qid].qid = q;
vui->vrings[qid].queue_index = queue_index;
@@ -367,7 +365,7 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui)
if (vui->clib_file_index != ~0)
{
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ clib_file_del_by_index (&file_main, vui->clib_file_index);
vui->clib_file_index = ~0;
}
@@ -750,8 +748,8 @@ vhost_user_socket_read (clib_file_t * uf)
/* if there is old fd, delete and close it */
if (vui->vrings[q].callfd_idx != ~0)
{
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].callfd_idx);
+ clib_file_t *uf =
+ clib_file_get (&file_main, vui->vrings[q].callfd_idx);
clib_file_del (&file_main, uf);
vui->vrings[q].callfd_idx = ~0;
}
@@ -823,8 +821,8 @@ vhost_user_socket_read (clib_file_t * uf)
if (vui->vrings[q].kickfd_idx != ~0)
{
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->vrings[q].kickfd_idx);
+ clib_file_t *uf =
+ clib_file_get (&file_main, vui->vrings[q].kickfd_idx);
clib_file_del (&file_main, uf);
vui->vrings[q].kickfd_idx = ~0;
}
@@ -1148,7 +1146,7 @@ vhost_user_socksvr_accept_ready (clib_file_t * uf)
{
vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d",
vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
- clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ clib_file_del_by_index (&file_main, vui->clib_file_index);
}
vu_log_debug (vui, "New client socket for vhost interface %d, fd %d",
@@ -1408,8 +1406,7 @@ vhost_user_term_if (vhost_user_intf_t * vui)
if (vui->unix_server_index != ~0)
{
//Close server socket
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- vui->unix_server_index);
+ clib_file_t *uf = clib_file_get (&file_main, vui->unix_server_index);
clib_file_del (&file_main, uf);
vui->unix_server_index = ~0;
unlink (vui->sock_filename);
@@ -1444,7 +1441,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vhost_user_vring_t *txvq = &vui->vrings[qid];
if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
- (txvq->thread_index != ~0))
+ (txvq->thread_index != CLIB_INVALID_THREAD_INDEX))
{
vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
ASSERT (cpu->polling_q_count != 0);
diff --git a/src/plugins/vhost/vhost_user.h b/src/plugins/vhost/vhost_user.h
index a3582affb4b..9e461979007 100644
--- a/src/plugins/vhost/vhost_user.h
+++ b/src/plugins/vhost/vhost_user.h
@@ -62,11 +62,13 @@
dev->hw_if_index, ##__VA_ARGS__); \
};
-#define UNIX_GET_FD(unixfd_idx) ({ \
- typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
- (__unixfd_idx != ~0) ? \
- pool_elt_at_index (file_main.file_pool, \
- __unixfd_idx)->file_descriptor : -1; })
+/*
+ * Yield the file_descriptor of the clib file that clib_file_get () returns
+ * for index unixfd_idx in file_main, or -1 when the index is ~0. The
+ * argument is copied into a local first, so it is evaluated only once.
+ */
+#define UNIX_GET_FD(unixfd_idx) \
+ ({ \
+ typeof (unixfd_idx) __unixfd_idx = (unixfd_idx); \
+ (__unixfd_idx != ~0) ? \
+ clib_file_get (&file_main, __unixfd_idx)->file_descriptor : \
+ -1; \
+ })
#define foreach_virtio_trace_flags \
_ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
@@ -229,7 +231,7 @@ typedef struct
u16 last_kick;
u8 first_kick;
u32 queue_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
} vhost_user_vring_t;
#define VHOST_USER_EVENT_START_TIMER 1
diff --git a/src/plugins/vhost/vhost_user_input.c b/src/plugins/vhost/vhost_user_input.c
index ca5072485ff..5dc1eedf52a 100644
--- a/src/plugins/vhost/vhost_user_input.c
+++ b/src/plugins/vhost/vhost_user_input.c
@@ -31,7 +31,7 @@
#include <linux/if_tun.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
diff --git a/src/plugins/vhost/vhost_user_output.c b/src/plugins/vhost/vhost_user_output.c
index 58fd4309f8c..3052ae39ec1 100644
--- a/src/plugins/vhost/vhost_user_output.c
+++ b/src/plugins/vhost/vhost_user_output.c
@@ -32,7 +32,7 @@
#include <linux/if_tun.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
@@ -382,7 +382,7 @@ vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
vhost_user_main_t *vum = &vhost_user_main;
u32 qid = rxvq->qid;
u8 error;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vhost_cpu_t *cpu = &vum->cpus[thread_index];
u32 map_hint = 0;
u8 retry = 8;
@@ -698,7 +698,7 @@ VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
u32 qid;
vhost_user_vring_t *rxvq;
u8 error;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vhost_cpu_t *cpu = &vum->cpus[thread_index];
u32 map_hint = 0;
u8 retry = 8;
@@ -1051,7 +1051,7 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
return clib_error_return (0, "unsupported");
}
- if (txvq->thread_index == ~0)
+ if (txvq->thread_index == CLIB_INVALID_THREAD_INDEX)
return clib_error_return (0, "Queue initialization is not finished yet");
cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c
index 25632546b6d..55fb418e501 100644
--- a/src/plugins/vmxnet3/input.c
+++ b/src/plugins/vmxnet3/input.c
@@ -203,7 +203,7 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vmxnet3_rx_comp *rx_comp;
u32 desc_idx;
vmxnet3_rxq_t *rxq;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 buffer_indices[VLIB_FRAME_SIZE], *bi;
u16 nexts[VLIB_FRAME_SIZE], *next;
vmxnet3_rx_ring *ring;
diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h
index 89602f8ee9e..8de992eaffe 100644
--- a/src/plugins/vmxnet3/vmxnet3.h
+++ b/src/plugins/vmxnet3/vmxnet3.h
@@ -523,7 +523,7 @@ typedef struct
u32 mode;
u8 buffer_pool_index;
u32 queue_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_comp *rx_comp;
diff --git a/src/plugins/vrrp/vrrp_periodic.c b/src/plugins/vrrp/vrrp_periodic.c
index 5f9d7ae938e..e3a374a112d 100644
--- a/src/plugins/vrrp/vrrp_periodic.c
+++ b/src/plugins/vrrp/vrrp_periodic.c
@@ -187,7 +187,19 @@ vrrp_periodic_process (vlib_main_t * vm,
timer = pool_elt_at_index (pm->vr_timers, next_timer);
timeout = timer->expire_time - now;
- vlib_process_wait_for_event_or_clock (vm, timeout);
+ /*
+ * Adding a virtual MAC to some NICs can take a significant amount
+ * of time (~1s). If a lot of VRs enter the master state around the
+ * same time, the process node can stay active for a very long time
+ * processing all of the transitions.
+ *
+ * Try to force a 10us sleep between processing events to ensure
+ * that the process node does not prevent API messages and RPCs
+ * from being handled for an extended period. This prevents
+ * vlib_process_wait_for_event_or_clock() from returning
+ * immediately.
+ */
+ vlib_process_wait_for_event_or_clock (vm, clib_max (timeout, 10e-6));
}
event_type = vlib_process_get_events (vm, (uword **) & event_data);
diff --git a/src/plugins/vxlan-gpe/CMakeLists.txt b/src/plugins/vxlan-gpe/CMakeLists.txt
new file mode 100644
index 00000000000..987ebcc2df9
--- /dev/null
+++ b/src/plugins/vxlan-gpe/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright (c) 2024 OpenInfra Foundation Europe
+# Copyright (c) 2025 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# vxlan-gpe plugin target: its sources, the header it installs, the source
+# listed under MULTIARCH_SOURCES (decap.c) and its API definition file.
+add_vpp_plugin(vxlan-gpe
+ SOURCES
+ encap.c
+ decap.c
+ vxlan_gpe.c
+ vxlan_gpe_api.c
+ vxlan_gpe_packet.h
+ plugin.c
+
+ INSTALL_HEADERS
+ vxlan_gpe.h
+
+ MULTIARCH_SOURCES
+ decap.c
+
+ API_FILES
+ vxlan_gpe.api
+)
diff --git a/src/vnet/vxlan-gpe/FEATURE.yaml b/src/plugins/vxlan-gpe/FEATURE.yaml
index f4ec2f4c517..f4ec2f4c517 100644
--- a/src/vnet/vxlan-gpe/FEATURE.yaml
+++ b/src/plugins/vxlan-gpe/FEATURE.yaml
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/plugins/vxlan-gpe/decap.c
index d4c7424630d..80f2facef29 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/plugins/vxlan-gpe/decap.c
@@ -22,7 +22,7 @@
#include <vlib/vlib.h>
#include <vnet/udp/udp_local.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vxlan-gpe/vxlan_gpe.h>
/**
* @brief Struct for VXLAN GPE decap packet tracing
@@ -210,7 +210,7 @@ vxlan_gpe_input (vlib_main_t * vm,
vxlan4_gpe_tunnel_cache_t last4;
vxlan6_gpe_tunnel_cache_t last6;
u32 pkts_decapsulated = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
if (is_ip4)
@@ -617,7 +617,7 @@ VLIB_NODE_FN (vxlan6_gpe_input_node) (vlib_main_t * vm,
*/
static char *vxlan_gpe_error_strings[] = {
#define vxlan_gpe_error(n,s) s,
-#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#include <vxlan-gpe/vxlan_gpe_error.def>
#undef vxlan_gpe_error
#undef _
};
diff --git a/src/vnet/vxlan-gpe/dir.dox b/src/plugins/vxlan-gpe/dir.dox
index c154733b21f..c154733b21f 100644
--- a/src/vnet/vxlan-gpe/dir.dox
+++ b/src/plugins/vxlan-gpe/dir.dox
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/plugins/vxlan-gpe/encap.c
index a769861577d..701c3af55b5 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/plugins/vxlan-gpe/encap.c
@@ -23,7 +23,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/udp/udp_inlines.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vxlan-gpe/vxlan_gpe.h>
/** Statistics (not really errors) */
#define foreach_vxlan_gpe_encap_error \
@@ -156,7 +156,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
vnet_main_t *vnm = ngm->vnet_main;
vnet_interface_main_t *im = &vnm->interface_main;
u32 pkts_encapsulated = 0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
diff --git a/src/plugins/vxlan-gpe/plugin.c b/src/plugins/vxlan-gpe/plugin.c
new file mode 100644
index 00000000000..5a711a39d78
--- /dev/null
+++ b/src/plugins/vxlan-gpe/plugin.c
@@ -0,0 +1,26 @@
+/*
+ * plugin.c: vxlan-gpe
+ *
+ * Copyright (c) OpenInfra Foundation Europe.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+// Plugin registration record: reports the VPP build version and a description.
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "VxLan GPE Tunnels",
+};
diff --git a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt
index 35cee50f573..35cee50f573 100644
--- a/src/vnet/vxlan-gpe/vxlan-gpe-rfc.txt
+++ b/src/plugins/vxlan-gpe/vxlan-gpe-rfc.txt
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/plugins/vxlan-gpe/vxlan_gpe.api
index 3cbd7ab7f71..3cbd7ab7f71 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.api
+++ b/src/plugins/vxlan-gpe/vxlan_gpe.api
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/plugins/vxlan-gpe/vxlan_gpe.c
index 5a5262ea9db..abb2049a356 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/src/plugins/vxlan-gpe/vxlan_gpe.c
@@ -17,7 +17,7 @@
* @brief Common utility functions for IPv4 and IPv6 VXLAN GPE tunnels
*
*/
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vxlan-gpe/vxlan_gpe.h>
#include <vnet/fib/fib.h>
#include <vnet/ip/format.h>
#include <vnet/fib/fib_entry.h>
@@ -44,7 +44,7 @@
* You can refer to this kind of L2 overlay bridge domain as a VXLAN-GPE segment.
*/
-vxlan_gpe_main_t vxlan_gpe_main;
+vxlan_gpe_main_t vxlan_gpe_main __clib_export;
static u8 *
format_decap_next (u8 * s, va_list * args)
@@ -1212,11 +1212,13 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
* @return error
*
*/
-clib_error_t *
-vxlan_gpe_init (vlib_main_t * vm)
+__clib_export clib_error_t *
+vxlan_gpe_init (vlib_main_t *vm)
{
vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+ ngm->register_decap_protocol = vxlan_gpe_register_decap_protocol;
+ ngm->unregister_decap_protocol = vxlan_gpe_unregister_decap_protocol;
ngm->vnet_main = vnet_get_main ();
ngm->vlib_main = vm;
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/plugins/vxlan-gpe/vxlan_gpe.h
index aabaafeee6f..138ae840ef5 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.h
+++ b/src/plugins/vxlan-gpe/vxlan_gpe.h
@@ -29,7 +29,7 @@
#include <vnet/l2/l2_output.h>
#include <vnet/l2/l2_bd.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vxlan-gpe/vxlan_gpe_packet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
@@ -196,11 +196,16 @@ typedef enum
typedef enum
{
#define vxlan_gpe_error(n,s) VXLAN_GPE_ERROR_##n,
-#include <vnet/vxlan-gpe/vxlan_gpe_error.def>
+#include <plugins/vxlan-gpe/vxlan_gpe_error.def>
#undef vxlan_gpe_error
VXLAN_GPE_N_ERROR,
} vxlan_gpe_input_error_t;
+/* Function-pointer types for registering and unregistering a decap
+ * protocol; both take the protocol id and a next node index and return
+ * nothing. */
+typedef void (*vxlan_gpe_register_decap_protocol_callback_t) (
+ u8 protocol_id, uword next_node_index);
+typedef void (*vxlan_gpe_unregister_decap_protocol_callback_t) (
+ u8 protocol_id, uword next_node_index);
+
/** Struct for VXLAN GPE node state */
typedef struct
{
@@ -233,6 +238,10 @@ typedef struct
/** List of next nodes for the decap indexed on protocol */
uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
+
+ /* export callbacks to register/unregister decapsulation protocol */
+ vxlan_gpe_register_decap_protocol_callback_t register_decap_protocol;
+ vxlan_gpe_unregister_decap_protocol_callback_t unregister_decap_protocol;
} vxlan_gpe_main_t;
extern vxlan_gpe_main_t vxlan_gpe_main;
@@ -279,13 +288,10 @@ typedef enum
VXLAN_GPE_ENCAP_N_NEXT
} vxlan_gpe_encap_next_t;
-
+void vxlan_gpe_register_decap_protocol (u8 protocol_id, uword next_node_index);
void vxlan_gpe_unregister_decap_protocol (u8 protocol_id,
uword next_node_index);
-void vxlan_gpe_register_decap_protocol (u8 protocol_id,
- uword next_node_index);
-
void vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6,
u8 is_enable);
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/plugins/vxlan-gpe/vxlan_gpe_api.c
index cc74e1f58d4..e82445498e8 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c
+++ b/src/plugins/vxlan-gpe/vxlan_gpe_api.c
@@ -23,13 +23,13 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
#include <vnet/feature/feature.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vxlan-gpe/vxlan_gpe.h>
#include <vnet/fib/fib_table.h>
#include <vnet/format_fns.h>
#include <vnet/ip/ip_types_api.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.api_enum.h>
-#include <vnet/vxlan-gpe/vxlan_gpe.api_types.h>
+#include <vxlan-gpe/vxlan_gpe.api_enum.h>
+#include <vxlan-gpe/vxlan_gpe.api_types.h>
#define REPLY_MSG_ID_BASE msg_id_base
#include <vlibapi/api_helper_macros.h>
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_error.def b/src/plugins/vxlan-gpe/vxlan_gpe_error.def
index 9cf1b1cb656..9cf1b1cb656 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_error.def
+++ b/src/plugins/vxlan-gpe/vxlan_gpe_error.def
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h
index f5e5ddc2347..f5e5ddc2347 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_packet.h
+++ b/src/plugins/vxlan-gpe/vxlan_gpe_packet.h
diff --git a/src/plugins/vxlan/decap.c b/src/plugins/vxlan/decap.c
index 5f28c5e97bb..4ad35bc2d5d 100644
--- a/src/plugins/vxlan/decap.c
+++ b/src/plugins/vxlan/decap.c
@@ -193,7 +193,7 @@ vxlan_input (vlib_main_t * vm,
last_tunnel_cache4 last4;
last_tunnel_cache6 last6;
u32 pkts_dropped = 0;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
if (is_ip4)
clib_memset (&last4, 0xff, sizeof last4);
@@ -1039,7 +1039,7 @@ VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
[VXLAN_FLOW_NEXT_L2_INPUT] =
im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
};
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 *from = vlib_frame_vector_args (f);
u32 n_left_from = f->n_vectors;
diff --git a/src/plugins/vxlan/encap.c b/src/plugins/vxlan/encap.c
index 98464d809ba..60181bff451 100644
--- a/src/plugins/vxlan/encap.c
+++ b/src/plugins/vxlan/encap.c
@@ -78,7 +78,7 @@ vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_combined_counter_main_t *tx_counter =
im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
u32 pkts_encapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 sw_if_index0 = 0, sw_if_index1 = 0;
u32 next0 = 0, next1 = 0;
vxlan_tunnel_t *t0 = NULL, *t1 = NULL;
diff --git a/src/plugins/wireguard/wireguard_input.c b/src/plugins/wireguard/wireguard_input.c
index 1eb7fbfed0b..0ae0480fc2c 100644
--- a/src/plugins/wireguard/wireguard_input.c
+++ b/src/plugins/wireguard/wireguard_input.c
@@ -698,7 +698,7 @@ wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
vlib_buffer_t *lb;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_crypto_op_t **crypto_ops;
const u16 drop_next = WG_INPUT_NEXT_PUNT;
message_type_t header_type;
diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c
index c9411f6ff20..7bbec11fdcb 100644
--- a/src/plugins/wireguard/wireguard_output_tun.c
+++ b/src/plugins/wireguard/wireguard_output_tun.c
@@ -436,7 +436,7 @@ wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_crypto_op_t **crypto_ops;
u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u16 n_sync = 0;
const u16 drop_next = WG_OUTPUT_NEXT_ERROR;
const u8 is_async = wg_op_mode_is_set_ASYNC ();
diff --git a/src/scripts/generate_version_h b/src/scripts/generate_version_h
index e8379550186..42359b7aa9c 100755
--- a/src/scripts/generate_version_h
+++ b/src/scripts/generate_version_h
@@ -2,6 +2,11 @@
: ${VPP_BUILD_USER:=$(whoami)}
: ${VPP_BUILD_HOST:=$(hostname)}
: ${VPP_BUILD_TOPDIR:=$(git rev-parse --show-toplevel 2> /dev/null)}
+if [ -n "${2}" ] && [ "${2}" != "default" ] ; then
+ VPP_PLATFORM="-${2}"
+else
+ VPP_PLATFORM=""
+fi
DATE_FMT="+%Y-%m-%dT%H:%M:%S"
SOURCE_DATE_EPOCH="${SOURCE_DATE_EPOCH:-$(date +%s)}"
VPP_BUILD_DATE=$(date -u -d "@$SOURCE_DATE_EPOCH" "$DATE_FMT" 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" "$DATE_FMT" 2>/dev/null || date -u "$DATE_FMT")
@@ -29,6 +34,6 @@ cat > ${1} << __EOF__
#define VPP_BUILD_USER "$VPP_BUILD_USER"
#define VPP_BUILD_HOST "$VPP_BUILD_HOST"
#define VPP_BUILD_TOPDIR "$VPP_BUILD_TOPDIR"
-#define VPP_BUILD_VER "$(scripts/version)"
+#define VPP_BUILD_VER "$(scripts/version)${VPP_PLATFORM}"
#endif
__EOF__
diff --git a/src/scripts/host-stack/cc_plots.py b/src/scripts/host-stack/cc_plots.py
deleted file mode 100755
index f7953f223d4..00000000000
--- a/src/scripts/host-stack/cc_plots.py
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import re
-import argparse
-import matplotlib.pyplot as plt
-from matplotlib.lines import Line2D
-
-
-class Point:
- "CC event"
-
- def __init__(self, x, y):
- self.x = x
- self.y = y
-
-
-def listx(points):
- return list(map(lambda pt: pt.x, points))
-
-
-def listy(points):
- return list(map(lambda pt: pt.y, points))
-
-
-def plot_data(d):
- plt.figure(1)
-
- cwndx = listx(d["cwnd"])
- cwndy = listy(d["cwnd"])
- congx = listx(d["congestion"])
- congy = listy(d["congestion"])
- rcvrdx = listx(d["recovered"])
- rcvrdy = listy(d["recovered"])
- rxttx = listx(d["rxtTimeout"])
- rxtty = listy(d["rxtTimeout"])
-
- # cwnd/ssthresh/cc events
- plt.subplot(311)
- plt.title("cwnd/ssthresh")
- pcwnd = plt.plot(cwndx, cwndy, "r")
- psst = plt.plot(cwndx, d["ssthresh"], "y-")
- pcong = plt.plot(congx, congy, "yo")
- precov = plt.plot(rcvrdx, rcvrdy, "co")
- prxtt = plt.plot(rxttx, rxtty, "mo")
-
- marker1 = Line2D(range(1), range(1), color="r")
- marker2 = Line2D(range(1), range(1), color="y")
- marker3 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="y")
- marker4 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="c")
- marker5 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="m")
- plt.legend(
- (marker1, marker2, marker3, marker4, marker5),
- ("cwnd", "ssthresh", "congestion", "recovered", "rxt-timeout"),
- loc=4,
- )
- axes = plt.gca()
- axes.set_ylim([-20e4, max(cwndy) + 20e4])
-
- # snd variables
- plt.subplot(312)
- plt.title("cc variables")
- plt.plot(cwndx, d["space"], "g-", markersize=1)
- plt.plot(cwndx, d["flight"], "b-", markersize=1)
- plt.plot(cwndx, d["sacked"], "m:", markersize=1)
- plt.plot(cwndx, d["lost"], "y:", markersize=1)
- plt.plot(cwndx, d["cc-space"], "k:", markersize=1)
- plt.plot(cwndx, cwndy, "ro", markersize=2)
-
- plt.plot(congx, congy, "y^", markersize=10, markerfacecolor="y")
- plt.plot(rcvrdx, rcvrdy, "c^", markersize=10, markerfacecolor="c")
- plt.plot(rxttx, rxtty, "m^", markersize=10, markerfacecolor="m")
-
- # plt.plot(cwndx, d["snd_wnd"], 'ko', markersize=1)
- plt.legend(
- (
- "snd-space",
- "flight",
- "sacked",
- "lost",
- "cc-space",
- "cwnd",
- "congestion",
- "recovered",
- "rxt-timeout",
- ),
- loc=1,
- )
-
- # rto/srrt/rttvar
- plt.subplot(313)
- plt.title("rtt")
- plt.plot(cwndx, d["srtt"], "g-")
- plt.plot(cwndx, [x / 1000 for x in d["mrtt-us"]], "r-")
- plt.plot(cwndx, d["rttvar"], "b-")
- plt.legend(["srtt", "mrtt-us", "rttvar"])
- axes = plt.gca()
- # plt.plot(cwndx, rto, 'r-')
- # axes.set_ylim([0, int(max(rto[2:len(rto)])) + 50])
-
- # show
- plt.show()
-
-
-def find_pattern(file_path, session_idx):
- is_active_open = 1
- listener_pattern = "l\[\d\]"
- if is_active_open:
- initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s+open:\s"
- else:
- initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s"
- idx = 0
- f = open(file_path, "r")
- for line in f:
- # skip listener lines (server)
- if re.search(listener_pattern, line) != None:
- continue
- match = re.search(initial_pattern, line)
- if match == None:
- continue
- if idx < session_idx:
- idx += 1
- continue
- filter_pattern = str(match.group(1)) + "\s+(.+)"
- print("pattern is %s" % filter_pattern)
- f.close()
- return filter_pattern
- raise Exception("Could not find initial pattern")
-
-
-def compute_time(min, sec, msec):
- return int(min) * 60 + int(sec) + int(msec) / 1000.0
-
-
-def run(file_path, session_idx):
- filter_sessions = 1
- filter_pattern = ""
-
- patterns = {
- "time": "^\d+:(\d+):(\d+):(\d+):\d+",
- "listener": "l\[\d\]",
- "cc": "cwnd (\d+) flight (\d+) space (\d+) ssthresh (\d+) snd_wnd (\d+)",
- "cc-snd": "cc_space (\d+) sacked (\d+) lost (\d+)",
- "rtt": "rto (\d+) srtt (\d+) mrtt-us (\d+) rttvar (\d+)",
- "rxtt": "rxt-timeout",
- "congestion": "congestion",
- "recovered": "recovered",
- }
- d = {
- "cwnd": [],
- "space": [],
- "flight": [],
- "ssthresh": [],
- "snd_wnd": [],
- "cc-space": [],
- "lost": [],
- "sacked": [],
- "rto": [],
- "srtt": [],
- "mrtt-us": [],
- "rttvar": [],
- "rxtTimeout": [],
- "congestion": [],
- "recovered": [],
- }
-
- if filter_sessions:
- filter_pattern = find_pattern(file_path, session_idx)
- f = open(file_path, "r")
-
- stats_index = 0
- start_time = 0
-
- for line in f:
- # skip listener lines (server)
- if re.search(patterns["listener"], line) != None:
- continue
- # filter sessions
- if filter_sessions:
- match = re.search(filter_pattern, line)
- if match == None:
- continue
-
- original_line = line
- line = match.group(1)
- match = re.search(patterns["time"], original_line)
- if match == None:
- print("something went wrong! no time!")
- continue
- time = compute_time(match.group(1), match.group(2), match.group(3))
- if start_time == 0:
- start_time = time
-
- time = time - start_time
- match = re.search(patterns["cc"], line)
- if match != None:
- d["cwnd"].append(Point(time, int(match.group(1))))
- d["flight"].append(int(match.group(2)))
- d["space"].append(int(match.group(3)))
- d["ssthresh"].append(int(match.group(4)))
- d["snd_wnd"].append(int(match.group(5)))
- stats_index += 1
- continue
- match = re.search(patterns["cc-snd"], line)
- if match != None:
- d["cc-space"].append(int(match.group(1)))
- d["sacked"].append(int(match.group(2)))
- d["lost"].append(int(match.group(3)))
- match = re.search(patterns["rtt"], line)
- if match != None:
- d["rto"].append(int(match.group(1)))
- d["srtt"].append(int(match.group(2)))
- d["mrtt-us"].append(int(match.group(3)))
- d["rttvar"].append(int(match.group(4)))
- if stats_index == 0:
- continue
- match = re.search(patterns["rxtt"], line)
- if match != None:
- d["rxtTimeout"].append(Point(time, d["cwnd"][stats_index - 1].y + 1e4))
- continue
- match = re.search(patterns["congestion"], line)
- if match != None:
- d["congestion"].append(Point(time, d["cwnd"][stats_index - 1].y - 1e4))
- continue
- match = re.search(patterns["recovered"], line)
- if match != None:
- d["recovered"].append(Point(time, d["cwnd"][stats_index - 1].y))
- continue
-
- plot_data(d)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Plot tcp cc logs")
- parser.add_argument(
- "-f", action="store", dest="file", required=True, help="elog file in txt format"
- )
- parser.add_argument(
- "-s",
- action="store",
- dest="session_index",
- default=0,
- help="session index for which to plot cc logs",
- )
- results = parser.parse_args()
- run(results.file, int(results.session_index))
diff --git a/src/scripts/host-stack/convert_evt b/src/scripts/host-stack/convert_evt
deleted file mode 100755
index 1aba67d0268..00000000000
--- a/src/scripts/host-stack/convert_evt
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-# This depends on c2cpel and cpeldump. Enable their compilation by:
-# ccmake build-root/build-vpp-native/vpp/
-# and turning on VPP_BUILD_PERFTOOL
-
-BIN_PATH=../../../build-root/install-vpp-native/vpp/bin
-C2CPEL_BIN=$BIN_PATH/c2cpel
-CPELDUMP_BIN=$BIN_PATH/cpeldump
-
-$C2CPEL_BIN --in $1 --out /tmp/tmp_file.cpel
-$CPELDUMP_BIN --in /tmp/tmp_file.cpel --out $2
diff --git a/src/vat2/main.c b/src/vat2/main.c
index bf415854db1..2949c4899aa 100644
--- a/src/vat2/main.c
+++ b/src/vat2/main.c
@@ -253,16 +253,15 @@ print_help (void)
"Send API message to VPP and print reply\n"
"\n"
"-d, --debug Print additional information\n"
- "-p, --prefix <prefix> Specify shared memory prefix to connect "
- "to a given VPP instance\n"
+ "--dump-apis List all APIs available in VAT2 (might "
+ "not reflect running VPP)\n"
"-f, --file <filename> File containing a JSON object with the "
"arguments for the message to send\n"
+ "-p, --plugin-path Plugin path\n"
+ "-s, --prefix <prefix> Specify shared memory prefix to connect "
+ "to a given VPP instance\n"
"-t, --template <message-name> Print a template JSON object for given API"
- " message\n"
- "--dump-apis List all APIs available in VAT2 (might "
- "not reflect running VPP)\n"
- "--plugin-path Pluing path"
- "\n";
+ " message\n";
printf ("%s", help_string);
}
@@ -281,38 +280,38 @@ main (int argc, char **argv)
char *msgname = 0;
static struct option long_options[] = {
{ "debug", no_argument, 0, 'd' },
- { "prefix", required_argument, 0, 's' },
- { "file", required_argument, 0, 'f' },
{ "dump-apis", no_argument, 0, 0 },
- { "template", required_argument, 0, 't' },
+ { "file", required_argument, 0, 'f' },
{ "plugin-path", required_argument, 0, 'p' },
+ { "prefix", required_argument, 0, 's' },
+ { "template", required_argument, 0, 't' },
{ 0, 0, 0, 0 }
};
- while ((c = getopt_long (argc, argv, "hdp:f:t:", long_options,
+ while ((c = getopt_long (argc, argv, "df:p:s:t:", long_options,
&option_index)) != -1)
{
switch (c)
{
case 0:
- if (option_index == 3)
+ if (option_index == 1)
dump_api = true;
break;
case 'd':
vat2_debug = true;
break;
- case 't':
- template = optarg;
- break;
- case 's':
- prefix = optarg;
- break;
case 'f':
filename = optarg;
break;
case 'p':
pluginpath = optarg;
break;
+ case 's':
+ prefix = optarg;
+ break;
+ case 't':
+ template = optarg;
+ break;
case '?':
print_help ();
return 1;
diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c
index f38df8fbf47..7ba9fab25fa 100644
--- a/src/vcl/vcl_locked.c
+++ b/src/vcl/vcl_locked.c
@@ -743,6 +743,7 @@ vls_listener_wrk_start_listen (vcl_locked_session_t * vls, u32 wrk_index)
if (ls->flags & VCL_SESSION_F_PENDING_LISTEN)
return;
+ ls->flags &= ~VCL_SESSION_F_LISTEN_NO_MQ;
vcl_send_session_listen (wrk, ls);
vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */);
@@ -759,7 +760,7 @@ vls_listener_wrk_stop_listen (vcl_locked_session_t * vls, u32 wrk_index)
if (s->session_state != VCL_STATE_LISTEN)
return;
vcl_send_session_unlisten (wrk, s);
- s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ s->flags |= VCL_SESSION_F_LISTEN_NO_MQ;
vls_listener_wrk_set (vls, wrk_index, 0 /* is_active */ );
}
@@ -912,7 +913,7 @@ vls_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls)
if (s->session_state == VCL_STATE_LISTEN)
{
- s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ s->flags |= VCL_SESSION_F_LISTEN_NO_MQ;
s->rx_fifo = s->tx_fifo = 0;
}
else if (s->rx_fifo)
@@ -1384,36 +1385,41 @@ vls_mp_checks (vcl_locked_session_t * vls, int is_add)
switch (s->session_state)
{
case VCL_STATE_LISTEN:
- if (is_add)
+ if (!(s->flags & VCL_SESSION_F_LISTEN_NO_MQ))
{
- vls_listener_wrk_set (vls, vls->vcl_wrk_index, 1 /* is_active */);
- break;
+ if (is_add)
+ {
+ vls_listener_wrk_set (vls, vls->vcl_wrk_index,
+ 1 /* is_active */);
+ break;
+ }
+ /* Although removal from epoll means listener no longer accepts new
+ * sessions, the accept queue built by vpp cannot be drained by
+ * stopping the listener. Moreover, some applications, e.g., nginx,
+ * might constantly remove and add listeners to their epfds. Removing
+ * listeners in such situations causes a lot of churn in vpp as
+ * segments and segment managers need to be recreated. */
+ /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */
+ }
+ else
+ {
+ if (!is_add)
+ break;
+
+ /* Register worker as listener */
+ vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index);
+
+ /* If owner worker did not attempt to accept/xpoll on the session,
+ * force a listen stop for it, since it may not be interested in
+ * accepting new sessions.
+ * This is pretty much a hack done to give app workers the illusion
+ * that it is fine to listen and not accept new sessions for a
+ * given listener. Without it, we would accumulate unhandled
+ * accepts on the passive worker message queue. */
+ owner_wrk = vls_shared_get_owner (vls);
+ if (!vls_listener_wrk_is_active (vls, owner_wrk))
+ vls_listener_wrk_stop_listen (vls, owner_wrk);
}
- /* Although removal from epoll means listener no longer accepts new
- * sessions, the accept queue built by vpp cannot be drained by stopping
- * the listener. Morover, some applications, e.g., nginx, might
- * constantly remove and add listeners to their epfds. Removing
- * listeners in such situations causes a lot of churn in vpp as segments
- * and segment managers need to be recreated. */
- /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */
- break;
- case VCL_STATE_LISTEN_NO_MQ:
- if (!is_add)
- break;
-
- /* Register worker as listener */
- vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index);
-
- /* If owner worker did not attempt to accept/xpoll on the session,
- * force a listen stop for it, since it may not be interested in
- * accepting new sessions.
- * This is pretty much a hack done to give app workers the illusion
- * that it is fine to listen and not accept new sessions for a
- * given listener. Without it, we would accumulate unhandled
- * accepts on the passive worker message queue. */
- owner_wrk = vls_shared_get_owner (vls);
- if (!vls_listener_wrk_is_active (vls, owner_wrk))
- vls_listener_wrk_stop_listen (vls, owner_wrk);
break;
default:
break;
diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c
index d9814394f0d..80fbd8b4c7e 100644
--- a/src/vcl/vcl_private.c
+++ b/src/vcl/vcl_private.c
@@ -49,7 +49,7 @@ vcl_mq_epoll_add_api_sock (vcl_worker_t *wrk)
struct epoll_event e = { 0 };
int rv;
- e.data.u32 = ~0;
+ e.data.u32 = VCL_EP_SAPIFD_EVT;
rv = epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, cs->fd, &e);
if (rv != EEXIST && rv < 0)
return -1;
@@ -189,6 +189,55 @@ vcl_worker_cleanup_cb (void *arg)
}
void
+vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk)
+{
+ /* Generate mq epfd events using pipes to hopefully force
+ * calls into epoll_wait which retries attaching to vpp.
+ * The pipe is created only while detached_pipefds[0] is still zero, so
+ * a later call reuses the existing pipe. Any failure here exits the
+ * process. */
+ if (!wrk->detached_pipefds[0])
+ {
+ if (pipe (wrk->detached_pipefds))
+ {
+ VDBG (0, "failed to create detached signal pipe");
+ exit (1);
+ }
+ }
+
+ /* Register the pipe's read end with the mq epoll fd, tagged so the
+ * event can be told apart from message queue events */
+ struct epoll_event evt = {};
+ evt.events = EPOLLIN;
+ evt.data.u32 = VCL_EP_PIPEFD_EVT;
+ if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, wrk->detached_pipefds[0],
+ &evt) < 0)
+ {
+ VDBG (0, "failed to add mq eventfd to mq epoll fd");
+ exit (1);
+ }
+
+ /* Queue one byte so the read end is readable right away; the result of
+ * the write is intentionally ignored */
+ int __clib_unused rv;
+ u8 sig = 1;
+ rv = write (wrk->detached_pipefds[1], &sig, 1);
+}
+
+void
+vcl_worker_detached_signal_mq (vcl_worker_t *wrk)
+{
+ /* Consume the pending byte from the signal pipe and queue one again, so
+ * the pipe's read end is readable after this call. Results of both calls
+ * are intentionally ignored. */
+ int __clib_unused rv;
+ /* Initialized so that a failed read never forwards an indeterminate
+ * stack byte to the pipe */
+ u8 buf = 1;
+ rv = read (wrk->detached_pipefds[0], &buf, 1);
+ rv = write (wrk->detached_pipefds[1], &buf, 1);
+}
+
+void
+vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk)
+{
+ /* Unregister the signal pipe's read end from the mq epoll fd. The pipe
+ * fds themselves are not closed here. Exits the process on failure. */
+ int rv;
+
+ rv = epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_DEL, wrk->detached_pipefds[0], 0);
+ if (rv >= 0)
+ return;
+
+ VDBG (0, "failed to del mq eventfd to mq epoll fd");
+ exit (1);
+}
+
+void
vcl_worker_detach_sessions (vcl_worker_t *wrk)
{
session_event_t *e;
@@ -201,17 +250,17 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk)
{
if (s->session_state == VCL_STATE_LISTEN)
{
- s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ s->flags |= VCL_SESSION_F_LISTEN_NO_MQ;
continue;
}
if ((s->flags & VCL_SESSION_F_IS_VEP) ||
- s->session_state == VCL_STATE_LISTEN_NO_MQ ||
s->session_state == VCL_STATE_CLOSED)
continue;
hash_set (seg_indices_map, s->tx_fifo->segment_index, 1);
s->session_state = VCL_STATE_DETACHED;
+ s->flags |= VCL_SESSION_F_APP_CLOSING;
vec_add2 (wrk->unhandled_evts_vector, e, 1);
e->event_type = SESSION_CTRL_EVT_DISCONNECTED;
e->session_index = s->session_index;
@@ -221,13 +270,26 @@ vcl_worker_detach_sessions (vcl_worker_t *wrk)
hash_foreach (seg_index, val, seg_indices_map,
({ vec_add1 (seg_indices, seg_index); }));
+ /* If multi-threaded apps, wait for all threads to hopefully finish
+ * their blocking operations */
+ if (wrk->pre_wait_fn)
+ wrk->pre_wait_fn (VCL_INVALID_SESSION_INDEX);
+ sleep (1);
+ if (wrk->post_wait_fn)
+ wrk->post_wait_fn (VCL_INVALID_SESSION_INDEX);
+
vcl_segment_detach_segments (seg_indices);
/* Detach worker's mqs segment */
vcl_segment_detach (vcl_vpp_worker_segment_handle (wrk->wrk_index));
+ wrk->app_event_queue = 0;
+ wrk->ctrl_mq = 0;
+
vec_free (seg_indices);
hash_free (seg_indices_map);
+
+ vcl_worker_detached_start_signal_mq (wrk);
}
void
@@ -364,8 +426,8 @@ vcl_session_read_ready (vcl_session_t * s)
}
else
{
- return (s->session_state == VCL_STATE_DISCONNECT) ?
- VPPCOM_ECONNRESET : VPPCOM_ENOTCONN;
+ return (s->session_state == VCL_STATE_DISCONNECT) ? VPPCOM_ECONNRESET :
+ VPPCOM_ENOTCONN;
}
}
@@ -773,9 +835,6 @@ vcl_session_state_str (vcl_session_state_t state)
case VCL_STATE_UPDATED:
st = "STATE_UPDATED";
break;
- case VCL_STATE_LISTEN_NO_MQ:
- st = "STATE_LISTEN_NO_MQ";
- break;
default:
st = "UNKNOWN_STATE";
break;
diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h
index c98e1cde9b1..609653f20a4 100644
--- a/src/vcl/vcl_private.h
+++ b/src/vcl/vcl_private.h
@@ -32,6 +32,8 @@
#endif
#define VPPCOM_DEBUG vcm->debug
+#define VCL_EP_SAPIFD_EVT ((u32) ~0)
+#define VCL_EP_PIPEFD_EVT ((u32) (~0 - 1))
extern __thread uword __vcl_worker_index;
@@ -71,7 +73,6 @@ typedef enum vcl_session_state_
VCL_STATE_DISCONNECT,
VCL_STATE_DETACHED,
VCL_STATE_UPDATED,
- VCL_STATE_LISTEN_NO_MQ,
} vcl_session_state_t;
typedef struct epoll_event vppcom_epoll_event_t;
@@ -144,6 +145,7 @@ typedef enum vcl_session_flags_
VCL_SESSION_F_PENDING_FREE = 1 << 7,
VCL_SESSION_F_PENDING_LISTEN = 1 << 8,
VCL_SESSION_F_APP_CLOSING = 1 << 9,
+ VCL_SESSION_F_LISTEN_NO_MQ = 1 << 10,
} __clib_packed vcl_session_flags_t;
typedef enum vcl_worker_wait_
@@ -325,6 +327,9 @@ typedef struct vcl_worker_
/* functions to be called pre/post wait if vcl managed by vls */
vcl_worker_wait_mq_fn pre_wait_fn;
vcl_worker_wait_mq_fn post_wait_fn;
+
+ /* mq_epfd signal pipes when wrk detached from vpp */
+ int detached_pipefds[2];
} vcl_worker_t;
STATIC_ASSERT (sizeof (session_disconnected_msg_t) <= 16,
@@ -563,9 +568,8 @@ vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 handle)
return 0;
}
- ASSERT (s->session_state == VCL_STATE_LISTEN
- || s->session_state == VCL_STATE_LISTEN_NO_MQ
- || vcl_session_is_connectable_listener (wrk, s));
+ ASSERT (s->session_state == VCL_STATE_LISTEN ||
+ vcl_session_is_connectable_listener (wrk, s));
return s;
}
@@ -800,6 +804,9 @@ void vcl_worker_detach_sessions (vcl_worker_t *wrk);
void vcl_worker_set_wait_mq_fns (vcl_worker_wait_mq_fn pre_wait,
vcl_worker_wait_mq_fn post_wait);
+void vcl_worker_detached_start_signal_mq (vcl_worker_t *wrk);
+void vcl_worker_detached_signal_mq (vcl_worker_t *wrk);
+void vcl_worker_detached_stop_signal_mq (vcl_worker_t *wrk);
/*
* VCL Binary API
*/
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index 19d58c349b7..6f84178de79 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -519,8 +519,7 @@ vcl_session_reset_handler (vcl_worker_t * wrk,
}
/* Caught a reset before actually accepting the session */
- if (session->session_state == VCL_STATE_LISTEN ||
- session->session_state == VCL_STATE_LISTEN_NO_MQ)
+ if (session->session_state == VCL_STATE_LISTEN)
{
if (!vcl_flag_accepted_session (session, reset_msg->handle,
VCL_ACCEPTED_F_RESET))
@@ -712,8 +711,7 @@ vcl_session_disconnected_handler (vcl_worker_t * wrk,
return 0;
/* Caught a disconnect before actually accepting the session */
- if (session->session_state == VCL_STATE_LISTEN ||
- session->session_state == VCL_STATE_LISTEN_NO_MQ)
+ if (session->session_state == VCL_STATE_LISTEN)
{
if (!vcl_flag_accepted_session (session, msg->handle,
VCL_ACCEPTED_F_CLOSED))
@@ -1085,8 +1083,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e)
* VPP_CLOSING state instead can been marked as ACCEPTED_F_CLOSED.
*/
if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) &&
- !(s->session_state == VCL_STATE_LISTEN ||
- s->session_state == VCL_STATE_LISTEN_NO_MQ))
+ !(s->session_state == VCL_STATE_LISTEN))
{
s->session_state = VCL_STATE_VPP_CLOSING;
s->flags |= VCL_SESSION_F_PENDING_DISCONNECT;
@@ -1114,8 +1111,7 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e)
* DISCONNECT state instead can been marked as ACCEPTED_F_RESET.
*/
if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) &&
- !(s->session_state == VCL_STATE_LISTEN ||
- s->session_state == VCL_STATE_LISTEN_NO_MQ))
+ !(s->session_state == VCL_STATE_LISTEN))
{
s->flags |= VCL_SESSION_F_PENDING_DISCONNECT;
s->session_state = VCL_STATE_DISCONNECT;
@@ -1331,6 +1327,12 @@ vppcom_session_unbind (u32 session_handle)
}
clib_fifo_free (session->accept_evts_fifo);
+ if (session->flags & VCL_SESSION_F_LISTEN_NO_MQ)
+ {
+ vcl_session_free (wrk, session);
+ return VPPCOM_OK;
+ }
+
vcl_send_session_unlisten (wrk, session);
VDBG (0, "session %u [0x%llx]: sending unbind!", session->session_index,
@@ -1402,6 +1404,8 @@ vcl_api_retry_attach (vcl_worker_t *wrk)
{
vcl_session_t *s;
+ vcl_worker_detached_signal_mq (wrk);
+
clib_spinlock_lock (&vcm->workers_lock);
if (vcl_is_first_reattach_to_execute ())
{
@@ -1410,12 +1414,14 @@ vcl_api_retry_attach (vcl_worker_t *wrk)
clib_spinlock_unlock (&vcm->workers_lock);
return;
}
+ vcl_worker_detached_stop_signal_mq (wrk);
vcl_set_reattach_counter ();
clib_spinlock_unlock (&vcm->workers_lock);
}
else
{
vcl_set_reattach_counter ();
+ vcl_worker_detached_stop_signal_mq (wrk);
clib_spinlock_unlock (&vcm->workers_lock);
vcl_worker_register_with_vpp ();
}
@@ -1425,10 +1431,11 @@ vcl_api_retry_attach (vcl_worker_t *wrk)
{
if (s->flags & VCL_SESSION_F_IS_VEP)
continue;
- if (s->session_state == VCL_STATE_LISTEN_NO_MQ)
+ if (s->session_state == VCL_STATE_LISTEN)
vppcom_session_listen (vcl_session_handle (s), 10);
else
- VDBG (0, "internal error: unexpected state %d", s->session_state);
+ VDBG (0, "reattach error: %u unexpected state %d", s->session_index,
+ s->session_state);
}
}
@@ -1769,12 +1776,20 @@ vppcom_session_listen (uint32_t listen_sh, uint32_t q_len)
return VPPCOM_EBADFD;
listen_vpp_handle = listen_session->vpp_handle;
- if (listen_session->session_state == VCL_STATE_LISTEN)
+ if (listen_session->session_state == VCL_STATE_LISTEN &&
+ !(listen_session->flags & VCL_SESSION_F_LISTEN_NO_MQ))
+ {
+ VDBG (0, "session %u [0x%llx]: already in listen state!", listen_sh,
+ listen_vpp_handle);
+ return VPPCOM_OK;
+ }
+ if (PREDICT_FALSE (!wrk->ctrl_mq))
{
- VDBG (0, "session %u [0x%llx]: already in listen state!",
- listen_sh, listen_vpp_handle);
+ listen_session->session_state = VCL_STATE_LISTEN;
+ listen_session->flags |= VCL_SESSION_F_LISTEN_NO_MQ;
return VPPCOM_OK;
}
+ listen_session->flags &= ~VCL_SESSION_F_LISTEN_NO_MQ;
VDBG (0, "session %u: sending vpp listen request...", listen_sh);
@@ -1851,7 +1866,6 @@ again:
return VPPCOM_EBADFD;
if ((ls->session_state != VCL_STATE_LISTEN) &&
- (ls->session_state != VCL_STATE_LISTEN_NO_MQ) &&
(!vcl_session_is_connectable_listener (wrk, ls)))
{
VDBG (0, "ERROR: session [0x%llx]: not in listen state! state (%s)",
@@ -2653,6 +2667,9 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
*bits_set += 1;
}
break;
+ case SESSION_CTRL_EVT_BOUND:
+ vcl_session_bound_handler (wrk, (session_bound_msg_t *) e->data);
+ break;
case SESSION_CTRL_EVT_UNLISTEN_REPLY:
vcl_session_unlisten_reply_handler (wrk, e->data);
break;
@@ -3588,8 +3605,13 @@ vppcom_epoll_wait_eventfd (vcl_worker_t *wrk, struct epoll_event *events,
for (i = 0; i < n_mq_evts; i++)
{
- if (PREDICT_FALSE (wrk->mq_events[i].data.u32 == ~0))
+ if (PREDICT_FALSE (wrk->mq_events[i].data.u32 >= VCL_EP_PIPEFD_EVT))
{
+ if (wrk->mq_events[i].data.u32 == VCL_EP_PIPEFD_EVT)
+ {
+ vcl_api_retry_attach (wrk);
+ continue;
+ }
/* api socket was closed */
vcl_api_handle_disconnect (wrk);
continue;
@@ -4544,17 +4566,31 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
if (ep->app_tlvs)
vcl_handle_ep_app_tlvs (s, ep);
- /* Session not connected/bound in vpp. Create it by 'connecting' it */
+ /* Session not connected/bound in vpp. Create it by binding it */
if (PREDICT_FALSE (s->session_state == VCL_STATE_CLOSED))
{
u32 session_index = s->session_index;
f64 timeout = vcm->cfg.session_timeout;
int rv;
- vcl_send_session_connect (wrk, s);
- rv = vppcom_wait_for_session_state_change (session_index,
- VCL_STATE_READY,
- timeout);
+ /* VPP assumes sockets are bound, not ideal, but for now
+ * connect socket, grab lcl ip:port pair and use it to bind */
+ if (s->transport.rmt_port == 0 ||
+ ip46_address_is_zero (&s->transport.lcl_ip))
+ {
+ vcl_send_session_connect (wrk, s);
+ rv = vppcom_wait_for_session_state_change (
+ session_index, VCL_STATE_READY, timeout);
+ if (rv < 0)
+ return rv;
+ vcl_send_session_disconnect (wrk, s);
+ rv = vppcom_wait_for_session_state_change (
+ session_index, VCL_STATE_DETACHED, timeout);
+ s->session_state = VCL_STATE_CLOSED;
+ }
+ vcl_send_session_listen (wrk, s);
+ rv = vppcom_wait_for_session_state_change (
+ session_index, VCL_STATE_LISTEN, timeout);
if (rv < 0)
return rv;
s = vcl_session_get (wrk, session_index);
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt
index 3c354b764dd..b4fc1775194 100644
--- a/src/vlib/CMakeLists.txt
+++ b/src/vlib/CMakeLists.txt
@@ -71,6 +71,19 @@ set(PLATFORM_SOURCES
)
endif()
+set(VLIB_LIBS vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB})
+
+vpp_find_path(LIBIBERTY_INCLUDE_DIR libiberty/demangle.h)
+vpp_find_library(LIBIBERTY_LIB NAMES iberty libiberty)
+
+# Only link libiberty when the library itself was located, otherwise a
+# LIBIBERTY_LIB-NOTFOUND value would be appended to the link libraries
+if (LIBIBERTY_INCLUDE_DIR AND LIBIBERTY_LIB AND LIBUNWIND_LIB)
+ message(STATUS "libiberty found at ${LIBIBERTY_LIB}")
+ list(APPEND VLIB_LIBS ${LIBIBERTY_LIB})
+ add_definitions(-DHAVE_LIBIBERTY)
+else()
+ message(WARNING "libiberty not found - stack trace demangle disabled")
+endif()
+
add_vpp_library(vlib
SOURCES
buffer.c
@@ -79,6 +92,7 @@ add_vpp_library(vlib
counter.c
drop.c
error.c
+ file.c
format.c
handoff_trace.c
init.c
@@ -104,7 +118,6 @@ add_vpp_library(vlib
time.c
trace.c
unix/cli.c
- unix/input.c
unix/main.c
unix/plugin.c
unix/util.c
@@ -130,6 +143,7 @@ add_vpp_library(vlib
dma/dma.h
error_funcs.h
error.h
+ file.h
format_funcs.h
global_funcs.h
init.h
@@ -149,6 +163,7 @@ add_vpp_library(vlib
time.h
trace_funcs.h
trace.h
+ tw_funcs.h
unix/mc_socket.h
unix/plugin.h
unix/unix.h
@@ -159,7 +174,7 @@ add_vpp_library(vlib
API_FILES
pci/pci_types.api
- LINK_LIBRARIES vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB}
+ LINK_LIBRARIES ${VLIB_LIBS}
DEPENDS api_headers
)
diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c
index d910b25afac..ce09a51c600 100644
--- a/src/vlib/buffer_funcs.c
+++ b/src/vlib/buffer_funcs.c
@@ -297,7 +297,7 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
vlib_frame_bitmap_t mask, used_elts = {};
vlib_frame_queue_elt_t *hf = 0;
- u16 thread_index;
+ clib_thread_index_t thread_index;
u32 n_comp, off = 0, n_left = n_packets;
thread_index = thread_indices[0];
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index 4198b4b0976..38a8c2aa19c 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -39,6 +39,7 @@
#include <vlib/vlib.h>
#include <vlib/stats/stats.h>
+#include <vlib/file.h>
#include <vlib/unix/unix.h>
#include <vppinfra/callback.h>
#include <vppinfra/cpu.h>
@@ -1226,20 +1227,20 @@ restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
{
vlib_global_main_t *vgm = vlib_get_global_main ();
clib_file_main_t *fm = &file_main;
- clib_file_t *f;
/* environ(7) does not indicate a header for this */
extern char **environ;
/* Close all known open files */
- pool_foreach (f, fm->file_pool)
- {
+ pool_foreach_pointer (f, fm->file_pool)
+ {
if (f->file_descriptor > 2)
close(f->file_descriptor);
}
/* Exec ourself */
- execve (vgm->name, (char **) vgm->argv, environ);
+ if (execve ((void *) vgm->argv[0], (char **) vgm->argv, environ))
+ return clib_error_return_unix (0, "execve failed");
return 0;
}
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index f9da576a5f2..a9c261770d4 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -68,7 +68,7 @@ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);
/** Pre-fetch a per-thread simple counter for the given object index */
always_inline void
vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm,
- u32 thread_index, u32 index)
+ clib_thread_index_t thread_index, u32 index)
{
counter_t *my_counters;
@@ -86,8 +86,9 @@ vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm,
@param increment - (u64) quantitiy to add to the counter
*/
always_inline void
-vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
- u32 thread_index, u32 index, u64 increment)
+vlib_increment_simple_counter (vlib_simple_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 index,
+ u64 increment)
{
counter_t *my_counters;
@@ -102,8 +103,9 @@ vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
@param increment - (u64) quantitiy remove from the counter value
*/
always_inline void
-vlib_decrement_simple_counter (vlib_simple_counter_main_t * cm,
- u32 thread_index, u32 index, u64 decrement)
+vlib_decrement_simple_counter (vlib_simple_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 index,
+ u64 decrement)
{
counter_t *my_counters;
@@ -121,8 +123,9 @@ vlib_decrement_simple_counter (vlib_simple_counter_main_t * cm,
@param value - (u64) quantitiy to set to the counter
*/
always_inline void
-vlib_set_simple_counter (vlib_simple_counter_main_t * cm,
- u32 thread_index, u32 index, u64 value)
+vlib_set_simple_counter (vlib_simple_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 index,
+ u64 value)
{
counter_t *my_counters;
@@ -246,9 +249,9 @@ void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
*/
always_inline void
-vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
- u32 thread_index,
- u32 index, u64 n_packets, u64 n_bytes)
+vlib_increment_combined_counter (vlib_combined_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 index,
+ u64 n_packets, u64 n_bytes)
{
vlib_counter_t *my_counters;
@@ -261,8 +264,8 @@ vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
/** Pre-fetch a per-thread combined counter for the given object index */
always_inline void
-vlib_prefetch_combined_counter (const vlib_combined_counter_main_t * cm,
- u32 thread_index, u32 index)
+vlib_prefetch_combined_counter (const vlib_combined_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 index)
{
vlib_counter_t *cpu_counters;
diff --git a/src/vlib/file.c b/src/vlib/file.c
new file mode 100644
index 00000000000..286b0d1f2ad
--- /dev/null
+++ b/src/vlib/file.c
@@ -0,0 +1,305 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <limits.h>
+
+VLIB_REGISTER_LOG_CLASS (vlib_file_log, static) = {
+ .class_name = "vlib",
+ .subclass_name = "file",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (vlib_file_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+ vlib_log_warn (vlib_file_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (vlib_file_log.class, fmt, __VA_ARGS__)
+
+clib_file_main_t file_main;
+
+static void
+vlib_file_update (clib_file_t *f, clib_file_update_type_t update_type)
+{ /* Add, modify or delete file f in the epoll set of its polling thread. */
+ vlib_main_t *vm = vlib_get_main_by_index (f->polling_thread_index);
+ int op = -1, add_del = 0;
+ /* every file is polled for input; the flags below add output and edge triggering */
+ struct epoll_event e = {
+ .events = EPOLLIN,
+ .data.ptr = f,
+ };
+
+ if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+ e.events |= EPOLLOUT;
+ if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
+ e.events |= EPOLLET;
+
+ switch (update_type)
+ {
+ case UNIX_FILE_UPDATE_ADD:
+ op = EPOLL_CTL_ADD;
+ add_del = 1;
+ break;
+
+ case UNIX_FILE_UPDATE_MODIFY:
+ op = EPOLL_CTL_MOD;
+ break;
+
+ case UNIX_FILE_UPDATE_DELETE:
+ op = EPOLL_CTL_DEL;
+ add_del = -1;
+ break;
+
+ default:
+ log_err ("%s: unknown update_type %d", __func__, update_type);
+ return;
+ }
+
+ if (epoll_ctl (vm->epoll_fd, op, (int) f->file_descriptor, &e) < 0)
+ {
+ log_err ("%s: epoll_ctl() failed, errno %d", __func__, errno);
+ return;
+ }
+ /* reached only when epoll_ctl() succeeded: adjust the per-thread fd count */
+ vm->n_epoll_fds += add_del;
+}
+
+static clib_error_t *
+wake_read_fn (struct clib_file *f)
+{ /* Read handler installed for the wakeup fd: reads one u64 and always reports success. */
+ u64 val, __clib_unused rv; /* both the value and the read() result are discarded */
+ rv = read ((int) f->file_descriptor, &val, sizeof (u64));
+ return 0;
+}
+
+void
+vlib_file_poll_init (vlib_main_t *vm)
+{ /* Create vm's epoll instance and non-blocking wakeup eventfd, then add the eventfd to file_main. */
+ vm->epoll_fd = epoll_create (1);
+
+ if (vm->epoll_fd < 0)
+ clib_panic ("failed to initialize epoll for thread %u", vm->thread_index);
+
+ vm->wakeup_fd = eventfd (0, EFD_NONBLOCK);
+
+ if (vm->wakeup_fd < 0)
+ clib_panic ("failed to initialize wakeup event for thread %u",
+ vm->thread_index);
+ /* install the epoll-based update hook only if no hook is set yet */
+ if (!file_main.file_update)
+ file_main.file_update = vlib_file_update;
+
+ clib_file_add (&file_main, &(clib_file_t){
+ .polling_thread_index = vm->thread_index,
+ .file_descriptor = vm->wakeup_fd,
+ .description = format (0, "wakeup thread %u",
+ vm->thread_index),
+ .read_function = wake_read_fn,
+ });
+}
+
+void
+vlib_file_poll (vlib_main_t *vm)
+{ /* Poll vm's epoll set and run handlers of ready files; timeout_ms stays 0 unless the idle path below sets it. */
+ vlib_node_main_t *nm = &vm->node_main;
+ unix_main_t *um = &unix_main;
+ struct epoll_event *e, epoll_events[16];
+ int n_fds_ready;
+ int is_main = (vm->thread_index == 0);
+ int timeout_ms = 0, max_timeout_ms = 10;
+ u32 ticks;
+
+ /*
+ * If we've been asked for a fixed-sleep between main loop polls,
+ * do so right away.
+ */
+ if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
+ {
+ struct timespec ts, tsrem;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1000L * um->poll_sleep_usec;
+
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+
+ goto epoll; /* timeout_ms is still 0 on this path */
+ }
+
+ /* we are busy, skip some loops before polling again */
+ if (vlib_last_vectors_per_main_loop (vm) >= 2)
+ goto skip_loops;
+
+ /* at least one node is polling */
+ if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING])
+ goto skip_loops;
+
+ /* pending APIs in the queue */
+ if (is_main && vm->api_queue_nonempty)
+ goto skip_loops;
+
+ if (is_main == 0)
+ {
+ if (*vlib_worker_threads->wait_at_barrier)
+ goto epoll;
+
+ if (vlib_get_first_main ()->time_last_barrier_release + 0.5 >=
+ vlib_time_now (vm))
+ goto skip_loops;
+ }
+
+ /* check for pending interrupts */
+ for (int nt = 0; nt < VLIB_N_NODE_TYPE; nt++)
+ if (nm->node_interrupts[nt] &&
+ clib_interrupt_is_any_pending (nm->node_interrupts[nt]))
+ goto epoll;
+
+ /* at this point we know that thread is going to sleep, so let's announce
+ * to other threads that they need to wake us up if they need our attention */
+ __atomic_store_n (&vm->thread_sleeps, 1, __ATOMIC_RELAXED);
+
+ ticks = vlib_tw_timer_first_expires_in_ticks (vm);
+ /* convert ticks to ms and cap at max_timeout_ms; the else branch sleeps for the cap itself */
+ if (ticks != TW_SLOTS_PER_RING)
+ {
+ timeout_ms = (int) (ticks / ((u32) VLIB_TW_TICKS_PER_SECOND / 1000));
+ timeout_ms = clib_min (timeout_ms, max_timeout_ms);
+ }
+ else
+ timeout_ms = max_timeout_ms;
+
+ goto epoll;
+
+skip_loops:
+ /* Don't come back for a respectable number of dispatch cycles */
+ vm->file_poll_skip_loops = 1024;
+
+epoll:
+ n_fds_ready = epoll_wait (vm->epoll_fd, epoll_events,
+ ARRAY_LEN (epoll_events), timeout_ms);
+
+ __atomic_store_n (&vm->thread_sleeps, 0, __ATOMIC_RELAXED);
+ __atomic_store_n (&vm->wakeup_pending, 0, __ATOMIC_RELAXED);
+
+ if (n_fds_ready < 0)
+ {
+ if (unix_error_is_fatal (errno))
+ vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
+
+ /* non fatal error (e.g. EINTR). */
+ return;
+ }
+
+ vm->epoll_waits += 1;
+ vm->epoll_files_ready += n_fds_ready;
+
+ for (e = epoll_events; e < epoll_events + n_fds_ready; e++)
+ {
+ clib_file_t *f = e->data.ptr;
+ clib_error_t *err;
+
+ if (PREDICT_FALSE (!f->active))
+ {
+ foreach_int (flag, EPOLLIN, EPOLLOUT, EPOLLERR)
+ if (e->events & flag)
+ {
+ const char *str[] = {
+ [EPOLLIN] = "EPOLLIN",
+ [EPOLLOUT] = "EPOLLOUT",
+ [EPOLLERR] = "EPOLLERR",
+ };
+ log_debug ("epoll event %s dropped due to inactive file",
+ str[flag]);
+ }
+ continue;
+ }
+ else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
+ {
+ if (e->events & EPOLLIN)
+ {
+ f->read_events++;
+ err = f->read_function (f);
+ if (err)
+ {
+ log_err ("file read error: %U", format_clib_error, err);
+ clib_error_free (err);
+ }
+ }
+ if (e->events & EPOLLOUT)
+ {
+ f->write_events++;
+ err = f->write_function (f);
+ if (err)
+ {
+ log_err ("file write error: %U", format_clib_error, err);
+ clib_error_free (err);
+ }
+ }
+ }
+ else /* EPOLLERR: call the file's error handler, or close the fd unless dont_close is set */
+ {
+ if (f->error_function)
+ {
+ f->error_events++;
+ err = f->error_function (f);
+ if (err)
+ {
+ log_err ("file error: %U", format_clib_error, err);
+ clib_error_free (err);
+ }
+ }
+ else if (f->dont_close == 0)
+ close ((int) f->file_descriptor);
+ }
+ }
+
+ /* maximum epoll events received, there may be more ... */
+ if (n_fds_ready == ARRAY_LEN (epoll_events))
+ {
+ timeout_ms = 0;
+ goto epoll;
+ }
+
+ /* removing fd from epoll instance doesn't remove event from epoll queue
+ * so we need to be sure epoll queue is empty before freeing */
+ clib_file_free_deleted (&file_main, vm->thread_index);
+}
+
+static clib_error_t *
+show_files (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{ /* CLI handler: print a header and one row per entry of file_main.file_pool; always returns 0. */
+ clib_error_t *error = 0;
+ clib_file_main_t *fm = &file_main;
+ char path[PATH_MAX];
+ u8 *s = 0;
+
+ vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread",
+ "Read", "Write", "Error", "File Name", "Description");
+
+ pool_foreach_pointer (f, fm->file_pool)
+ {
+ ssize_t rv;
+ s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0); /* trailing %c with 0 appends the NUL needed by readlink() */
+ rv = readlink ((char *) s, path, PATH_MAX - 1);
+ /* readlink() does not NUL-terminate; a failed call yields an empty file name */
+ path[rv < 0 ? 0 : rv] = 0;
+
+ vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v",
+ f->file_descriptor, f->polling_thread_index,
+ f->read_events, f->write_events, f->error_events, path,
+ f->description);
+ vec_reset_length (s); /* reuse the same vector for the next entry */
+ }
+ vec_free (s);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (cli_show_files, static) = {
+ .path = "show files",
+ .short_help = "Show files in use",
+ .function = show_files,
+};
diff --git a/src/vlib/file.h b/src/vlib/file.h
new file mode 100644
index 00000000000..82bbb22f650
--- /dev/null
+++ b/src/vlib/file.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __vlib_file_h__
+#define __vlib_file_h__
+
+#include <vppinfra/file.h>
+/* NOTE(review): vlib_main_t is not declared here; includers appear to pull in <vlib/vlib.h> first - confirm */
+extern clib_file_main_t file_main;
+/* per-thread file polling, implemented in file.c */
+void vlib_file_poll_init (vlib_main_t *vm);
+void vlib_file_poll (vlib_main_t *vm);
+#endif /* __vlib_file_h__ */
diff --git a/src/vlib/format.c b/src/vlib/format.c
index 98010620a5d..8ed2535fe8a 100644
--- a/src/vlib/format.c
+++ b/src/vlib/format.c
@@ -213,7 +213,7 @@ unformat_vlib_tmpfile (unformat_input_t * input, va_list * args)
u8 *
format_vlib_thread_name (u8 * s, va_list * args)
{
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
if (thread_index == 0)
return format (s, "main");
@@ -226,7 +226,7 @@ format_vlib_thread_name (u8 * s, va_list * args)
u8 *
format_vlib_thread_name_and_index (u8 * s, va_list * args)
{
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
return format (s, "%U (%u)", format_vlib_thread_name, thread_index,
thread_index);
diff --git a/src/vlib/freebsd/pci.c b/src/vlib/freebsd/pci.c
index a4e9eb2dda6..92c27c24373 100644
--- a/src/vlib/freebsd/pci.c
+++ b/src/vlib/freebsd/pci.c
@@ -375,6 +375,4 @@ freebsd_pci_init (vlib_main_t *vm)
return 0;
}
-VLIB_INIT_FUNCTION (freebsd_pci_init) = {
- .runs_after = VLIB_INITS ("unix_input_init"),
-};
+VLIB_INIT_FUNCTION (freebsd_pci_init);
diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
index 3c0fdb78364..2558e611750 100644
--- a/src/vlib/global_funcs.h
+++ b/src/vlib/global_funcs.h
@@ -26,7 +26,7 @@ vlib_get_n_threads ()
}
always_inline vlib_main_t *
-vlib_get_main_by_index (u32 thread_index)
+vlib_get_main_by_index (clib_thread_index_t thread_index)
{
vlib_main_t *vm;
vm = vlib_global_main.vlib_mains[thread_index];
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 29ca3d97523..bf9c6f27cd5 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -42,6 +42,7 @@
#include <vppinfra/unix.h>
#include <vlib/vlib.h>
+#include <vlib/file.h>
#include <vlib/pci/pci.h>
#include <vlib/unix/unix.h>
#include <vlib/linux/vfio.h>
@@ -1578,15 +1579,4 @@ linux_pci_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (linux_pci_init) =
-{
- .runs_after = VLIB_INITS("unix_input_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (linux_pci_init);
diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c
index 9dc9d554ebd..27a5e271fd0 100644
--- a/src/vlib/linux/vmbus.c
+++ b/src/vlib/linux/vmbus.c
@@ -455,15 +455,4 @@ linux_vmbus_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (linux_vmbus_init) =
-{
- .runs_before = VLIB_INITS("unix_input_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (linux_vmbus_init);
diff --git a/src/vlib/log.c b/src/vlib/log.c
index 60fb9fb5178..be6b759c61a 100644
--- a/src/vlib/log.c
+++ b/src/vlib/log.c
@@ -108,6 +108,17 @@ log_level_is_enabled (vlib_log_level_t level, vlib_log_level_t configured)
return 1;
}
+static void
+log_size_validate (vlib_log_main_t *lm)
+{ /* Grow lm->entries, under lm->lock, when it holds fewer than lm->size elements. */
+ if (vec_len (lm->entries) < lm->size) /* NOTE(review): length is tested before the lock is taken - confirm this is intended */
+ {
+ CLIB_SPINLOCK_LOCK (lm->lock);
+ vec_validate (lm->entries, lm->size);
+ CLIB_SPINLOCK_UNLOCK (lm->lock);
+ }
+}
+
void
vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
{
@@ -122,14 +133,10 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
int syslog_enabled = log_level_is_enabled (level, sc->syslog_level);
u8 *s = 0;
- /* make sure we are running on the main thread to avoid use in dataplane
- code, for dataplane logging consider use of event-logger */
- ASSERT (vlib_get_thread_index () == 0);
-
if ((log_enabled || syslog_enabled) == 0)
return;
- vec_validate (lm->entries, lm->size);
+ log_size_validate (lm);
if ((delta > lm->unthrottle_time) ||
(sc->is_throttling == 0 && (delta > 1)))
@@ -197,13 +204,19 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
if (log_enabled)
{
+ CLIB_SPINLOCK_LOCK (lm->lock);
e = vec_elt_at_index (lm->entries, lm->next);
- vec_free (e->string);
+ lm->next = (lm->next + 1) % lm->size;
+ if (lm->size > lm->count)
+ lm->count++;
e->level = level;
e->class = class;
- e->string = s;
e->timestamp = t;
- s = 0;
+ e->thread_index = vm->thread_index;
+ CLIB_SWAP (e->string, s);
+ CLIB_SPINLOCK_UNLOCK (lm->lock);
+
+ vec_free (s);
if (lm->add_to_elog)
{
@@ -235,10 +248,6 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
ed->string_index =
elog_string (&vlib_global_main.elog_main, "%v%c", e->string, 0);
}
-
- lm->next = (lm->next + 1) % lm->size;
- if (lm->size > lm->count)
- lm->count++;
}
vec_free (s);
@@ -356,7 +365,7 @@ vlib_log_init (vlib_main_t *vm)
gettimeofday (&lm->time_zero_timeval, 0);
lm->time_zero = vlib_time_now (vm);
- vec_validate (lm->entries, lm->size);
+ log_size_validate (lm);
while (r)
{
@@ -384,23 +393,30 @@ show_log (vlib_main_t * vm,
{
clib_error_t *error = 0;
vlib_log_main_t *lm = &log_main;
- vlib_log_entry_t *e;
+ vlib_log_entry_t *e, *entries;
int i = last_log_entry ();
- int count = lm->count;
+ int count;
f64 time_offset;
time_offset = (f64) lm->time_zero_timeval.tv_sec
+ (((f64) lm->time_zero_timeval.tv_usec) * 1e-6) - lm->time_zero;
+ CLIB_SPINLOCK_LOCK (lm->lock);
+ count = lm->count;
+ entries = vec_dup (lm->entries);
+ CLIB_SPINLOCK_UNLOCK (lm->lock);
+
while (count--)
{
- e = vec_elt_at_index (lm->entries, i);
+ e = vec_elt_at_index (entries, i);
vlib_cli_output (vm, "%U %-10U %-14U %v", format_time_float, NULL,
e->timestamp + time_offset, format_vlib_log_level,
e->level, format_vlib_log_class, e->class, e->string);
i = (i + 1) % lm->size;
}
+ vec_free (entries);
+
return error;
}
@@ -464,8 +480,10 @@ clear_log (vlib_main_t * vm,
vlib_log_main_t *lm = &log_main;
vlib_log_entry_t *e;
int i = last_log_entry ();
- int count = lm->count;
+ int count;
+ CLIB_SPINLOCK_LOCK (lm->lock);
+ count = lm->count;
while (count--)
{
e = vec_elt_at_index (lm->entries, i);
@@ -475,6 +493,8 @@ clear_log (vlib_main_t * vm,
lm->count = 0;
lm->next = 0;
+ CLIB_SPINLOCK_UNLOCK (lm->lock);
+
vlib_log_info (log_log.class, "log cleared");
return error;
}
@@ -667,7 +687,7 @@ set_log_size (vlib_main_t * vm,
if (unformat (line_input, "%d", &size))
{
lm->size = size;
- vec_validate (lm->entries, lm->size);
+ log_size_validate (lm);
}
else
return clib_error_return (0, "unknown input `%U'",
@@ -809,7 +829,7 @@ log_config (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "size %d", &lm->size))
- vec_validate (lm->entries, lm->size);
+ log_size_validate (lm);
else if (unformat (input, "unthrottle-time %d", &lm->unthrottle_time))
;
else if (unformat (input, "default-log-level %U",
diff --git a/src/vlib/log.h b/src/vlib/log.h
index 45e2b59946c..5bacd7f45cb 100644
--- a/src/vlib/log.h
+++ b/src/vlib/log.h
@@ -41,7 +41,8 @@ typedef enum
typedef struct
{
- vlib_log_level_t level;
+ clib_thread_index_t thread_index;
+ u8 level; /* vlib_log_level_t */
vlib_log_class_t class;
f64 timestamp;
u8 *string;
@@ -95,6 +96,7 @@ typedef struct
vlib_log_entry_t *entries;
vlib_log_class_data_t *classes;
int size, next, count;
+ u8 lock;
int default_rate_limit;
int default_log_level;
diff --git a/src/vlib/main.c b/src/vlib/main.c
index a2f833711ab..ffa4a696422 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -41,11 +41,10 @@
#include <vppinfra/format.h>
#include <vlib/vlib.h>
#include <vlib/threads.h>
+#include <vlib/file.h>
#include <vlib/stats/stats.h>
#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
-#include <vlib/unix/unix.h>
-
#define VLIB_FRAME_MAGIC (0xabadc0ed)
always_inline u32 *
@@ -258,11 +257,6 @@ vlib_next_frame_change_ownership (vlib_main_t * vm,
node = vec_elt (nm->nodes, node_runtime->node_index);
- /* Only internal & input nodes are allowed to call other nodes. */
- ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
- || node->type == VLIB_NODE_TYPE_INPUT
- || node->type == VLIB_NODE_TYPE_PROCESS);
-
ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);
next_frame =
@@ -871,16 +865,16 @@ add_trajectory_trace (vlib_buffer_t * b, u32 node_index)
}
static_always_inline u64
-dispatch_node (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_node_type_t type,
- vlib_node_state_t dispatch_state,
- vlib_frame_t * frame, u64 last_time_stamp)
+dispatch_node (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_node_type_t type, vlib_frame_t *frame,
+ vlib_node_dispatch_reason_t dispatch_reason,
+ u64 last_time_stamp)
{
uword n, v;
u64 t;
vlib_node_main_t *nm = &vm->node_main;
vlib_next_frame_t *nf;
+ vlib_node_type_atts_t attr = node_type_attrs[type];
if (CLIB_DEBUG > 0)
{
@@ -888,15 +882,14 @@ dispatch_node (vlib_main_t * vm,
ASSERT (n->type == type);
}
- /* Only non-internal nodes may be disabled. */
- if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state)
+ if (attr.can_be_disabled == 0 && node->state == VLIB_NODE_STATE_DISABLED)
{
- ASSERT (type != VLIB_NODE_TYPE_INTERNAL);
+ ASSERT (0); /* disabled node should not be dispatched */
return last_time_stamp;
}
- if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT)
- && dispatch_state != VLIB_NODE_STATE_INTERRUPT)
+ if (attr.decrement_main_loop_per_calls_if_polling &&
+ node->state == VLIB_NODE_STATE_POLLING)
{
u32 c = node->input_main_loops_per_call;
/* Only call node when count reaches zero. */
@@ -938,18 +931,13 @@ dispatch_node (vlib_main_t * vm,
vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
add_trajectory_trace (b, node->node_index);
}
- if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
- n = node->function (vm, node, frame);
- else
- n = vm->dispatch_wrapper_fn (vm, node, frame);
}
+
+ node->dispatch_reason = dispatch_reason;
+ if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
+ n = node->function (vm, node, frame);
else
- {
- if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))
- n = node->function (vm, node, frame);
- else
- n = vm->dispatch_wrapper_fn (vm, node, frame);
- }
+ n = vm->dispatch_wrapper_fn (vm, node, frame);
t = clib_cpu_time_now ();
@@ -968,11 +956,12 @@ dispatch_node (vlib_main_t * vm,
/* When in adaptive mode and vector rate crosses threshold switch to
polling mode and vice versa. */
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE))
+ if (PREDICT_FALSE (attr.supports_adaptive_mode &&
+ node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE))
{
ELOG_TYPE_DECLARE (e) =
{
- .function = (char *) __FUNCTION__,
+ .function = (char *) __func__,
.format = "%s vector length %d, switching to %s",
.format_args = "T4i4t4",
.n_enum_strings = 2,
@@ -985,8 +974,8 @@ dispatch_node (vlib_main_t * vm,
u32 node_name, vector_length, is_polling;
} *ed;
- if ((dispatch_state == VLIB_NODE_STATE_INTERRUPT
- && v >= nm->polling_threshold_vector_length) &&
+ if ((node->state == VLIB_NODE_STATE_INTERRUPT &&
+ v >= nm->polling_threshold_vector_length) &&
!(node->flags &
VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
{
@@ -1012,8 +1001,8 @@ dispatch_node (vlib_main_t * vm,
ed->is_polling = 1;
}
}
- else if (dispatch_state == VLIB_NODE_STATE_POLLING
- && v <= nm->interrupt_threshold_vector_length)
+ else if (node->state == VLIB_NODE_STATE_POLLING &&
+ v <= nm->interrupt_threshold_vector_length)
{
vlib_node_t *n = vlib_get_node (vm, node->node_index);
if (node->flags &
@@ -1103,10 +1092,9 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0;
nf->flags &= ~VLIB_FRAME_TRACE;
- last_time_stamp = dispatch_node (vm, n,
- VLIB_NODE_TYPE_INTERNAL,
- VLIB_NODE_STATE_POLLING,
- f, last_time_stamp);
+ last_time_stamp =
+ dispatch_node (vm, n, VLIB_NODE_TYPE_INTERNAL, f,
+ VLIB_NODE_DISPATCH_REASON_PENDING_FRAME, last_time_stamp);
/* Internal node vector-rate accounting, for summary stats */
vm->internal_node_vectors += f->n_vectors;
vm->internal_node_calls++;
@@ -1260,6 +1248,18 @@ vlib_process_resume (vlib_main_t * vm, vlib_process_t * p)
return r;
}
+static void
+process_timer_start (vlib_main_t *vm, vlib_process_t *p, u32 runtime_index)
+{ /* Start a process-node timer for p and store its handle in p->stop_timer_handle. */
+ vlib_tw_event_t e = { .type = VLIB_TW_EVENT_T_PROCESS_NODE,
+ .index = runtime_index };
+ /* a zero resume_clock_interval means no timer is started */
+ if (p->resume_clock_interval == 0)
+ return;
+
+ p->stop_timer_handle = vlib_tw_timer_start (vm, e, p->resume_clock_interval);
+}
+
static u64
dispatch_process (vlib_main_t * vm,
vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp)
@@ -1309,15 +1309,7 @@ dispatch_process (vlib_main_t * vm,
p->n_suspends += 1;
p->suspended_process_frame_index = pf - nm->suspended_process_frames;
- if (p->resume_clock_interval)
- {
- TWT (tw_timer_wheel) *tw = (TWT (tw_timer_wheel) *) nm->timing_wheel;
- p->stop_timer_handle =
- TW (tw_timer_start) (tw,
- vlib_timing_wheel_data_set_suspended_process (
- node->runtime_index) /* [sic] pool idex */,
- 0 /* timer_id */, p->resume_clock_interval);
- }
+ process_timer_start (vm, p, node->runtime_index);
}
else
p->state = VLIB_PROCESS_STATE_NOT_STARTED;
@@ -1419,15 +1411,7 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r,
/* Suspend it again. */
n_vectors = 0;
p->n_suspends += 1;
- if (p->resume_clock_interval)
- {
- p->stop_timer_handle =
- TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- vlib_timing_wheel_data_set_suspended_process
- (node->runtime_index) /* [sic] pool idex */ ,
- 0 /* timer_id */ ,
- p->resume_clock_interval);
- }
+ process_timer_start (vm, p, node->runtime_index);
}
else
{
@@ -1452,6 +1436,44 @@ dispatch_suspended_process (vlib_main_t *vm, vlib_process_restore_t *r,
return t;
}
+static __clib_warn_unused_result u32 *
+process_expired_timers (u32 *v)
+{ /* Collect expired timer handles into v and queue the matching process restores / sched nodes; returns v. */
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_main_t *nm = &vm->node_main;
+ u32 *handle;
+ /* presumably the callee may reallocate v, which is why the result must not be ignored - confirm */
+ v = vlib_tw_timer_expire_timers (vm, v);
+ /* route every expired handle by its event type */
+ vec_foreach (handle, v)
+ {
+ vlib_tw_event_t e = { .as_u32 = *handle };
+ vlib_process_restore_t restore = {};
+
+ if (e.type == VLIB_TW_EVENT_T_TIMED_EVENT)
+ {
+ restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT;
+ restore.timed_event_data_pool_index = e.index;
+ vec_add1 (nm->process_restore_current, restore);
+ }
+ else if (e.type == VLIB_TW_EVENT_T_PROCESS_NODE)
+ {
+ vlib_process_t *p = vec_elt (nm->processes, e.index);
+ p->stop_timer_handle = ~0; /* timer has fired, so the stored handle is invalidated */
+ restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK;
+ restore.runtime_index = e.index;
+ vec_add1 (nm->process_restore_current, restore);
+ }
+ else if (e.type == VLIB_TW_EVENT_T_SCHED_NODE)
+ {
+ vec_add1 (nm->sched_node_pending, e.index);
+ }
+ else
+ ASSERT (0);
+ }
+ return v;
+}
+
static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{
@@ -1462,6 +1484,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
f64 now;
vlib_frame_queue_main_t *fqm;
u32 frame_queue_check_counter = 0;
+ u32 *expired_timers = 0;
/* Initialize pending node vector. */
if (is_main)
@@ -1485,7 +1508,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (!nm->interrupt_threshold_vector_length)
nm->interrupt_threshold_vector_length = 5;
- vm->cpu_id = clib_get_current_cpu_id ();
vm->numa_node = clib_get_current_numa_node ();
os_set_numa_index (vm->numa_node);
@@ -1548,58 +1570,64 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
clib_call_callbacks (vm->worker_thread_main_loop_callbacks, vm,
cpu_time_now);
- /* Process pre-input nodes. */
cpu_time_now = clib_cpu_time_now ();
- vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
- cpu_time_now = dispatch_node (vm, n,
- VLIB_NODE_TYPE_PRE_INPUT,
- VLIB_NODE_STATE_POLLING,
- /* frame */ 0,
- cpu_time_now);
-
- if (clib_interrupt_is_any_pending (nm->pre_input_node_interrupts))
- {
- int int_num = -1;
- while ((int_num = clib_interrupt_get_next_and_clear (
- nm->pre_input_node_interrupts, int_num)) != -1)
+ if (vm->file_poll_skip_loops)
+ vm->file_poll_skip_loops--;
+ else
+ vlib_file_poll (vm);
+
+ for (vlib_node_type_t nt = 0; nt < VLIB_N_NODE_TYPE; nt++)
+ {
+ if (node_type_attrs[nt].can_be_polled)
+ vec_foreach (n, nm->nodes_by_type[nt])
+ if (n->state == VLIB_NODE_STATE_POLLING)
+ cpu_time_now = dispatch_node (
+ vm, n, nt,
+ /* frame */ 0, VLIB_NODE_DISPATCH_REASON_POLL, cpu_time_now);
+
+ if (node_type_attrs[nt].may_receive_interrupts &&
+ nm->node_interrupts[nt] &&
+ clib_interrupt_is_any_pending (nm->node_interrupts[nt]))
{
- vlib_node_runtime_t *n;
- n = vec_elt_at_index (
- nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], int_num);
- cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT,
- VLIB_NODE_STATE_INTERRUPT,
- /* frame */ 0, cpu_time_now);
+ int int_num = -1;
+
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ nm->node_interrupts[nt], int_num)) != -1)
+ {
+ vlib_node_runtime_t *n;
+ n = vec_elt_at_index (nm->nodes_by_type[nt], int_num);
+ cpu_time_now = dispatch_node (
+ vm, n, nt,
+ /* frame */ 0, VLIB_NODE_DISPATCH_REASON_INTERRUPT,
+ cpu_time_now);
+ }
}
}
- /* Next process input nodes. */
- vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
- cpu_time_now = dispatch_node (vm, n,
- VLIB_NODE_TYPE_INPUT,
- VLIB_NODE_STATE_POLLING,
- /* frame */ 0,
- cpu_time_now);
-
- if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
- vm->queue_signal_callback (vm);
-
- if (clib_interrupt_is_any_pending (nm->input_node_interrupts))
+ /* Process sched nodes. */
+ if (vec_len (nm->sched_node_pending))
{
- int int_num = -1;
-
- while ((int_num = clib_interrupt_get_next_and_clear (
- nm->input_node_interrupts, int_num)) != -1)
+ vec_foreach_index (i, nm->sched_node_pending)
{
- vlib_node_runtime_t *n;
- n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
- int_num);
- cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
- VLIB_NODE_STATE_INTERRUPT,
- /* frame */ 0, cpu_time_now);
+ vlib_node_t *n = vlib_get_node (vm, nm->sched_node_pending[i]);
+ if (n->type == VLIB_NODE_TYPE_SCHED)
+ {
+ vlib_node_runtime_t *nr =
+ vlib_node_get_runtime (vm, n->index);
+ nr->stop_timer_handle_plus_1 = 0;
+ cpu_time_now = dispatch_node (
+ vm, nr, VLIB_NODE_TYPE_SCHED,
+ /* frame */ 0, VLIB_NODE_DISPATCH_REASON_SCHED,
+ cpu_time_now);
+ }
}
+ vec_reset_length (nm->sched_node_pending);
}
+ if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
+ vm->queue_signal_callback (vm);
+
/* Input nodes may have added work to the pending vector.
Process pending vector until there is nothing left.
All pending vectors will be processed from input -> output. */
@@ -1632,8 +1660,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (PREDICT_FALSE (vm->elog_trace_graph_dispatch))
ed = ELOG_DATA (&vlib_global_main.elog_main, es);
- TW (tw_timer_expire_timers)
- ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm));
+ expired_timers = process_expired_timers (expired_timers);
ASSERT (nm->process_restore_current != 0);
@@ -1664,7 +1691,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
p->stop_timer_handle = ~0;
void *data;
data = vlib_process_signal_event_helper (
- nm, n, p, te->event_type_index, te->n_data_elts,
+ vm, nm, n, p, te->event_type_index, te->n_data_elts,
te->n_data_elt_bytes);
if (te->n_data_bytes < sizeof (te->inline_event_data))
clib_memcpy_fast (data, te->inline_event_data,
@@ -1691,6 +1718,9 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
nm->process_restore_next);
}
}
+ else
+ expired_timers = process_expired_timers (expired_timers);
+
vlib_increment_main_loop_counter (vm);
/* Record time stamp in case there are no enabled nodes and above
calls do not update time stamp. */
@@ -1725,18 +1755,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
}
}
-static void
-vlib_main_loop (vlib_main_t * vm)
-{
- vlib_main_or_worker_loop (vm, /* is_main */ 1);
-}
-
-void
-vlib_worker_loop (vlib_main_t * vm)
-{
- vlib_main_or_worker_loop (vm, /* is_main */ 0);
-}
-
vlib_global_main_t vlib_global_main;
void
@@ -1862,34 +1880,6 @@ vl_api_get_elog_trace_api_messages (void)
return 0;
}
-static void
-process_expired_timer_cb (u32 *expired_timer_handles)
-{
- vlib_main_t *vm = vlib_get_main ();
- vlib_node_main_t *nm = &vm->node_main;
- u32 *handle;
-
- vec_foreach (handle, expired_timer_handles)
- {
- u32 index = vlib_timing_wheel_data_get_index (*handle);
- vlib_process_restore_t restore = {};
-
- if (vlib_timing_wheel_data_is_timed_event (*handle))
- {
- restore.reason = VLIB_PROCESS_RESTORE_REASON_TIMED_EVENT;
- restore.timed_event_data_pool_index = index;
- }
- else
- {
- vlib_process_t *p = vec_elt (nm->processes, index);
- p->stop_timer_handle = ~0;
- restore.reason = VLIB_PROCESS_RESTORE_REASON_CLOCK;
- restore.runtime_index = index;
- }
- vec_add1 (nm->process_restore_current, restore);
- }
-}
-
/* Main function. */
int
vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
@@ -1905,12 +1895,13 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
vgm->configured_elog_ring_size != vgm->elog_main.event_ring_size)
elog_resize (&vgm->elog_main, vgm->configured_elog_ring_size);
vl_api_set_elog_main (vlib_get_elog_main ());
- (void) vl_api_set_elog_trace_api_messages (1);
/* Default name. */
if (!vgm->name)
vgm->name = "VLIB";
+ vm->numa_node = clib_get_current_numa_node ();
+
if ((error = vlib_physmem_init (vm)))
{
clib_error_report (error);
@@ -1982,26 +1973,20 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
goto done;
}
+ vlib_tw_init (vm);
+ vlib_file_poll_init (vm);
+
/* See unix/main.c; most likely already set up */
if (vgm->init_functions_called == 0)
vgm->init_functions_called = hash_create (0, /* value bytes */ 0);
if ((error = vlib_call_all_init_functions (vm)))
goto done;
- nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)),
- CLIB_CACHE_LINE_BYTES);
-
vec_validate (nm->process_restore_current, 10);
vec_validate (nm->process_restore_next, 10);
vec_set_len (nm->process_restore_current, 0);
vec_set_len (nm->process_restore_next, 0);
- /* Create the process timing wheel */
- TW (tw_timer_wheel_init)
- ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- process_expired_timer_cb /* callback */, 10e-6 /* timer period 10us */,
- ~0 /* max expirations per call */);
-
vec_validate (vm->pending_rpc_requests, 0);
vec_set_len (vm->pending_rpc_requests, 0);
vec_validate (vm->processing_rpc_requests, 0);
@@ -2052,7 +2037,7 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
goto done;
}
- vlib_main_loop (vm);
+ vlib_main_or_worker_loop (vm, /* is_main */ 1);
done:
/* Stop worker threads, barrier will not be released */
@@ -2072,6 +2057,40 @@ done:
return vm->main_loop_exit_status;
}
+static void
+vlib_worker_thread_fn (void *arg)
+{
+ vlib_global_main_t *vgm = vlib_get_global_main ();
+ vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
+ vlib_main_t *vm = vlib_get_main ();
+ clib_error_t *e;
+
+ ASSERT (vm->thread_index == vlib_get_thread_index ());
+ vm->numa_node = clib_get_current_numa_node ();
+
+ vlib_worker_thread_init (w);
+ clib_time_init (&vm->clib_time);
+ clib_mem_set_heap (w->thread_mheap);
+ vlib_tw_init (vm);
+ vlib_file_poll_init (vm);
+
+ vm->worker_init_functions_called = hash_create (0, 0);
+
+ e = vlib_call_init_exit_functions_no_sort (
+ vm, &vgm->worker_init_function_registrations, 1 /* call_once */,
+ 0 /* is_global */);
+ if (e)
+ clib_error_report (e);
+
+ vlib_main_or_worker_loop (vm, /* is_main */ 0);
+}
+
+VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
+ .name = "workers",
+ .short_name = "wk",
+ .function = vlib_worker_thread_fn,
+};
+
vlib_main_t *
vlib_get_main_not_inline (void)
{
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 94b8c4fa954..1700369738d 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -204,10 +204,19 @@ typedef struct vlib_main_t
clib_random_buffer_t random_buffer;
/* thread, cpu and numa_node indices */
- u32 thread_index;
- u32 cpu_id;
+ clib_thread_index_t thread_index;
u32 numa_node;
+ /* epoll and eventfd */
+ int epoll_fd;
+ int wakeup_fd;
+ int n_epoll_fds;
+ u32 file_poll_skip_loops;
+ u64 epoll_files_ready;
+ u64 epoll_waits;
+ u8 wakeup_pending;
+ u8 thread_sleeps;
+
/* control-plane API queue signal pending, length indication */
volatile u32 queue_signal_pending;
volatile u32 api_queue_nonempty;
@@ -258,6 +267,10 @@ typedef struct vlib_main_t
u32 buffer_alloc_success_seed;
f64 buffer_alloc_success_rate;
+ /* Timing wheel for scheduling time-based node dispatch. */
+ void *timing_wheel;
+ u32 n_tw_timers;
+
#ifdef CLIB_SANITIZE_ADDR
/* address sanitizer stack save */
void *asan_stack_save;
@@ -319,18 +332,13 @@ typedef struct vlib_global_main_t
/* Global main structure. */
extern vlib_global_main_t vlib_global_main;
-void vlib_worker_loop (vlib_main_t * vm);
-
always_inline f64
vlib_time_now (vlib_main_t * vm)
{
-#if CLIB_DEBUG > 0
- extern __thread uword __os_thread_index;
-#endif
/*
* Make sure folks don't pass &vlib_global_main from a worker thread.
*/
- ASSERT (vm->thread_index == __os_thread_index);
+ ASSERT (vm->thread_index == os_get_thread_index ());
return clib_time_now (&vm->clib_time) + vm->time_offset;
}
diff --git a/src/vlib/node.c b/src/vlib/node.c
index c0572f3cf83..edeb6dc70a7 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -518,7 +518,7 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt,
vlib_node_runtime_t *rt;
u32 i;
- if (n->type == VLIB_NODE_TYPE_PROCESS)
+ if (node_type_attrs[n->type].is_process)
{
vlib_process_t *p;
uword log2_n_stack_bytes;
@@ -559,15 +559,14 @@ vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt,
{
vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
/* align */ CLIB_CACHE_LINE_BYTES);
- if (n->type == VLIB_NODE_TYPE_INPUT)
- clib_interrupt_resize (&nm->input_node_interrupts,
- vec_len (nm->nodes_by_type[n->type]));
- else if (n->type == VLIB_NODE_TYPE_PRE_INPUT)
- clib_interrupt_resize (&nm->pre_input_node_interrupts,
- vec_len (nm->nodes_by_type[n->type]));
+
n->runtime_index = rt - nm->nodes_by_type[n->type];
}
+ if (node_type_attrs[n->type].may_receive_interrupts)
+ clib_interrupt_resize (&nm->node_interrupts[n->type],
+ vec_len (nm->nodes_by_type[n->type]));
+
if (n->type == VLIB_NODE_TYPE_INPUT)
nm->input_node_counts_by_state[n->state] += 1;
diff --git a/src/vlib/node.h b/src/vlib/node.h
index bb6d8f818a8..52edab9e488 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -83,9 +83,46 @@ typedef enum
/* "Process" nodes which can be suspended and later resumed. */
VLIB_NODE_TYPE_PROCESS,
+ /* Nodes to be called by the per-thread timing wheel. */
+ VLIB_NODE_TYPE_SCHED,
+
VLIB_N_NODE_TYPE,
} vlib_node_type_t;
+typedef struct
+{
+ u8 can_be_disabled : 1;
+ u8 may_receive_interrupts : 1;
+ u8 decrement_main_loop_per_calls_if_polling : 1;
+ u8 supports_adaptive_mode : 1;
+ u8 can_be_polled : 1;
+ u8 is_process : 1;
+} vlib_node_type_atts_t;
+
+static const vlib_node_type_atts_t node_type_attrs[VLIB_N_NODE_TYPE] ={
+ [VLIB_NODE_TYPE_PRE_INPUT] = {
+ .can_be_disabled = 1,
+ .may_receive_interrupts = 1,
+ .decrement_main_loop_per_calls_if_polling = 1,
+ .can_be_polled = 1,
+ },
+ [VLIB_NODE_TYPE_INPUT] = {
+ .can_be_disabled = 1,
+ .may_receive_interrupts = 1,
+ .decrement_main_loop_per_calls_if_polling = 1,
+ .supports_adaptive_mode = 1,
+ .can_be_polled = 1,
+ },
+ [VLIB_NODE_TYPE_PROCESS] = {
+ .can_be_disabled = 1,
+ .is_process = 1,
+ },
+ [VLIB_NODE_TYPE_SCHED] = {
+ .can_be_disabled = 1,
+ .may_receive_interrupts = 1,
+ },
+};
+
typedef struct _vlib_node_fn_registration
{
vlib_node_function_t *function;
@@ -245,7 +282,26 @@ typedef enum
foreach_vlib_node_state
#undef _
VLIB_N_NODE_STATE,
-} vlib_node_state_t;
+} __clib_packed vlib_node_state_t;
+
+typedef enum
+{
+ VLIB_NODE_DISPATCH_REASON_UNKNOWN = 0,
+ VLIB_NODE_DISPATCH_REASON_PENDING_FRAME,
+ VLIB_NODE_DISPATCH_REASON_POLL,
+ VLIB_NODE_DISPATCH_REASON_INTERRUPT,
+ VLIB_NODE_DISPATCH_REASON_SCHED,
+ VLIB_NODE_DISPATCH_N_REASON,
+} __clib_packed vlib_node_dispatch_reason_t;
+
+#define vlib_node_dispatch_reason_enum_strings \
+ { \
+ [VLIB_NODE_DISPATCH_REASON_UNKNOWN] = "unknown", \
+ [VLIB_NODE_DISPATCH_REASON_PENDING_FRAME] = "pending-frame", \
+ [VLIB_NODE_DISPATCH_REASON_POLL] = "poll", \
+ [VLIB_NODE_DISPATCH_REASON_INTERRUPT] = "interrupt", \
+ [VLIB_NODE_DISPATCH_REASON_SCHED] = "scheduled", \
+ }
typedef struct vlib_node_t
{
@@ -498,7 +554,10 @@ typedef struct vlib_node_runtime_t
u16 flags; /**< Copy of main node flags. */
- u16 state; /**< Input node state. */
+ vlib_node_state_t state; /**< Input node state. */
+
+ vlib_node_dispatch_reason_t
+ dispatch_reason; /**< Reason for running this node. */
u16 n_next_nodes;
@@ -507,6 +566,9 @@ typedef struct vlib_node_runtime_t
last time this node ran. Set to
zero before first run of this
node. */
+ u32 stop_timer_handle_plus_1; /**< Timing wheel stop handle for
+ SCHED node incremented by 1,
+ 0 = no timer running. */
CLIB_ALIGN_MARK (runtime_data_pad, 8);
@@ -679,30 +741,6 @@ typedef struct
}
vlib_signal_timed_event_data_t;
-always_inline uword
-vlib_timing_wheel_data_is_timed_event (u32 d)
-{
- return d & 1;
-}
-
-always_inline u32
-vlib_timing_wheel_data_set_suspended_process (u32 i)
-{
- return 0 + 2 * i;
-}
-
-always_inline u32
-vlib_timing_wheel_data_set_timed_event (u32 i)
-{
- return 1 + 2 * i;
-}
-
-always_inline uword
-vlib_timing_wheel_data_get_index (u32 d)
-{
- return d / 2;
-}
-
typedef struct
{
clib_march_variant_type_t index;
@@ -727,8 +765,7 @@ typedef struct
vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
/* Node runtime indices for input nodes with pending interrupts. */
- void *input_node_interrupts;
- void *pre_input_node_interrupts;
+ void *node_interrupts[VLIB_N_NODE_TYPE];
/* Input nodes are switched from/to interrupt to/from polling mode
when average vector length goes above/below polling/interrupt
@@ -742,14 +779,14 @@ typedef struct
/* Vector of internal node's frames waiting to be called. */
vlib_pending_frame_t *pending_frames;
- /* Timing wheel for scheduling time-based node dispatch. */
- void *timing_wheel;
-
vlib_signal_timed_event_data_t *signal_timed_event_data_pool;
/* Vector of process nodes waiting for restore */
vlib_process_restore_t *process_restore_current;
+ /* Vector of sched nodes waiting to be called */
+ u32 *sched_node_pending;
+
/* Vector of process nodes waiting for restore in next greaph scheduler run
*/
vlib_process_restore_t *process_restore_next;
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 16e904e8433..375b17bd7ae 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -734,6 +734,9 @@ show_node (vlib_main_t * vm, unformat_input_t * input,
case VLIB_NODE_TYPE_PROCESS:
type_str = "process";
break;
+ case VLIB_NODE_TYPE_SCHED:
+ type_str = "sched";
+ break;
default:
type_str = "unknown";
}
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index ffa17ba7bb1..17677ee7aec 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -47,7 +47,6 @@
#include <vppinfra/clib.h>
#include <vppinfra/fifo.h>
-#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
#include <vppinfra/interrupt.h>
#ifdef CLIB_SANITIZE_ADDR
@@ -249,24 +248,58 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_node_t *n = vec_elt (nm->nodes, node_index);
- void *interrupts = 0;
+ void *interrupts = nm->node_interrupts[n->type];
- if (n->type == VLIB_NODE_TYPE_INPUT)
- interrupts = nm->input_node_interrupts;
- else if (n->type == VLIB_NODE_TYPE_PRE_INPUT)
- interrupts = nm->pre_input_node_interrupts;
- else
- {
- ASSERT (0);
- return;
- }
+ ASSERT (interrupts);
if (vm != vlib_get_main ())
- clib_interrupt_set_atomic (interrupts, n->runtime_index);
+ {
+ clib_interrupt_set_atomic (interrupts, n->runtime_index);
+ vlib_thread_wakeup (vm->thread_index);
+ }
else
clib_interrupt_set (interrupts, n->runtime_index);
}
+always_inline int
+vlib_node_is_scheduled (vlib_main_t *vm, u32 node_index)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+ return rt->stop_timer_handle_plus_1 ? 1 : 0;
+}
+
+always_inline void
+vlib_node_schedule (vlib_main_t *vm, u32 node_index, f64 dt)
+{
+ u64 ticks;
+
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+ vlib_tw_event_t e = {
+ .type = VLIB_TW_EVENT_T_SCHED_NODE,
+ .index = node_index,
+ };
+
+ ASSERT (vm == vlib_get_main ());
+ ASSERT (vlib_node_is_scheduled (vm, node_index) == 0);
+
+ dt = flt_round_nearest (dt * VLIB_TW_TICKS_PER_SECOND);
+ ticks = clib_max ((u64) dt, 1);
+
+ rt->stop_timer_handle_plus_1 = 1 + vlib_tw_timer_start (vm, e, ticks);
+}
+
+always_inline void
+vlib_node_unschedule (vlib_main_t *vm, u32 node_index)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+
+ ASSERT (vm == vlib_get_main ());
+ ASSERT (vlib_node_is_scheduled (vm, node_index) == 1);
+
+ vlib_tw_timer_stop (vm, rt->stop_timer_handle_plus_1 - 1);
+ rt->stop_timer_handle_plus_1 = 0;
+}
+
always_inline vlib_process_t *
vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node)
{
@@ -570,14 +603,14 @@ vlib_get_current_process_node_index (vlib_main_t * vm)
return process->node_runtime.node_index;
}
-/** Returns TRUE if a process suspend time is less than 10us
+/** Returns TRUE if a process suspend time is less than vlib timer wheel tick
@param dt - remaining poll time in seconds
- @returns 1 if dt < 10e-6, 0 otherwise
+ @returns 1 if dt < 1/VLIB_TW_TICKS_PER_SECOND, 0 otherwise
*/
always_inline uword
vlib_process_suspend_time_is_zero (f64 dt)
{
- return dt < 10e-6;
+ return dt < (1 / VLIB_TW_TICKS_PER_SECOND);
}
/** Suspend a vlib cooperative multi-tasking thread for a period of time
@@ -601,7 +634,7 @@ vlib_process_suspend (vlib_main_t * vm, f64 dt)
if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
{
/* expiration time in 10us ticks */
- p->resume_clock_interval = dt * 1e5;
+ p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND;
vlib_process_start_switch_stack (vm, 0);
clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
}
@@ -912,7 +945,7 @@ vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
{
- p->resume_clock_interval = dt * 1e5;
+ p->resume_clock_interval = dt * VLIB_TW_TICKS_PER_SECOND;
vlib_process_start_switch_stack (vm, 0);
clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
}
@@ -963,10 +996,8 @@ vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index,
}
always_inline void *
-vlib_process_signal_event_helper (vlib_node_main_t * nm,
- vlib_node_t * n,
- vlib_process_t * p,
- uword t,
+vlib_process_signal_event_helper (vlib_main_t *vm, vlib_node_main_t *nm,
+ vlib_node_t *n, vlib_process_t *p, uword t,
uword n_data_elts, uword n_data_elt_bytes)
{
uword add_to_pending = 0, delete_from_wheel = 0;
@@ -1016,8 +1047,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm,
break;
}
- if (TW (tw_timer_handle_is_free) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- p->stop_timer_handle))
+ if (vlib_tw_timer_handle_is_free (vm, p->stop_timer_handle))
delete_from_wheel = 0;
/* Never add current process to pending vector since current process is
@@ -1036,8 +1066,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm,
if (delete_from_wheel)
{
- TW (tw_timer_stop)
- ((TWT (tw_timer_wheel) *) nm->timing_wheel, p->stop_timer_handle);
+ vlib_tw_timer_stop (vm, p->stop_timer_handle);
p->stop_timer_handle = ~0;
}
@@ -1069,7 +1098,7 @@ vlib_process_signal_event_data (vlib_main_t * vm,
else
t = h[0];
- return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts,
+ return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts,
n_data_elt_bytes);
}
@@ -1097,7 +1126,7 @@ vlib_process_signal_event_at_time (vlib_main_t * vm,
t = h[0];
if (vlib_process_suspend_time_is_zero (dt))
- return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts,
+ return vlib_process_signal_event_helper (vm, nm, n, p, t, n_data_elts,
n_data_elt_bytes);
else
{
@@ -1118,11 +1147,12 @@ vlib_process_signal_event_at_time (vlib_main_t * vm,
te->event_type_index = t;
p->stop_timer_handle =
- TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- vlib_timing_wheel_data_set_timed_event
- (te - nm->signal_timed_event_data_pool),
- 0 /* timer_id */ ,
- (vlib_time_now (vm) + dt) * 1e5);
+ vlib_tw_timer_start (vm,
+ (vlib_tw_event_t){
+ .type = VLIB_TW_EVENT_T_TIMED_EVENT,
+ .index = te - nm->signal_timed_event_data_pool,
+ },
+ dt * VLIB_TW_TICKS_PER_SECOND);
/* Inline data big enough to hold event? */
if (te->n_data_bytes < sizeof (te->inline_event_data))
@@ -1146,8 +1176,8 @@ vlib_process_signal_one_time_event_data (vlib_main_t * vm,
vlib_node_main_t *nm = &vm->node_main;
vlib_node_t *n = vlib_get_node (vm, node_index);
vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
- return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts,
- n_data_elt_bytes);
+ return vlib_process_signal_event_helper (vm, nm, n, p, type_index,
+ n_data_elts, n_data_elt_bytes);
}
always_inline void
diff --git a/src/vlib/punt_node.c b/src/vlib/punt_node.c
index 4b81a61715a..800d520733d 100644
--- a/src/vlib/punt_node.c
+++ b/src/vlib/punt_node.c
@@ -68,14 +68,10 @@ format_punt_trace (u8 * s, va_list * args)
}
always_inline u32
-punt_replicate (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- u32 thread_index,
- vlib_buffer_t * b0,
- u32 bi0,
- vlib_punt_reason_t pr0,
- u32 * next_index,
- u32 * n_left_to_next, u32 ** to_next, u32 * n_dispatched)
+punt_replicate (vlib_main_t *vm, vlib_node_runtime_t *node,
+ clib_thread_index_t thread_index, vlib_buffer_t *b0, u32 bi0,
+ vlib_punt_reason_t pr0, u32 *next_index, u32 *n_left_to_next,
+ u32 **to_next, u32 *n_dispatched)
{
/* multiple clients => replicate a copy to each */
u16 n_clones0, n_cloned0, clone0;
@@ -134,13 +130,10 @@ punt_replicate (vlib_main_t * vm,
}
always_inline u32
-punt_dispatch_one (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_combined_counter_main_t * cm,
- u32 thread_index,
- u32 bi0,
- u32 * next_index,
- u32 * n_left_to_next, u32 ** to_next, u32 * n_dispatched)
+punt_dispatch_one (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_combined_counter_main_t *cm,
+ clib_thread_index_t thread_index, u32 bi0, u32 *next_index,
+ u32 *n_left_to_next, u32 **to_next, u32 *n_dispatched)
{
vlib_punt_reason_t pr0;
vlib_buffer_t *b0;
diff --git a/src/vlib/stats/init.c b/src/vlib/stats/init.c
index 8b382daf333..50f71b3eb11 100644
--- a/src/vlib/stats/init.c
+++ b/src/vlib/stats/init.c
@@ -3,6 +3,7 @@
*/
#include <vlib/vlib.h>
+#include <vlib/file.h>
#include <vlib/unix/unix.h>
#include <vlib/stats/stats.h>
@@ -253,6 +254,4 @@ statseg_init (vlib_main_t *vm)
return stats_segment_socket_init ();
}
-VLIB_INIT_FUNCTION (statseg_init) = {
- .runs_after = VLIB_INITS ("unix_input_init", "linux_epoll_input_init"),
-};
+VLIB_INIT_FUNCTION (statseg_init);
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index fa8d949d549..a1839e787c3 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -183,6 +183,7 @@ vlib_thread_init (vlib_main_t * vm)
u32 first_index = 1;
u32 i;
uword *avail_cpu;
+ uword n_cpus;
u32 stats_num_worker_threads_dir_index;
stats_num_worker_threads_dir_index =
@@ -190,12 +191,24 @@ vlib_thread_init (vlib_main_t * vm)
ASSERT (stats_num_worker_threads_dir_index != ~0);
/* get bitmaps of active cpu cores and sockets */
- tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap ();
tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap ();
+ if (!tm->cpu_translate)
+ tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap ();
+ else
+ {
+ /* get bitmap of cpu core affinity */
+ if ((tm->cpu_core_bitmap = os_get_cpu_affinity_bitmap ()) == 0)
+ return clib_error_return (0, "could not fetch cpu affinity bmp");
+ }
avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
/* skip cores */
+ n_cpus = clib_bitmap_count_set_bits (avail_cpu);
+ if (tm->skip_cores >= n_cpus)
+ return clib_error_return (
+ 0, "skip-core value greater or equal to available cpus");
+
for (i = 0; i < tm->skip_cores; i++)
{
uword c = clib_bitmap_first_set (avail_cpu);
@@ -213,8 +226,20 @@ vlib_thread_init (vlib_main_t * vm)
if (tm->main_lcore != ~0)
{
if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
- return clib_error_return (0, "cpu %u is not available to be used"
- " for the main thread", tm->main_lcore);
+ {
+ if (tm->cpu_translate)
+ return clib_error_return (
+ 0,
+ "cpu %u (relative cpu %u) is not available to be used"
+ " for the main thread in relative mode",
+ tm->main_lcore,
+ os_translate_cpu_from_affinity_bitmap (tm->main_lcore));
+ else
+ return clib_error_return (0,
+ "cpu %u is not available to be used"
+ " for the main thread",
+ tm->main_lcore);
+ }
avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
}
@@ -297,11 +322,23 @@ vlib_thread_init (vlib_main_t * vm)
uword c;
clib_bitmap_foreach (c, tr->coremask) {
if (clib_bitmap_get(avail_cpu, c) == 0)
- return clib_error_return (0, "cpu %u is not available to be used"
- " for the '%s' thread",c, tr->name);
+ {
+ if (tm->cpu_translate)
+ return clib_error_return (
+ 0,
+ "cpu %u (relative cpu %u) is not available to be used"
+ " for the '%s' thread in relative mode",
+ c, os_translate_cpu_from_affinity_bitmap (c), tr->name);
+ else
+ return clib_error_return (
+ 0,
+ "cpu %u is not available to be used"
+ " for the '%s' thread",
+ c, tr->name);
+ }
- avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
- }
+ avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ }
}
else
{
@@ -313,7 +350,7 @@ vlib_thread_init (vlib_main_t * vm)
uword c = clib_bitmap_first_set (avail_cpu);
/* Use CPU 0 as a last resort */
- if (c == ~0 && avail_c0)
+ if (c == ~0 && avail_c0 && !tm->cpu_translate)
{
c = 0;
avail_c0 = 0;
@@ -323,7 +360,7 @@ vlib_thread_init (vlib_main_t * vm)
return clib_error_return (0,
"no available cpus to be used for"
" the '%s' thread #%u",
- tr->name, tr->count);
+ tr->name, j);
avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0);
avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
@@ -705,52 +742,29 @@ start_workers (vlib_main_t * vm)
vec_add1 (nm_clone->nodes, n);
n++;
}
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
- CLIB_CACHE_LINE_BYTES);
- vec_foreach (rt,
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy initial runtime_data from node */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
- CLIB_CACHE_LINE_BYTES);
- clib_interrupt_init (
- &nm_clone->input_node_interrupts,
- vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
- clib_interrupt_init (
- &nm_clone->pre_input_node_interrupts,
- vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]));
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy initial runtime_data from node */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
-
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
- CLIB_CACHE_LINE_BYTES);
- vec_foreach (rt,
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy initial runtime_data from node */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
+ foreach_int (nt, VLIB_NODE_TYPE_INTERNAL,
+ VLIB_NODE_TYPE_PRE_INPUT, VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_TYPE_SCHED)
+ {
+ u32 n_nodes = vec_len (nm_clone->nodes_by_type[nt]);
+ nm_clone->nodes_by_type[nt] = vec_dup_aligned (
+ nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES);
+
+ if (node_type_attrs[nt].may_receive_interrupts)
+ clib_interrupt_init (&nm_clone->node_interrupts[nt],
+ n_nodes);
+
+ vec_foreach (rt, nm_clone->nodes_by_type[nt])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ /* copy initial runtime_data from node */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
+ n->runtime_data_bytes));
+ }
+ }
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
@@ -1016,101 +1030,53 @@ vlib_worker_thread_node_refork (void)
vec_free (old_nodes_clone);
+ /* re-clone nodes */
- /* re-clone internal nodes */
- old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL];
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
- CLIB_CACHE_LINE_BYTES);
-
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy runtime_data, will be overwritten later for existing rt */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy_fast (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
-
- for (j = 0; j < vec_len (old_rt); j++)
+ foreach_int (nt, VLIB_NODE_TYPE_INTERNAL, VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_TYPE_INPUT, VLIB_NODE_TYPE_SCHED)
{
- rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
- rt->state = old_rt[j].state;
- rt->flags = old_rt[j].flags;
- clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
- VLIB_NODE_RUNTIME_DATA_SIZE);
- }
+ old_rt = nm_clone->nodes_by_type[nt];
+ u32 n_nodes = vec_len (nm->nodes_by_type[nt]);
- vec_free (old_rt);
-
- /* re-clone input nodes */
- old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
- CLIB_CACHE_LINE_BYTES);
- clib_interrupt_resize (
- &nm_clone->input_node_interrupts,
- vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
- clib_interrupt_resize (
- &nm_clone->pre_input_node_interrupts,
- vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]));
-
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy runtime_data, will be overwritten later for existing rt */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy_fast (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
+ nm_clone->nodes_by_type[nt] =
+ vec_dup_aligned (nm->nodes_by_type[nt], CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < vec_len (old_rt); j++)
- {
- rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
- rt->state = old_rt[j].state;
- rt->flags = old_rt[j].flags;
- clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
- VLIB_NODE_RUNTIME_DATA_SIZE);
- }
-
- for (j = vec_len (old_rt);
- j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]); j++)
- {
- rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j];
- nm_clone->input_node_counts_by_state[rt->state] += 1;
- }
+ if (nm_clone->node_interrupts[nt])
+ clib_interrupt_resize (&nm_clone->node_interrupts[nt], n_nodes);
- vec_free (old_rt);
+ vec_foreach (rt, nm_clone->nodes_by_type[nt])
+ {
+ vlib_node_t *n = vlib_get_node (vm, rt->node_index);
+ /* copy runtime_data, will be overwritten later for existing rt */
+ if (n->runtime_data && n->runtime_data_bytes > 0)
+ clib_memcpy_fast (
+ rt->runtime_data, n->runtime_data,
+ clib_min (VLIB_NODE_RUNTIME_DATA_SIZE, n->runtime_data_bytes));
+ }
- /* re-clone pre-input nodes */
- old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT];
- nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT] =
- vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT],
- CLIB_CACHE_LINE_BYTES);
+ for (j = 0; j < vec_len (old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ rt->flags = old_rt[j].flags;
+ clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
+ VLIB_NODE_RUNTIME_DATA_SIZE);
+ }
- vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
- {
- vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- /* copy runtime_data, will be overwritten later for existing rt */
- if (n->runtime_data && n->runtime_data_bytes > 0)
- clib_memcpy_fast (rt->runtime_data, n->runtime_data,
- clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
- n->runtime_data_bytes));
- }
+ if (nt == VLIB_NODE_TYPE_INPUT)
+ {
+ for (j = vec_len (old_rt);
+ j < vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+ j++)
+ {
+ rt = &nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT][j];
+ nm_clone->input_node_counts_by_state[rt->state] += 1;
+ }
+ }
- for (j = 0; j < vec_len (old_rt); j++)
- {
- rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
- rt->state = old_rt[j].state;
- rt->flags = old_rt[j].flags;
- clib_memcpy_fast (rt->runtime_data, old_rt[j].runtime_data,
- VLIB_NODE_RUNTIME_DATA_SIZE);
+ vec_free (old_rt);
}
- vec_free (old_rt);
-
vec_free (nm_clone->processes);
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
@@ -1177,6 +1143,8 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
;
else if (unformat (input, "skip-cores %u", &tm->skip_cores))
;
+ else if (unformat (input, "relative"))
+ tm->cpu_translate = 1;
else if (unformat (input, "numa-heap-size %U",
unformat_memory_size, &tm->numa_heap_size))
;
@@ -1235,6 +1203,11 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
if (use_corelist && tm->main_lcore == ~0)
return clib_error_return (0, "main-core must be specified when using "
"corelist-* or coremask-* attribute");
+
+ if (tm->skip_cores != 0 && tm->main_lcore == ~0)
+ return clib_error_return (
+ 0, "main-core must be specified when using skip-cores attribute");
+
if (tm->sched_priority != ~0)
{
if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR)
@@ -1267,6 +1240,36 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
tr = tr->next;
}
+ /* for relative mode, update requested main-core and corelists */
+ if (tm->cpu_translate)
+ {
+
+ if (tm->main_lcore == ~0)
+ clib_error ("main-core must be specified in relative mode");
+ int cpu_translate_main_core =
+ os_translate_cpu_to_affinity_bitmap (tm->main_lcore);
+ if (cpu_translate_main_core == -1)
+ clib_error ("cpu %u is not available to be used"
+ " for the main thread in relative mode",
+ tm->main_lcore);
+ tm->main_lcore = cpu_translate_main_core;
+
+ tr = tm->next;
+ uword *translated_cpu_bmp;
+ while (tr && tr->coremask)
+ {
+ translated_cpu_bmp =
+ os_translate_cpu_bmp_to_affinity_bitmap (tr->coremask);
+
+ if (!translated_cpu_bmp)
+ clib_error ("could not translate corelist associated to %s",
+ tr->name);
+ clib_bitmap_free (tr->coremask);
+ tr->coremask = translated_cpu_bmp;
+ tr = tr->next;
+ }
+ }
+
return 0;
}
@@ -1304,7 +1307,7 @@ vlib_worker_thread_initial_barrier_sync_and_release (vlib_main_t * vm)
{
if ((now = vlib_time_now (vm)) > deadline)
{
- fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ fformat (stderr, "%s: worker thread deadlock\n", __func__);
os_panic ();
}
CLIB_PAUSE ();
@@ -1404,12 +1407,16 @@ vlib_worker_thread_barrier_sync_int (vlib_main_t * vm, const char *func_name)
deadline = now + BARRIER_SYNC_TIMEOUT;
- *vlib_worker_threads->wait_at_barrier = 1;
+ __atomic_store_n (vlib_worker_threads->wait_at_barrier, 1, __ATOMIC_RELEASE);
+
+ for (clib_thread_index_t ti = 1; ti < vlib_get_n_threads (); ti++)
+ vlib_thread_wakeup (ti);
+
while (*vlib_worker_threads->workers_at_barrier != count)
{
if ((now = vlib_time_now (vm)) > deadline)
{
- fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ fformat (stderr, "%s: worker thread deadlock\n", __func__);
os_panic ();
}
}
@@ -1477,15 +1484,14 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
* time offset. See vlib_time_now(...)
*/
vm->time_last_barrier_release = vlib_time_now (vm);
- CLIB_MEMORY_STORE_BARRIER ();
- *vlib_worker_threads->wait_at_barrier = 0;
+ __atomic_store_n (vlib_worker_threads->wait_at_barrier, 0, __ATOMIC_RELEASE);
while (*vlib_worker_threads->workers_at_barrier > 0)
{
if ((now = vlib_time_now (vm)) > deadline)
{
- fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ fformat (stderr, "%s: worker thread deadlock\n", __func__);
os_panic ();
}
}
@@ -1502,7 +1508,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
if ((now = vlib_time_now (vm)) > deadline)
{
fformat (stderr, "%s: worker thread refork deadlock\n",
- __FUNCTION__);
+ __func__);
os_panic ();
}
}
@@ -1546,7 +1552,7 @@ vlib_workers_sync (void)
if (!(*vlib_worker_threads->wait_at_barrier) &&
!clib_atomic_swap_rel_n (&vlib_worker_threads->wait_before_barrier, 1))
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
vlib_rpc_call_main_thread (vlib_worker_sync_rpc, (u8 *) &thread_index,
sizeof (thread_index));
vlib_worker_flush_pending_rpc_requests (vlib_get_main ());
@@ -1630,37 +1636,6 @@ vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm)
clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
}
-void
-vlib_worker_thread_fn (void *arg)
-{
- vlib_global_main_t *vgm = vlib_get_global_main ();
- vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
- vlib_main_t *vm = vlib_get_main ();
- clib_error_t *e;
-
- ASSERT (vm->thread_index == vlib_get_thread_index ());
-
- vlib_worker_thread_init (w);
- clib_time_init (&vm->clib_time);
- clib_mem_set_heap (w->thread_mheap);
-
- vm->worker_init_functions_called = hash_create (0, 0);
-
- e = vlib_call_init_exit_functions_no_sort (
- vm, &vgm->worker_init_function_registrations, 1 /* call_once */,
- 0 /* is_global */);
- if (e)
- clib_error_report (e);
-
- vlib_worker_loop (vm);
-}
-
-VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
- .name = "workers",
- .short_name = "wk",
- .function = vlib_worker_thread_fn,
-};
-
extern clib_march_fn_registration
*vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations;
extern clib_march_fn_registration
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index c671aa78c39..46a1476ee4b 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -166,7 +166,10 @@ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
#define BARRIER_SYNC_TIMEOUT (1.0)
#endif
-#define vlib_worker_thread_barrier_sync(X) {vlib_worker_thread_barrier_sync_int(X, __FUNCTION__);}
+#define vlib_worker_thread_barrier_sync(X) \
+ { \
+ vlib_worker_thread_barrier_sync_int (X, __func__); \
+ }
void vlib_worker_thread_barrier_sync_int (vlib_main_t * vm,
const char *func_name);
@@ -183,7 +186,7 @@ void vlib_worker_wait_one_loop (void);
*/
void vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm);
-static_always_inline uword
+static_always_inline clib_thread_index_t
vlib_get_thread_index (void)
{
return __os_thread_index;
@@ -195,7 +198,7 @@ vlib_smp_unsafe_warning (void)
if (CLIB_DEBUG > 0)
{
if (vlib_get_thread_index ())
- fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
+ fformat (stderr, "%s: SMP unsafe warning...\n", __func__);
}
}
@@ -257,6 +260,9 @@ typedef struct
int use_pthreads;
+ /* Translate requested cpu configuration to vpp affinity mask */
+ int cpu_translate;
+
/* Number of vlib_main / vnet_main clones */
u32 n_vlib_mains;
@@ -337,7 +343,7 @@ vlib_get_worker_thread_index (u32 worker_index)
}
always_inline u32
-vlib_get_worker_index (u32 thread_index)
+vlib_get_worker_index (clib_thread_index_t thread_index)
{
return thread_index - 1;
}
@@ -355,7 +361,7 @@ vlib_worker_thread_barrier_check (void)
{
vlib_global_main_t *vgm = vlib_get_global_main ();
vlib_main_t *vm = vlib_get_main ();
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
f64 t = vlib_time_now (vm);
if (PREDICT_FALSE (vec_len (vm->barrier_perf_callbacks) != 0))
@@ -372,7 +378,7 @@ vlib_worker_thread_barrier_check (void)
struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
} __clib_packed *ed;
ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, w->elog_track);
@@ -419,7 +425,7 @@ vlib_worker_thread_barrier_check (void)
struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
} __clib_packed *ed;
ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e,
@@ -444,7 +450,7 @@ vlib_worker_thread_barrier_check (void)
struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 duration;
} __clib_packed *ed;
@@ -498,13 +504,16 @@ void vlib_workers_sync (void);
* Release barrier after workers sync
*/
void vlib_workers_continue (void);
+static_always_inline void
+vlib_thread_wakeup (clib_thread_index_t thread_index)
+{
+ vlib_main_t *vm = vlib_get_main_by_index (thread_index);
+ ssize_t __clib_unused rv;
+ u64 val = 1;
-#endif /* included_vlib_threads_h */
+ if (__atomic_load_n (&vm->thread_sleeps, __ATOMIC_RELAXED))
+ if (__atomic_exchange_n (&vm->wakeup_pending, 1, __ATOMIC_RELAXED) == 0)
+ rv = write (vm->wakeup_fd, &val, sizeof (u64));
+}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+#endif /* included_vlib_threads_h */
diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c
index 2872a025d66..ccc34fc8d8d 100644
--- a/src/vlib/threads_cli.c
+++ b/src/vlib/threads_cli.c
@@ -14,6 +14,8 @@
*/
#define _GNU_SOURCE
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <vppinfra/format.h>
#include <vlib/vlib.h>
@@ -46,16 +48,20 @@ show_threads_fn (vlib_main_t * vm,
const vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_worker_thread_t *w;
int i;
+ u8 *line = NULL;
- vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s",
- "ID", "Name", "Type", "LWP", "Sched Policy (Priority)",
- "lcore", "Core", "Socket", "State");
+ line = format (line, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s", "ID",
+ "Name", "Type", "LWP", "Sched Policy (Priority)", "lcore",
+ "Core", "Socket", "State");
+ if (tm->cpu_translate)
+ line = format (line, "%-15s", "Relative Core");
+ vlib_cli_output (vm, "%v", line);
+ vec_free (line);
#if !defined(__powerpc64__)
for (i = 0; i < vec_len (vlib_worker_threads); i++)
{
w = vlib_worker_threads + i;
- u8 *line = NULL;
line = format (line, "%-7d%-20s%-12s%-8d",
i,
@@ -69,7 +75,13 @@ show_threads_fn (vlib_main_t * vm,
{
int core_id = w->core_id;
int numa_id = w->numa_id;
- line = format (line, "%-7u%-7u%-7u%", cpu_id, core_id, numa_id);
+ line = format (line, "%-7u%-7u%-17u%", cpu_id, core_id, numa_id);
+ if (tm->cpu_translate)
+ {
+ int cpu_translate_core_id =
+ os_translate_cpu_from_affinity_bitmap (cpu_id);
+ line = format (line, "%-7u", cpu_translate_core_id);
+ }
}
else
{
diff --git a/src/vlib/time.h b/src/vlib/time.h
index 61873bb2ef3..fa8cdb2ec8d 100644
--- a/src/vlib/time.h
+++ b/src/vlib/time.h
@@ -7,14 +7,13 @@
#define included_vlib_time_h
#include <vlib/vlib.h>
-#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+#include <vlib/tw_funcs.h>
static inline f64
vlib_time_get_next_timer (vlib_main_t *vm)
{
- vlib_node_main_t *nm = &vm->node_main;
- TWT (tw_timer_wheel) *wheel = nm->timing_wheel;
- return TW (tw_timer_first_expires_in_ticks) (wheel) * wheel->timer_interval;
+ TWT (tw_timer_wheel) *wheel = vm->timing_wheel;
+ return vlib_tw_timer_first_expires_in_ticks (vm) * wheel->timer_interval;
}
static inline void
diff --git a/src/vlib/tw_funcs.h b/src/vlib/tw_funcs.h
new file mode 100644
index 00000000000..c64aaa3db45
--- /dev/null
+++ b/src/vlib/tw_funcs.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __vlib_tw_funcs_h__
+#define __vlib_tw_funcs_h__
+
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+#define VLIB_TW_TICKS_PER_SECOND 1e5 /* 10 us */
+
+/* Event kinds stored in the 2-bit `type` field of vlib_tw_event_t.
+ * The three kinds are numbered 1..3; no enumerator is defined for 0. */
+typedef enum
+{
+ VLIB_TW_EVENT_T_PROCESS_NODE = 1,
+ VLIB_TW_EVENT_T_TIMED_EVENT = 2,
+ VLIB_TW_EVENT_T_SCHED_NODE = 3,
+} vlib_tw_event_type_t;
+
+/* A timer-wheel event packed into one 32-bit word: a 2-bit event kind
+ * followed by a 30-bit index. `as_u32` overlays the same storage and is
+ * the form vlib_tw_timer_start () passes to TW (tw_timer_start). */
+typedef union
+{
+ struct
+ {
+ u32 type : 2; /* vlib_tw_event_type_t */
+ u32 index : 30;
+ };
+ u32 as_u32;
+} vlib_tw_event_t;
+
+/* Arm a timer for event `e` on vm's timing wheel.
+ *
+ * Increments vm->n_tw_timers, then starts a wheel timer that carries
+ * e.as_u32 with timer_id 0; `interval` is passed through unchanged to
+ * TW (tw_timer_start). Returns the value that call returns. */
+static_always_inline u32
+vlib_tw_timer_start (vlib_main_t *vm, vlib_tw_event_t e, u64 interval)
+{
+  TWT (tw_timer_wheel) *wheel = (TWT (tw_timer_wheel) *) vm->timing_wheel;
+  u32 handle;
+
+  vm->n_tw_timers += 1;
+  handle = TW (tw_timer_start) (wheel, e.as_u32, 0 /* timer_id */, interval);
+
+  return handle;
+}
+
+/* Stop the timer identified by `handle` on vm's timing wheel.
+ *
+ * Asserts that vm->n_tw_timers is positive, decrements it, and only then
+ * calls TW (tw_timer_stop). */
+static_always_inline void
+vlib_tw_timer_stop (vlib_main_t *vm, u32 handle)
+{
+  TWT (tw_timer_wheel) *wheel = (TWT (tw_timer_wheel) *) vm->timing_wheel;
+
+  ASSERT (vm->n_tw_timers > 0);
+  vm->n_tw_timers -= 1;
+
+  TW (tw_timer_stop) (wheel, handle);
+}
+
+/* Return the result of TW (tw_timer_handle_is_free) for `handle` on vm's
+ * timing wheel. */
+static_always_inline int
+vlib_tw_timer_handle_is_free (vlib_main_t *vm, u32 handle)
+{
+  return TW (tw_timer_handle_is_free) (
+    (TWT (tw_timer_wheel) *) vm->timing_wheel, handle);
+}
+
+/* Return the result of TW (tw_timer_first_expires_in_ticks) for vm's
+ * timing wheel. */
+static_always_inline u32
+vlib_tw_timer_first_expires_in_ticks (vlib_main_t *vm)
+{
+  TWT (tw_timer_wheel) *wheel = (TWT (tw_timer_wheel) *) vm->timing_wheel;
+
+  return TW (tw_timer_first_expires_in_ticks) (wheel);
+}
+
+/* Allocate and initialise vm's timing wheel.
+ *
+ * The wheel is allocated with CLIB_CACHE_LINE_BYTES alignment and
+ * initialised with no callback, a timer interval of
+ * 1 / VLIB_TW_TICKS_PER_SECOND (10 us per the macro's comment) and ~0 as
+ * the maximum number of expirations per call. vm->timing_wheel is then
+ * pointed at the new wheel and vm->n_tw_timers is reset to 0. Any previous
+ * value of vm->timing_wheel is overwritten without being read or freed. */
+static_always_inline void
+vlib_tw_init (vlib_main_t *vm)
+{
+  /* Initialise straight from the allocator; vm->timing_wheel is replaced
+   * below, so there is nothing to gain from reading it first. */
+  TWT (tw_timer_wheel) *tw = clib_mem_alloc_aligned (
+    sizeof (TWT (tw_timer_wheel)), CLIB_CACHE_LINE_BYTES);
+
+  /* Create the process timing wheel */
+  TW (tw_timer_wheel_init)
+  (tw, 0 /* callback */, 1 / VLIB_TW_TICKS_PER_SECOND,
+   ~0 /* max expirations per call */);
+  vm->timing_wheel = tw;
+  vm->n_tw_timers = 0;
+}
+
+/* Collect the timers that have expired as of vlib_time_now (vm).
+ *
+ * `v` is always emptied first. Only when vm->n_tw_timers is positive is
+ * the wheel asked to expire timers into `v`; the number of entries it
+ * yields is then subtracted from vm->n_tw_timers. The wheel call's result
+ * is assigned back to `v`, so callers must use the returned pointer
+ * rather than the one they passed in. */
+static_always_inline u32 *
+vlib_tw_timer_expire_timers (vlib_main_t *vm, u32 *v)
+{
+  TWT (tw_timer_wheel) *wheel = (TWT (tw_timer_wheel) *) vm->timing_wheel;
+
+  vec_reset_length (v);
+
+  if (vm->n_tw_timers > 0)
+    {
+      f64 now = vlib_time_now (vm);
+
+      v = TW (tw_timer_expire_timers_vec) (wheel, now, v);
+      ASSERT (vec_len (v) <= vm->n_tw_timers);
+      vm->n_tw_timers -= vec_len (v);
+    }
+
+  return v;
+}
+
+#endif /* __vlib_tw_funcs_h__ */
diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c
index 051c5730aed..8f0f00c8b9f 100644
--- a/src/vlib/unix/cli.c
+++ b/src/vlib/unix/cli.c
@@ -46,6 +46,7 @@
/*? %%syscfg:group_label Command line session %% ?*/
#include <vlib/vlib.h>
+#include <vlib/file.h>
#include <vlib/unix/unix.h>
#include <ctype.h>
@@ -1102,7 +1103,7 @@ unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes)
clib_file_t *uf;
cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ uf = clib_file_get (fm, cf->clib_file_index);
if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0)
{
@@ -1244,7 +1245,7 @@ unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf)
{
unix_main_t *um = &unix_main;
clib_file_main_t *fm = &file_main;
- clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ clib_file_t *uf = clib_file_get (fm, cf->clib_file_index);
unix_cli_banner_t *banner;
int i, len;
@@ -2460,7 +2461,7 @@ static int
unix_cli_line_edit (unix_cli_main_t * cm, unix_main_t * um,
clib_file_main_t * fm, unix_cli_file_t * cf)
{
- clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ clib_file_t *uf = clib_file_get (fm, cf->clib_file_index);
int i;
for (i = 0; i < vec_len (cf->input_vector); i++)
@@ -2628,7 +2629,7 @@ more:
/* Re-fetch pointer since pool may have moved. */
cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ uf = clib_file_get (fm, cf->clib_file_index);
done:
/* reset vector; we'll re-use it later */
@@ -2707,7 +2708,7 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
}
cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ uf = clib_file_get (fm, cf->clib_file_index);
/* Quit/EOF on stdin means quit program. */
if (uf->file_descriptor == STDIN_FILENO)
@@ -3015,7 +3016,7 @@ unix_cli_listen_read_ready (clib_file_t * uf)
cf->height = UNIX_CLI_DEFAULT_TERMINAL_HEIGHT;
/* Send the telnet options */
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ uf = clib_file_get (fm, cf->clib_file_index);
unix_vlib_cli_output_raw (cf, uf, charmode_option,
ARRAY_LEN (charmode_option));
@@ -3050,7 +3051,7 @@ unix_cli_resize_interrupt (int signum)
unix_cli_main_t *cm = &unix_cli_main;
unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool,
cm->stdin_cli_file_index);
- clib_file_t *uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ clib_file_t *uf = clib_file_get (fm, cf->clib_file_index);
struct winsize ws;
(void) signum;
@@ -3548,45 +3549,6 @@ VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
.function = unix_show_errors,
};
-/** CLI command to show various unix error statistics. */
-static clib_error_t *
-unix_show_files (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- clib_error_t *error = 0;
- clib_file_main_t *fm = &file_main;
- clib_file_t *f;
- char path[PATH_MAX];
- u8 *s = 0;
-
- vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread",
- "Read", "Write", "Error", "File Name", "Description");
-
- pool_foreach (f, fm->file_pool)
- {
- int rv;
- s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0);
- rv = readlink((char *) s, path, PATH_MAX - 1);
-
- path[rv < 0 ? 0 : rv] = 0;
-
- vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v",
- f->file_descriptor, f->polling_thread_index,
- f->read_events, f->write_events, f->error_events,
- path, f->description);
- vec_reset_length (s);
- }
- vec_free (s);
-
- return error;
-}
-
-VLIB_CLI_COMMAND (cli_unix_show_files, static) = {
- .path = "show unix files",
- .short_help = "Show Unix files in use",
- .function = unix_show_files,
-};
-
/** CLI command to show session command history. */
static clib_error_t *
unix_cli_show_history (vlib_main_t * vm,
@@ -3713,7 +3675,7 @@ unix_cli_show_cli_sessions (vlib_main_t * vm,
{
int j = 0;
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ uf = clib_file_get (fm, cf->clib_file_index);
table_format_cell (t, i, j++, "%u", cf->process_node_index);
table_format_cell (t, i, j++, "%u", uf->file_descriptor);
table_format_cell (t, i, j++, "%v", cf->name);
diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c
deleted file mode 100644
index e96cd902466..00000000000
--- a/src/vlib/unix/input.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * input.c: Unix file input
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <signal.h>
-#include <unistd.h>
-#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
-
-/* FIXME autoconf */
-#define HAVE_LINUX_EPOLL
-
-#ifdef HAVE_LINUX_EPOLL
-
-#include <sys/epoll.h>
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- int epoll_fd;
- struct epoll_event *epoll_events;
- int n_epoll_fds;
-
- /* Statistics. */
- u64 epoll_files_ready;
- u64 epoll_waits;
-} linux_epoll_main_t;
-
-static linux_epoll_main_t *linux_epoll_mains = 0;
-
-static void
-linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
-{
- clib_file_main_t *fm = &file_main;
- linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
- f->polling_thread_index);
- struct epoll_event e = { 0 };
- int op, add_del = 0;
-
- e.events = EPOLLIN;
- if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
- e.events |= EPOLLOUT;
- if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
- e.events |= EPOLLET;
- e.data.u32 = f - fm->file_pool;
-
- op = -1;
-
- switch (update_type)
- {
- case UNIX_FILE_UPDATE_ADD:
- op = EPOLL_CTL_ADD;
- add_del = 1;
- break;
-
- case UNIX_FILE_UPDATE_MODIFY:
- op = EPOLL_CTL_MOD;
- break;
-
- case UNIX_FILE_UPDATE_DELETE:
- op = EPOLL_CTL_DEL;
- add_del = -1;
- break;
-
- default:
- clib_warning ("unknown update_type %d", update_type);
- return;
- }
-
- /* worker threads open epoll fd only if needed */
- if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
- {
- em->epoll_fd = epoll_create (1);
- if (em->epoll_fd < 0)
- {
- clib_unix_warning ("epoll_create");
- return;
- }
- em->n_epoll_fds = 0;
- }
-
- if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
- {
- clib_unix_warning ("epoll_ctl");
- return;
- }
-
- em->n_epoll_fds += add_del;
-
- if (em->n_epoll_fds == 0)
- {
- close (em->epoll_fd);
- em->epoll_fd = -1;
- }
-}
-
-static_always_inline uword
-linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 thread_index)
-{
- unix_main_t *um = &unix_main;
- clib_file_main_t *fm = &file_main;
- linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
- struct epoll_event *e;
- int n_fds_ready;
- int is_main = (thread_index == 0);
-
- {
- vlib_node_main_t *nm = &vm->node_main;
- u32 ticks_until_expiration;
- f64 timeout;
- f64 now;
- int timeout_ms = 0, max_timeout_ms = 10;
- f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
-
- if (is_main == 0)
- now = vlib_time_now (vm);
-
- /*
- * If we've been asked for a fixed-sleep between main loop polls,
- * do so right away.
- */
- if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
- {
- struct timespec ts, tsrem;
- timeout = 0;
- timeout_ms = 0;
- node->input_main_loops_per_call = 0;
- ts.tv_sec = 0;
- ts.tv_nsec = 1000 * um->poll_sleep_usec;
-
- while (nanosleep (&ts, &tsrem) < 0)
- {
- ts = tsrem;
- }
- }
- /* If we're not working very hard, decide how long to sleep */
- else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
- && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
- {
- ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
- ((TWT (tw_timer_wheel) *) nm->timing_wheel);
-
- /* Nothing on the fast wheel, sleep 10ms */
- if (ticks_until_expiration == TW_SLOTS_PER_RING)
- {
- timeout = 10e-3;
- timeout_ms = max_timeout_ms;
- }
- else
- {
- timeout = (f64) ticks_until_expiration *1e-5;
- if (timeout < 1e-3)
- timeout_ms = 0;
- else
- {
- timeout_ms = timeout * 1e3;
- /* Must be between 1 and 10 ms. */
- timeout_ms = clib_max (1, timeout_ms);
- timeout_ms = clib_min (max_timeout_ms, timeout_ms);
- }
- }
- node->input_main_loops_per_call = 0;
- }
- else if (is_main == 0 && vector_rate < 2 &&
- (vlib_get_first_main ()->time_last_barrier_release + 0.5 < now) &&
- nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
- {
- timeout = 10e-3;
- timeout_ms = max_timeout_ms;
- node->input_main_loops_per_call = 0;
- }
- else /* busy */
- {
- /* Don't come back for a respectable number of dispatch cycles */
- node->input_main_loops_per_call = 1024;
- }
-
- /* Allow any signal to wakeup our sleep. */
- if (is_main || em->epoll_fd != -1)
- {
- static sigset_t unblock_all_signals;
- n_fds_ready = epoll_pwait (em->epoll_fd,
- em->epoll_events,
- vec_len (em->epoll_events),
- timeout_ms, &unblock_all_signals);
-
- /* This kludge is necessary to run over absurdly old kernels */
- if (n_fds_ready < 0 && errno == ENOSYS)
- {
- n_fds_ready = epoll_wait (em->epoll_fd,
- em->epoll_events,
- vec_len (em->epoll_events), timeout_ms);
- }
-
- }
- else
- {
- /*
- * Worker thread, no epoll fd's, sleep for 100us at a time
- * and check for a barrier sync request
- */
- if (timeout_ms)
- {
- struct timespec ts, tsrem;
- f64 limit = now + (f64) timeout_ms * 1e-3;
-
- while (vlib_time_now (vm) < limit)
- {
- /* Sleep for 100us at a time */
- ts.tv_sec = 0;
- ts.tv_nsec = 1000 * 100;
-
- while (nanosleep (&ts, &tsrem) < 0)
- ts = tsrem;
- if (*vlib_worker_threads->wait_at_barrier ||
- clib_interrupt_is_any_pending (
- nm->input_node_interrupts) ||
- clib_interrupt_is_any_pending (
- nm->pre_input_node_interrupts))
- goto done;
- }
- }
- goto done;
- }
- }
-
- if (n_fds_ready < 0)
- {
- if (unix_error_is_fatal (errno))
- vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
-
- /* non fatal error (e.g. EINTR). */
- goto done;
- }
-
- em->epoll_waits += 1;
- em->epoll_files_ready += n_fds_ready;
-
- for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
- {
- u32 i = e->data.u32;
- clib_file_t *f;
- clib_error_t *errors[4];
- int n_errors = 0;
-
- /*
- * Under rare scenarios, epoll may still post us events for the
- * deleted file descriptor. We just deal with it and throw away the
- * events for the corresponding file descriptor.
- */
- f = fm->file_pool + i;
- if (PREDICT_FALSE (pool_is_free (fm->file_pool, f)))
- {
- if (e->events & EPOLLIN)
- {
- errors[n_errors] =
- clib_error_return (0, "epoll event EPOLLIN dropped due "
- "to free index %u", i);
- n_errors++;
- }
- if (e->events & EPOLLOUT)
- {
- errors[n_errors] =
- clib_error_return (0, "epoll event EPOLLOUT dropped due "
- "to free index %u", i);
- n_errors++;
- }
- if (e->events & EPOLLERR)
- {
- errors[n_errors] =
- clib_error_return (0, "epoll event EPOLLERR dropped due "
- "to free index %u", i);
- n_errors++;
- }
- }
- else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
- {
- if (e->events & EPOLLIN)
- {
- f->read_events++;
- errors[n_errors] = f->read_function (f);
- /* Make sure f is valid if the file pool moves */
- if (pool_is_free_index (fm->file_pool, i))
- continue;
- f = pool_elt_at_index (fm->file_pool, i);
- n_errors += errors[n_errors] != 0;
- }
- if (e->events & EPOLLOUT)
- {
- f->write_events++;
- errors[n_errors] = f->write_function (f);
- n_errors += errors[n_errors] != 0;
- }
- }
- else
- {
- if (f->error_function)
- {
- f->error_events++;
- errors[n_errors] = f->error_function (f);
- n_errors += errors[n_errors] != 0;
- }
- else
- close (f->file_descriptor);
- }
-
- ASSERT (n_errors < ARRAY_LEN (errors));
- for (i = 0; i < n_errors; i++)
- {
- unix_save_error (um, errors[i]);
- }
- }
-
-done:
- if (PREDICT_FALSE (vm->cpu_id != clib_get_current_cpu_id ()))
- {
- vm->cpu_id = clib_get_current_cpu_id ();
- vm->numa_node = clib_get_current_numa_node ();
- }
-
- return 0;
-}
-
-static uword
-linux_epoll_input (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 thread_index = vlib_get_thread_index ();
-
- if (thread_index == 0)
- return linux_epoll_input_inline (vm, node, frame, 0);
- else
- return linux_epoll_input_inline (vm, node, frame, thread_index);
-}
-
-VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
- .function = linux_epoll_input,
- .type = VLIB_NODE_TYPE_PRE_INPUT,
- .name = "unix-epoll-input",
-};
-
-clib_error_t *
-linux_epoll_input_init (vlib_main_t * vm)
-{
- linux_epoll_main_t *em;
- clib_file_main_t *fm = &file_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
-
- vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,
- CLIB_CACHE_LINE_BYTES);
-
- vec_foreach (em, linux_epoll_mains)
- {
- /* Allocate some events. */
- vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
-
- if (linux_epoll_mains == em)
- {
- em->epoll_fd = epoll_create (1);
- if (em->epoll_fd < 0)
- return clib_error_return_unix (0, "epoll_create");
- }
- else
- em->epoll_fd = -1;
- }
-
- fm->file_update = linux_epoll_file_update;
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (linux_epoll_input_init);
-
-#endif /* HAVE_LINUX_EPOLL */
-
-static clib_error_t *
-unix_input_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (unix_input_init) =
-{
- .runs_before = VLIB_INITS ("linux_epoll_input_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 11d0cb1160c..cd1f1e1c99a 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -54,6 +54,10 @@
#include <sys/resource.h>
#include <unistd.h>
+#ifdef HAVE_LIBIBERTY
+#include <libiberty/demangle.h>
+#endif
+
/** Default CLI pager limit is not configured in startup.conf */
#define UNIX_CLI_DEFAULT_PAGER_LIMIT 100000
@@ -64,7 +68,6 @@ char *vlib_default_runtime_dir __attribute__ ((weak));
char *vlib_default_runtime_dir = "vlib";
unix_main_t unix_main;
-clib_file_main_t file_main;
static clib_error_t *
unix_main_init (vlib_main_t * vm)
@@ -74,10 +77,7 @@ unix_main_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (unix_main_init) =
-{
- .runs_before = VLIB_INITS ("unix_input_init"),
-};
+VLIB_INIT_FUNCTION (unix_main_init);
static int
unsetup_signal_handlers (int sig)
@@ -226,8 +226,20 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
{
if (color)
syslog_msg = format (syslog_msg, ANSI_FG_YELLOW);
- syslog_msg =
- format (syslog_msg, " %s + 0x%x", sf->name, sf->offset);
+#if HAVE_LIBIBERTY
+ if (strncmp (sf->name, "_Z", 2) == 0)
+ {
+ char *demangled = cplus_demangle (sf->name, DMGL_AUTO);
+ syslog_msg = format (syslog_msg, " %s",
+ demangled ? demangled : sf->name);
+ if (demangled)
+ free (demangled);
+ }
+ else
+#endif
+ syslog_msg = format (syslog_msg, " %s", sf->name);
+
+ syslog_msg = format (syslog_msg, " + 0x%x", sf->offset);
if (color)
syslog_msg = format (syslog_msg, ANSI_FG_DEFAULT);
}
@@ -374,6 +386,7 @@ unix_config (vlib_main_t * vm, unformat_input_t * input)
clib_error_t *error = 0;
gid_t gid;
int pidfd = -1;
+ int use_current_dir = 0;
/* Defaults */
um->cli_pager_buffer_limit = UNIX_CLI_DEFAULT_PAGER_LIMIT;
@@ -397,6 +410,8 @@ unix_config (vlib_main_t * vm, unformat_input_t * input)
else
if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
;
+ else if (unformat (input, "use-current-dir"))
+ use_current_dir = 1;
else if (unformat (input, "runtime-dir %s", &um->runtime_dir))
;
else if (unformat (input, "cli-line-mode"))
@@ -486,6 +501,13 @@ unix_config (vlib_main_t * vm, unformat_input_t * input)
format_unformat_error, input);
}
+ if (use_current_dir)
+ {
+ char cwd[PATH_MAX];
+ if (getcwd (cwd, PATH_MAX))
+ um->runtime_dir = format (um->runtime_dir, "%s", cwd);
+ }
+
if (um->runtime_dir == 0)
{
uid_t uid = geteuid ();
diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c
index 1f3b4e9a8f1..396e442d4fa 100644
--- a/src/vlib/unix/mc_socket.c
+++ b/src/vlib/unix/mc_socket.c
@@ -827,8 +827,7 @@ static void *
catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
u8 * set_output_vector)
{
- clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
- c->clib_file_index);
+ clib_file_t *uf = clib_file_get (&file_main, c->clib_file_index);
u8 *result = 0;
if (set_output_vector)
diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c
index 77e4633e14a..c784c5b44ad 100644
--- a/src/vlib/unix/plugin.c
+++ b/src/vlib/unix/plugin.c
@@ -748,11 +748,11 @@ config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input)
}
vec_add2 (pm->configs, pc, 1);
- hash_set_mem (pm->config_index_by_name, name, pc - pm->configs);
pc->is_enabled = is_enable;
pc->is_disabled = is_disable;
pc->skip_version_check = skip_version_check;
- pc->name = name;
+ pc->name = vec_dup (name);
+ hash_set_mem (pm->config_index_by_name, pc->name, pc - pm->configs);
done:
return error;
@@ -816,6 +816,7 @@ done:
unformat_vlib_cli_sub_input, &sub_input))
{
error = config_one_plugin (vm, (char *) s, &sub_input);
+ vec_free (s);
unformat_free (&sub_input);
if (error)
goto done2;
diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h
index 4b5f98a2e66..d0b7a4c7005 100644
--- a/src/vlib/unix/unix.h
+++ b/src/vlib/unix/unix.h
@@ -121,7 +121,6 @@ typedef enum
/* Global main structure. */
extern unix_main_t unix_main;
-extern clib_file_main_t file_main;
always_inline void
unix_save_error (unix_main_t * um, clib_error_t * error)
diff --git a/src/vlib/vlib.h b/src/vlib/vlib.h
index 36f8a361abc..1e2b25eba3b 100644
--- a/src/vlib/vlib.h
+++ b/src/vlib/vlib.h
@@ -71,6 +71,7 @@ typedef u32 vlib_log_class_t;
#include <vlib/threads.h>
#include <vlib/physmem_funcs.h>
#include <vlib/buffer_funcs.h>
+#include <vlib/tw_funcs.h>
#include <vlib/error_funcs.h>
#include <vlib/format_funcs.h>
#include <vlib/node_funcs.h>
diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h
index 74957a6f0f6..2729b88dd26 100644
--- a/src/vlibapi/api.h
+++ b/src/vlibapi/api.h
@@ -25,6 +25,7 @@
#include <svm/svm.h>
#include <svm/queue.h>
#include <vlib/vlib.h>
+#include <vlib/file.h>
#include <vlib/unix/unix.h>
#include <vlibapi/api_common.h>
diff --git a/src/vlibmemory/socket_api.c b/src/vlibmemory/socket_api.c
index 26be8d09522..83b63592d44 100644
--- a/src/vlibmemory/socket_api.c
+++ b/src/vlibmemory/socket_api.c
@@ -227,7 +227,7 @@ socket_cleanup_pending_remove_registration_cb (u32 *preg_index)
clib_file_main_t *fm = &file_main;
u32 pending_remove_file_index = vl_api_registration_file_index (rp);
- clib_file_t *zf = fm->file_pool + pending_remove_file_index;
+ clib_file_t *zf = clib_file_get (fm, pending_remove_file_index);
clib_file_del (fm, zf);
vl_socket_free_registration_index (rp - socket_main.registration_pool);
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index b6227d45a2a..5c9c5cc0dc5 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -522,6 +522,7 @@ list(APPEND VNET_API_FILES bfd/bfd.api)
list(APPEND VNET_SOURCES
crypto/cli.c
+ crypto/config.c
crypto/crypto.c
crypto/format.c
crypto/main.c
@@ -589,6 +590,7 @@ list(APPEND VNET_HEADERS
ipsec/ipsec_tun.h
ipsec/ipsec_types_api.h
ipsec/ipsec_punt.h
+ ipsec/ipsec_funcs.h
ipsec/esp.h
ipsec/ah.h
)
@@ -749,29 +751,6 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES mpls/mpls.api)
##############################################################################
-# Tunnel protocol: vxlan-gpe
-##############################################################################
-
-list(APPEND VNET_SOURCES
- vxlan-gpe/vxlan_gpe.c
- vxlan-gpe/encap.c
- vxlan-gpe/decap.c
- vxlan-gpe/vxlan_gpe_api.c
-)
-
-list (APPEND VNET_MULTIARCH_SOURCES
- vxlan-gpe/decap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan-gpe/vxlan_gpe.h
- vxlan-gpe/vxlan_gpe_packet.h
- vxlan-gpe/vxlan_gpe_error.def
-)
-
-list(APPEND VNET_API_FILES vxlan-gpe/vxlan_gpe.api)
-
-##############################################################################
# ipv6 segment routing
##############################################################################
diff --git a/src/vnet/adj/adj_l2.c b/src/vnet/adj/adj_l2.c
index 5413eca6212..8bf9b8225ad 100644
--- a/src/vnet/adj/adj_l2.c
+++ b/src/vnet/adj/adj_l2.c
@@ -53,7 +53,7 @@ adj_l2_rewrite_inline (vlib_main_t * vm,
{
u32 * from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, * to_next, next_index;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index();
ethernet_main_t * em = &ethernet_main;
n_left_from = frame->n_vectors;
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 1b4fa6c15b9..9c59d70bf17 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -55,7 +55,7 @@ adj_nsh_rewrite_inline (vlib_main_t * vm,
{
u32 * from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, * to_next, next_index;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index();
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
diff --git a/src/vnet/bier/bier_lookup.c b/src/vnet/bier/bier_lookup.c
index f7a21a1c744..50e07d1a2fc 100644
--- a/src/vnet/bier/bier_lookup.c
+++ b/src/vnet/bier/bier_lookup.c
@@ -83,7 +83,7 @@ bier_lookup (vlib_main_t * vm,
{
u32 n_left_from, next_index, * from, * to_next;
bier_lookup_main_t *blm = &bier_lookup_main;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
bier_bit_mask_bucket_t buckets_copy[BIER_HDR_BUCKETS_4096];
from = vlib_frame_vector_args (from_frame);
@@ -347,7 +347,7 @@ clib_error_t *
bier_lookup_module_init (vlib_main_t * vm)
{
bier_lookup_main_t *blm = &bier_lookup_main;
- u32 thread_index;
+ clib_thread_index_t thread_index;
vec_validate (blm->blm_clones, vlib_num_workers());
vec_validate (blm->blm_fmasks, vlib_num_workers());
diff --git a/src/vnet/bier/bier_output.c b/src/vnet/bier/bier_output.c
index 5c19103d6a3..99fccf09f27 100644
--- a/src/vnet/bier/bier_output.c
+++ b/src/vnet/bier/bier_output.c
@@ -68,7 +68,7 @@ bier_output (vlib_main_t * vm,
{
vlib_combined_counter_main_t *cm = &bier_fmask_counters;
u32 n_left_from, next_index, * from, * to_next;
- u32 thread_index;
+ clib_thread_index_t thread_index;
thread_index = vm->thread_index;
from = vlib_frame_vector_args (from_frame);
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index cdc935ff10f..33a313b8052 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -609,7 +609,7 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *bif_hw, *mif_hw;
vnet_sw_interface_t *sw;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 mif_if_index;
bif = bond_get_bond_if_by_sw_if_index (args->group);
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index a0b93fccde1..5081ddfed57 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -186,19 +186,19 @@ bond_lb_broadcast (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t *b0,
vlib_buffer_t *c0;
int port;
u32 sw_if_index;
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
thread_index);
for (port = 1; port < n_members; port++)
{
- sw_if_index = *vec_elt_at_index (bif->active_members, port);
- c0 = vlib_buffer_copy (vm, b0);
- if (PREDICT_TRUE (c0 != 0))
- {
- vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
- bond_tx_add_to_queue (ptd, port, vlib_get_buffer_index (vm, c0));
- }
+ sw_if_index = *vec_elt_at_index (ptd->active_members, port);
+ c0 = vlib_buffer_copy (vm, b0);
+ if (PREDICT_TRUE (c0 != 0))
+ {
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] = sw_if_index;
+ bond_tx_add_to_queue (ptd, port, vlib_get_buffer_index (vm, c0));
+ }
}
return 0;
@@ -351,8 +351,8 @@ bond_hash_to_port (u32 * h, u32 n_left, u32 n_members,
}
static_always_inline void
-bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
- u32 * bi, vlib_buffer_t ** b, u32 * data, u32 n_left,
+bond_update_sw_if_index (bond_per_thread_data_t *ptd, bond_if_t *bif, u32 *bi,
+ vlib_buffer_t **b, u32 *data, u32 n_left,
int single_sw_if_index)
{
u32 sw_if_index = data[0];
@@ -381,13 +381,13 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
else
{
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
- *vec_elt_at_index (bif->active_members, h[0]);
+ *vec_elt_at_index (ptd->active_members, h[0]);
vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
- *vec_elt_at_index (bif->active_members, h[1]);
+ *vec_elt_at_index (ptd->active_members, h[1]);
vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
- *vec_elt_at_index (bif->active_members, h[2]);
+ *vec_elt_at_index (ptd->active_members, h[2]);
vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
- *vec_elt_at_index (bif->active_members, h[3]);
+ *vec_elt_at_index (ptd->active_members, h[3]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
bond_tx_add_to_queue (ptd, h[1], bi[1]);
@@ -410,7 +410,7 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
else
{
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
- *vec_elt_at_index (bif->active_members, h[0]);
+ *vec_elt_at_index (ptd->active_members, h[0]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
}
@@ -422,8 +422,9 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
}
static_always_inline void
-bond_tx_trace (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif,
- vlib_buffer_t ** b, u32 n_left, u32 * h)
+bond_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ bond_per_thread_data_t *ptd, vlib_buffer_t **b, u32 n_left,
+ u32 *h)
{
uword n_trace = vlib_get_trace_count (vm, node);
@@ -441,15 +442,12 @@ bond_tx_trace (vlib_main_t * vm, vlib_node_runtime_t * node, bond_if_t * bif,
t0->ethernet = *eth;
t0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
if (!h)
- {
- t0->bond_sw_if_index =
- *vec_elt_at_index (bif->active_members, 0);
- }
+ t0->bond_sw_if_index = *vec_elt_at_index (ptd->active_members, 0);
else
{
- t0->bond_sw_if_index =
- *vec_elt_at_index (bif->active_members, h[0]);
- h++;
+ t0->bond_sw_if_index =
+ *vec_elt_at_index (ptd->active_members, h[0]);
+ h++;
}
}
b++;
@@ -463,7 +461,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
{
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
bond_main_t *bm = &bond_main;
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
bond_if_t *bif = pool_elt_at_index (bm->interfaces, rund->dev_instance);
uword n_members;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
@@ -473,7 +471,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
vnet_main_t *vnm = vnet_get_main ();
bond_per_thread_data_t *ptd = vec_elt_at_index (bm->per_thread_data,
thread_index);
- u32 p, sw_if_index;
+ u32 p, sw_if_index, n_numa_members;
if (PREDICT_FALSE (bif->admin_up == 0))
{
@@ -487,9 +485,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
return frame->n_vectors;
}
- n_members = vec_len (bif->active_members);
- if (PREDICT_FALSE (n_members == 0))
+ clib_spinlock_lock_if_init (&bif->lockp);
+ if (PREDICT_FALSE (vec_len (bif->active_members) == 0))
{
+ clib_spinlock_unlock_if_init (&bif->lockp);
vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
vlib_increment_simple_counter (vnet_main.interface_main.sw_if_counters +
VNET_INTERFACE_COUNTER_DROP,
@@ -500,14 +499,25 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
return frame->n_vectors;
}
+ /*
+ * Take a snapshot of the active members as members may be freed
+ * asynchronously
+ */
+ vec_validate (ptd->active_members, vec_len (bif->active_members) - 1);
+ vec_copy (ptd->active_members, bif->active_members);
+ n_numa_members = bif->n_numa_members;
+ clib_spinlock_unlock_if_init (&bif->lockp);
+
+ n_members = vec_len (ptd->active_members);
+
vlib_get_buffers (vm, from, bufs, n_left);
/* active-backup mode, ship everything to first sw if index */
if ((bif->lb == BOND_LB_AB) || PREDICT_FALSE (n_members == 1))
{
- sw_if_index = *vec_elt_at_index (bif->active_members, 0);
+ sw_if_index = *vec_elt_at_index (ptd->active_members, 0);
- bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0);
+ bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, 0);
bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left,
/* single_sw_if_index */ 1);
goto done;
@@ -515,10 +525,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
if (bif->lb == BOND_LB_BC)
{
- sw_if_index = *vec_elt_at_index (bif->active_members, 0);
+ sw_if_index = *vec_elt_at_index (ptd->active_members, 0);
bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC);
- bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0);
+ bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, 0);
bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left,
/* single_sw_if_index */ 1);
goto done;
@@ -527,7 +537,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
/* if have at least one member on local numa node, only members on local numa
node will transmit pkts when bif->local_numa_only is enabled */
if (bif->n_numa_members >= 1)
- n_members = bif->n_numa_members;
+ n_members = n_numa_members;
if (bif->lb == BOND_LB_RR)
bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR);
@@ -541,7 +551,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
else
bond_hash_to_port (h, frame->n_vectors, n_members, 0);
- bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, h);
+ bond_tx_trace (vm, node, ptd, bufs, frame->n_vectors, h);
bond_update_sw_if_index (ptd, bif, from, bufs, hashes, frame->n_vectors,
/* single_sw_if_index */ 0);
@@ -552,7 +562,7 @@ done:
vlib_frame_t *f;
u32 *to_next;
- sw_if_index = *vec_elt_at_index (bif->active_members, p);
+ sw_if_index = *vec_elt_at_index (ptd->active_members, p);
if (PREDICT_TRUE (ptd->per_port_queue[p].n_buffers))
{
f = vnet_get_frame_to_sw_interface (vnm, sw_if_index);
@@ -564,6 +574,7 @@ done:
ptd->per_port_queue[p].n_buffers = 0;
}
}
+ vec_reset_length (ptd->active_members);
return frame->n_vectors;
}
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
index 66de1e4dd80..347aa56dbc0 100644
--- a/src/vnet/bonding/node.c
+++ b/src/vnet/bonding/node.c
@@ -197,7 +197,7 @@ VLIB_NODE_FN (bond_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 *from, n_left;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index;
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index c6602ef01b9..c6efa5b2e72 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -165,6 +165,7 @@ typedef struct
{
bond_per_port_queue_t *per_port_queue;
void **data;
+ u32 *active_members;
} bond_per_thread_data_t;
typedef struct
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 247af56f403..276cb1115f1 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -340,7 +340,7 @@ typedef struct
u32 __pad[3];
u32 sad_index;
u32 protect_index;
- u16 thread_index;
+ clib_thread_index_t thread_index;
} ipsec;
/* MAP */
@@ -502,7 +502,7 @@ typedef struct
*/
struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 pool_index;
u32 id;
} reass;
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index 77c1c81f9c4..998615e5d33 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -232,7 +232,7 @@ static inline void make_working_copy
vnet_classify_bucket_t working_bucket __attribute__ ((aligned (8)));
void *oldheap;
vnet_classify_entry_t *working_copy;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
int working_copy_length, required_length;
if (thread_index >= vec_len (t->working_copies))
@@ -427,7 +427,7 @@ vnet_classify_add_del (vnet_classify_table_t *t, vnet_classify_entry_t *add_v,
u32 hash, new_hash;
u32 limit;
u32 old_log2_pages, new_log2_pages;
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u8 *key_minus_skip;
int resplit_once = 0;
int mark_bucket_linear;
diff --git a/src/vnet/crypto/config.c b/src/vnet/crypto/config.c
new file mode 100644
index 00000000000..09f39b38b4e
--- /dev/null
+++ b/src/vnet/crypto/config.c
@@ -0,0 +1,105 @@
+/*
+ * config.c: crypto engines configuration
+ *
+ * Copyright (c) 2025 Cisco and/or its affiliates.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/crypto/crypto.h>
+
+/*
+ * Parse the sub-input of one named entry of the "crypto-engines" section
+ * and record the result in crypto_main.
+ *
+ * name is the entry name and input its sub-input; only the keywords
+ * "enable" and "disable" are accepted. An error is returned, and no config
+ * entry is added, when the name is already present in
+ * cm->config_index_by_name, when an unknown keyword is seen, or when both
+ * keywords were given. Returns 0 on success. vm is not used.
+ */
+static clib_error_t *
+config_one_crypto (vlib_main_t *vm, char *name, unformat_input_t *input)
+{
+  vnet_crypto_main_t *cm = &crypto_main;
+  vnet_crypto_config_t *cfg;
+  int want_enable = 0, want_disable = 0;
+
+  /* the name -> config index table is created on first use */
+  if (!cm->config_index_by_name)
+    cm->config_index_by_name = hash_create_string (0, sizeof (uword));
+
+  if (hash_get_mem (cm->config_index_by_name, name))
+    return clib_error_return (0, "crypto '%s' already configured", name);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable"))
+        want_enable = 1;
+      else if (unformat (input, "disable"))
+        want_disable = 1;
+      else
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (want_enable && want_disable)
+    return clib_error_return (0,
+                              "please specify either enable or disable"
+                              " for crypto '%s'",
+                              name);
+
+  /* append a config; the hash is keyed by the config's own copy of name */
+  vec_add2 (cm->configs, cfg, 1);
+  cfg->is_enabled = want_enable;
+  cfg->is_disabled = want_disable;
+  cfg->name = vec_dup (name);
+  hash_set_mem (cm->config_index_by_name, cfg->name, cfg - cm->configs);
+
+  return 0;
+}
+
+/*
+ * Early config handler registered for the "crypto-engines" section.
+ *
+ * Every item is a word followed by a sub-input. "default" sets
+ * cm->default_disabled to 1 when "disable" can be parsed from its sub-input
+ * and to 0 otherwise; any other word is handed, together with its
+ * sub-input, to config_one_crypto(). Parsing stops at the first error,
+ * which is returned; 0 is returned when the whole input was consumed.
+ */
+static clib_error_t *
+crypto_engines_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  vnet_crypto_main_t *cm = &crypto_main;
+  clib_error_t *err = 0;
+
+  while (err == 0 && unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      unformat_input_t sub;
+      u8 *word = 0;
+
+      if (unformat (input, "default %U", unformat_vlib_cli_sub_input, &sub))
+        {
+          cm->default_disabled = unformat (&sub, "disable") ? 1 : 0;
+          unformat_free (&sub);
+        }
+      else if (unformat (input, "%s %U", &word, unformat_vlib_cli_sub_input,
+                         &sub))
+        {
+          err = config_one_crypto (vm, (char *) word, &sub);
+          vec_free (word);
+          unformat_free (&sub);
+        }
+      else
+        {
+          err = clib_error_return (0, "unknown input '%U'",
+                                   format_unformat_error, input);
+          /* a partial "%s %U" match may have left a word behind */
+          vec_free (word);
+        }
+    }
+
+  return err;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (crypto_engines_config, "crypto-engines");
diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c
index 35e7768375d..765dc499078 100644
--- a/src/vnet/crypto/crypto.c
+++ b/src/vnet/crypto/crypto.c
@@ -18,6 +18,8 @@ VLIB_REGISTER_LOG_CLASS (crypto_main_log, static) = {
#define log_debug(f, ...) \
vlib_log (VLIB_LOG_LEVEL_DEBUG, crypto_main_log.class, f, ##__VA_ARGS__)
+#define log_notice(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, crypto_main_log.class, f, ##__VA_ARGS__)
#define log_err(f, ...) \
vlib_log (VLIB_LOG_LEVEL_ERR, crypto_main_log.class, f, ##__VA_ARGS__)
@@ -381,17 +383,44 @@ vnet_crypto_register_key_handler (vlib_main_t *vm, u32 engine_index,
return;
}
+/*
+ * Allocate a key object with room for 'length' bytes of key data (rounded
+ * up to a multiple of 16) and store its pointer in a new slot of cm->keys.
+ *
+ * The returned key has .index set to the slot index and .length set to
+ * 'length'; the remaining struct fields are zero-initialized and are filled
+ * in by the caller. The key data bytes are not initialized here.
+ *
+ * cm->keys_lock is used as a spinlock around the pool update. The slot
+ * pointer handed back by pool_get() is only used while the lock is held:
+ * pool_get() may grow the pool (presumably moving it), so once another
+ * caller can get in, that pointer must no longer be dereferenced.
+ */
+static vnet_crypto_key_t *
+vnet_crypoto_key_alloc (u32 length)
+{
+  vnet_crypto_main_t *cm = &crypto_main;
+  u8 expected = 0;
+  vnet_crypto_key_t *k, **kp;
+  u32 alloc_sz = sizeof (vnet_crypto_key_t) + round_pow2 (length, 16);
+
+  /* allocate outside the critical section to keep it short */
+  k = clib_mem_alloc_aligned (alloc_sz, alignof (vnet_crypto_key_t));
+
+  while (!__atomic_compare_exchange_n (&cm->keys_lock, &expected, 1, 0,
+                                       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+    {
+      /* wait with plain loads until the lock looks free, then retry */
+      while (__atomic_load_n (&cm->keys_lock, __ATOMIC_RELAXED))
+        CLIB_PAUSE ();
+      expected = 0;
+    }
+
+  pool_get (cm->keys, kp);
+
+  /* fill in the key before its pointer becomes visible in the pool */
+  *k = (vnet_crypto_key_t){
+    .index = kp - cm->keys,
+    .length = length,
+  };
+  kp[0] = k;
+
+  __atomic_store_n (&cm->keys_lock, 0, __ATOMIC_RELEASE);
+
+  return k;
+}
+
u32
vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
u16 length)
{
- u32 index;
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *engine;
- vnet_crypto_key_t *key, **kp;
+ vnet_crypto_key_t *key;
vnet_crypto_alg_data_t *ad = cm->algs + alg;
- u32 alloc_sz = sizeof (vnet_crypto_key_t) + round_pow2 (length, 16);
- u8 need_barrier_sync = 0;
ASSERT (alg != 0);
@@ -407,29 +436,14 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
return ~0;
}
- need_barrier_sync = pool_get_will_expand (cm->keys);
- /* If the cm->keys will expand, stop the parade. */
- if (need_barrier_sync)
- vlib_worker_thread_barrier_sync (vm);
+ key = vnet_crypoto_key_alloc (length);
+ key->alg = alg;
- pool_get (cm->keys, kp);
-
- if (need_barrier_sync)
- vlib_worker_thread_barrier_release (vm);
-
- key = clib_mem_alloc_aligned (alloc_sz, _Alignof (vnet_crypto_key_t));
- kp[0] = key;
- index = kp - cm->keys;
- *key = (vnet_crypto_key_t){
- .index = index,
- .alg = alg,
- .length = length,
- };
clib_memcpy (key->data, data, length);
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
- engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, index);
- return index;
+ engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, key->index);
+ return key->index;
}
void
@@ -478,10 +492,9 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
vnet_crypto_key_index_t index_crypto,
vnet_crypto_key_index_t index_integ)
{
- u32 index, need_barrier_sync;
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *engine;
- vnet_crypto_key_t *key_crypto, *key_integ, *key, **kp;
+ vnet_crypto_key_t *key_crypto, *key_integ, *key;
vnet_crypto_alg_t linked_alg;
key_crypto = cm->keys[index_crypto];
@@ -491,33 +504,17 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
if (linked_alg == ~0)
return ~0;
- need_barrier_sync = pool_get_will_expand (cm->keys);
- /* If the cm->keys will expand, stop the parade. */
- if (need_barrier_sync)
- vlib_worker_thread_barrier_sync (vm);
-
- pool_get (cm->keys, kp);
-
- if (need_barrier_sync)
- vlib_worker_thread_barrier_release (vm);
-
- key = clib_mem_alloc_aligned (sizeof (vnet_crypto_key_t),
- _Alignof (vnet_crypto_key_t));
- kp[0] = key;
- index = kp - cm->keys;
- *key = (vnet_crypto_key_t){
- .index = index,
- .is_link = 1,
- .index_crypto = index_crypto,
- .index_integ = index_integ,
- .alg = linked_alg,
- };
+ key = vnet_crypoto_key_alloc (0);
+ key->is_link = 1;
+ key->index_crypto = index_crypto;
+ key->index_integ = index_integ;
+ key->alg = linked_alg;
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
- engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, index);
+ engine->key_op_handler (VNET_CRYPTO_KEY_OP_ADD, key->index);
- return index;
+ return key->index;
}
u32
@@ -569,11 +566,14 @@ static void
vnet_crypto_load_engines (vlib_main_t *vm)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_config_t *pc;
u8 *path;
char *p;
u32 path_len;
struct dirent *entry;
DIR *dp;
+ uword *config_index;
path = os_get_exec_path ();
log_debug ("exec path is %s", path);
@@ -628,6 +628,31 @@ vnet_crypto_load_engines (vlib_main_t *vm)
continue;
}
+ /* follow crypto-engines config section directive */
+ config_index = hash_get_mem (cm->config_index_by_name, r->name);
+ if (config_index)
+ {
+ pc = vec_elt_at_index (cm->configs, config_index[0]);
+ if (pc->is_disabled)
+ {
+ log_notice ("crypto disabled: %s", r->name);
+ dlclose (handle);
+ continue;
+ }
+ if (cm->default_disabled && pc->is_enabled == 0)
+ {
+ log_notice ("crypto disabled (default): %s", r->name);
+ dlclose (handle);
+ continue;
+ }
+ }
+ else if (cm->default_disabled)
+ {
+ log_notice ("crypto disabled (default): %s", r->name);
+ dlclose (handle);
+ continue;
+ }
+
if (r->per_thread_data_sz)
{
u64 sz =
diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h
index a4b6ab97620..a56f4c42c0b 100644
--- a/src/vnet/crypto/crypto.h
+++ b/src/vnet/crypto/crypto.h
@@ -326,7 +326,7 @@ typedef struct
vnet_crypto_async_frame_elt_t elts[VNET_CRYPTO_FRAME_SIZE];
u32 buffer_indices[VNET_CRYPTO_FRAME_SIZE];
u16 next_node_index[VNET_CRYPTO_FRAME_SIZE];
- u32 enqueue_thread_index;
+ clib_thread_index_t enqueue_thread_index;
} vnet_crypto_async_frame_t;
typedef struct
@@ -353,9 +353,9 @@ typedef void (vnet_crypto_key_fn_t) (vnet_crypto_key_op_t kop,
/** async crypto function handlers **/
typedef int (vnet_crypto_frame_enq_fn_t) (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame);
-typedef vnet_crypto_async_frame_t *
- (vnet_crypto_frame_dequeue_t) (vlib_main_t * vm, u32 * nb_elts_processed,
- u32 * enqueue_thread_idx);
+typedef vnet_crypto_async_frame_t *(
+ vnet_crypto_frame_dequeue_t) (vlib_main_t *vm, u32 *nb_elts_processed,
+ clib_thread_index_t *enqueue_thread_idx);
u32
vnet_crypto_register_engine (vlib_main_t * vm, char *name, int prio,
@@ -420,16 +420,28 @@ typedef struct
typedef struct
{
- vnet_crypto_alg_data_t algs[VNET_CRYPTO_N_ALGS];
+ char *name;
+ u8 is_disabled;
+ u8 is_enabled;
+} vnet_crypto_config_t;
+
+typedef struct
+{
+ vnet_crypto_key_t **keys;
+ u8 keys_lock;
+ u32 crypto_node_index;
vnet_crypto_thread_t *threads;
vnet_crypto_frame_dequeue_t **dequeue_handlers;
- vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS];
vnet_crypto_engine_t *engines;
- vnet_crypto_key_t **keys;
+ /* configs and hash by name */
+ vnet_crypto_config_t *configs;
+ uword *config_index_by_name;
uword *engine_index_by_name;
uword *alg_index_by_name;
vnet_crypto_async_next_node_t *next_nodes;
- u32 crypto_node_index;
+ vnet_crypto_alg_data_t algs[VNET_CRYPTO_N_ALGS];
+ vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS];
+ u8 default_disabled;
} vnet_crypto_main_t;
extern vnet_crypto_main_t crypto_main;
diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c
index 7d023f3ff9d..c0d258ae963 100644
--- a/src/vnet/crypto/node.c
+++ b/src/vnet/crypto/node.c
@@ -78,7 +78,7 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
{
vnet_crypto_main_t *cm = &crypto_main;
u32 n_elts = 0;
- u32 enqueue_thread_idx = ~0;
+ clib_thread_index_t enqueue_thread_idx = CLIB_INVALID_THREAD_INDEX;
vnet_crypto_async_frame_t *cf = (hdl) (vm, &n_elts, &enqueue_thread_idx);
*n_total += n_elts;
diff --git a/src/vnet/dev/bus/pci.c b/src/vnet/dev/bus/pci.c
index 4bb8660f4b4..a8d374f9510 100644
--- a/src/vnet/dev/bus/pci.c
+++ b/src/vnet/dev/bus/pci.c
@@ -6,7 +6,7 @@
#include <vnet/dev/dev.h>
#include <vnet/dev/bus/pci.h>
#include <vnet/dev/log.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
.class_name = "dev",
@@ -318,7 +318,8 @@ vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
void
vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev,
- u16 line, u16 thread_index)
+ u16 line,
+ clib_thread_index_t thread_index)
{
vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
u32 index;
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
index f3f7563317e..ad2e793907f 100644
--- a/src/vnet/dev/dev.h
+++ b/src/vnet/dev/dev.h
@@ -711,7 +711,7 @@ void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
typedef struct
{
- u16 thread_index;
+ clib_thread_index_t thread_index;
u8 completed;
u8 in_order;
vnet_dev_port_t *port;
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
index 944c3ef32fa..7a6f39a6be6 100644
--- a/src/vnet/dev/runtime.c
+++ b/src/vnet/dev/runtime.c
@@ -55,7 +55,7 @@ static uword
vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame)
{
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_dev_rt_op_t *op, *ops = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
u32 n_pending = 0;
uword rv = 0;
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index cadf1f857a6..5c904dffc13 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -81,7 +81,7 @@ vnet_get_aggregate_rx_packets (void)
}
static inline void
-vnet_device_increment_rx_packets (u32 thread_index, u64 count)
+vnet_device_increment_rx_packets (clib_thread_index_t thread_index, u64 count)
{
vnet_device_main_t *vdm = &vnet_device_main;
vnet_device_per_worker_data_t *pwd;
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index 027e1ed4e74..730c1ff17d8 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -262,7 +262,7 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
int checksum_offload_enabled, int packed)
{
vnet_main_t *vnm = vnet_get_main ();
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
uword n_trace = vlib_get_trace_count (vm, node);
u32 next_index;
const int hdr_sz = vif->virtio_net_hdr_sz;
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index 682ec32ceff..3ac209aa571 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -29,7 +29,7 @@
#include <vlib/vlib.h>
#include <vlib/pci/pci.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
index 5a519d344c1..9f5cb6be059 100644
--- a/src/vnet/dpo/interface_rx_dpo.c
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -242,7 +242,7 @@ interface_rx_dpo_inline (vlib_main_t * vm,
u8 is_l2)
{
u32 n_left_from, next_index, * from, * to_next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_interface_main_t *im;
im = &vnet_get_main ()->interface_main;
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index 8f2a0de6ea8..f6f9392a42b 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -1030,6 +1030,7 @@ load_balance_module_init (void)
* This should never be used, but just in case, stack it on a drop.
*/
lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
+ ASSERT(0 == lbi);
load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
load_balance_logger =
@@ -1038,6 +1039,12 @@ load_balance_module_init (void)
load_balance_map_module_init();
}
+/**
+ * Call pool_alloc_aligned() on load_balance_pool with the given count and
+ * cache-line alignment.
+ * NOTE(review): presumably used to pre-size the pool before it is
+ * populated - confirm against callers.
+ *
+ * @param size passed through as pool_alloc_aligned()'s count argument
+ */
+void
+load_balance_pool_alloc (uword size)
+{
+ pool_alloc_aligned(load_balance_pool, size, CLIB_CACHE_LINE_BYTES);
+}
+
static clib_error_t *
load_balance_show (vlib_main_t * vm,
unformat_input_t * input,
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index eee073f5892..76aa7982401 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -260,5 +260,6 @@ load_balance_get_bucket_i (const load_balance_t *lb,
}
extern void load_balance_module_init(void);
+extern void load_balance_pool_alloc (uword size);
#endif
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 9ce94eebe5c..265f3d93023 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -290,7 +290,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
int table_from_interface)
{
u32 n_left_from, next_index, * from, * to_next;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
from = vlib_frame_vector_args (from_frame);
@@ -647,7 +647,7 @@ lookup_dpo_ip6_inline (vlib_main_t * vm,
{
vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, next_index, * from, * to_next;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -989,7 +989,7 @@ lookup_dpo_mpls_inline (vlib_main_t * vm,
int table_from_interface)
{
u32 n_left_from, next_index, * from, * to_next;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
from = vlib_frame_vector_args (from_frame);
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 0474fd82984..07254f5cf6a 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -742,7 +742,7 @@ replicate_inline (vlib_main_t * vm,
vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
replicate_main_t * rm = &replicate_main;
u32 n_left_from, * from, * to_next, next_index;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index f1bb6b81070..39073d761ea 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -507,7 +507,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
u32 n_left_from, *from;
u32 next_index = 0;
u32 n_bytes;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
l2_input_config_t *config;
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index 03cbdde1c2b..2d7f0913994 100644
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -1218,7 +1218,7 @@ ethernet_input_inline (vlib_main_t * vm,
vlib_node_runtime_t *error_node;
u32 n_left_from, next_index, *to_next;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 cached_sw_if_index = ~0;
u32 cached_is_l2 = 0; /* shut up gcc */
vnet_hw_interface_t *hi = NULL; /* used for main interface only */
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
index 3d81e99cff2..140a3fd95d3 100644
--- a/src/vnet/ethernet/p2p_ethernet_input.c
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -63,7 +63,7 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_trace = vlib_get_trace_count (vm, node);
u32 n_left_from, *from, *to_next;
u32 next_index;
diff --git a/src/vnet/feature/feature.api b/src/vnet/feature/feature.api
index 7b52a6630cb..e3054b53b9d 100644
--- a/src/vnet/feature/feature.api
+++ b/src/vnet/feature/feature.api
@@ -38,6 +38,25 @@ autoreply define feature_enable_disable {
string feature_name[64];
};
+/** \brief Query whether a feature is enabled on an interface
+    @param client_index - NOTE(review): presumably the opaque client cookie
+                          used by other requests - confirm
+    @param context - NOTE(review): presumably echoed back in the reply -
+                     confirm
+    @param arc_name - name of the feature arc (at most 64 bytes)
+    @param feature_name - name of the feature (at most 64 bytes)
+    @param sw_if_index - interface the query is about
+*/
+autoendian define feature_is_enabled
+{
+ u32 client_index;
+ u32 context;
+
+ string arc_name[64];
+ string feature_name[64];
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Reply to feature_is_enabled
+    @param client_index - NOTE(review): replies normally carry only context
+                          and retval; confirm this field is intended
+    @param context - NOTE(review): presumably the context of the request -
+                     confirm
+    @param retval - return code of the request
+    @param is_enabled - result of the query
+*/
+autoendian define feature_is_enabled_reply
+{
+ u32 client_index;
+ u32 context;
+ i32 retval;
+
+ bool is_enabled;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/feature/feature_api.c b/src/vnet/feature/feature_api.c
index a8c74277788..152e44bd288 100644
--- a/src/vnet/feature/feature_api.c
+++ b/src/vnet/feature/feature_api.c
@@ -74,6 +74,30 @@ vl_api_feature_enable_disable_t_handler (vl_api_feature_enable_disable_t * mp)
REPLY_MACRO (VL_API_FEATURE_ENABLE_DISABLE_REPLY);
}
+/*
+ * API handler for feature_is_enabled: replies with the value returned by
+ * vnet_feature_is_enabled() for the arc name, feature name and sw_if_index
+ * carried in the request.
+ */
+static void
+vl_api_feature_is_enabled_t_handler (vl_api_feature_is_enabled_t *mp)
+{
+ /* rmp and rv are not referenced directly below; presumably they are used
+ * by the validation and reply macros - confirm against their definitions */
+ vl_api_feature_is_enabled_reply_t *rmp = NULL;
+ i32 rv = 0;
+ bool is_enabled = false;
+
+ /* NOTE(review): presumably jumps to the label emitted by
+ * BAD_SW_IF_INDEX_LABEL when mp->sw_if_index is not valid, in which case
+ * is_enabled keeps its initial value (false) - confirm */
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ /* temporary copies of both names, each with an explicit trailing 0 */
+ u8 *arc_name = format (0, "%s%c", mp->arc_name, 0);
+ u8 *feature_name = format (0, "%s%c", mp->feature_name, 0);
+
+ is_enabled = vnet_feature_is_enabled (
+ (const char *) arc_name, (const char *) feature_name, mp->sw_if_index);
+
+ vec_free (feature_name);
+ vec_free (arc_name);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ /* the reply body only sets is_enabled */
+ REPLY_MACRO2_END (VL_API_FEATURE_IS_ENABLED_REPLY,
+ ({ rmp->is_enabled = is_enabled; }));
+}
+
#include <vnet/feature/feature.api.c>
static clib_error_t *
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index adf880b8bbb..c86941fce9a 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -1772,6 +1772,12 @@ fib_entry_module_init (void)
fib_entry_track_module_init();
}
+/**
+ * Call pool_alloc() on fib_entry_pool with the given count.
+ * NOTE(review): presumably used to pre-size the pool before it is
+ * populated - confirm against callers.
+ *
+ * @param size passed through as pool_alloc()'s count argument
+ */
+void
+fib_entry_pool_alloc (uword size)
+{
+ pool_alloc(fib_entry_pool, size);
+}
+
fib_route_path_t *
fib_entry_encode (fib_node_index_t fib_entry_index)
{
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 7331f803ec4..2c88d1e5f6a 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -480,6 +480,7 @@ extern void fib_entry_set_flow_hash_config(fib_node_index_t fib_entry_index,
flow_hash_config_t hash_config);
extern void fib_entry_module_init(void);
+extern void fib_entry_pool_alloc(uword size);
extern u32 fib_entry_get_stats_index(fib_node_index_t fib_entry_index);
diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c
index c1d4459476e..c4f4b74cd92 100644
--- a/src/vnet/gso/node.c
+++ b/src/vnet/gso/node.c
@@ -471,7 +471,7 @@ drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm,
vlib_node_runtime_t * node, u32 * pbi0,
u32 sw_if_index, u32 drop_error_code)
{
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_simple_counter_main_t *cm;
cm =
@@ -498,7 +498,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from = frame->n_vectors;
u32 *from_end = from + n_left_from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vnet_interface_main_t *im = &vnm->interface_main;
vnet_interface_per_thread_data_t *ptd =
vec_elt_at_index (im->per_thread_data, thread_index);
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index eb557fdef8b..81d7729700b 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -615,7 +615,7 @@ typedef struct
u32 dev_instance;
/* index of thread pollling this queue */
- u32 thread_index;
+ clib_thread_index_t thread_index;
/* file index of queue interrupt line */
u32 file_index;
diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c
index b1fc82f38e9..84068a05ea5 100644
--- a/src/vnet/interface/rx_queue.c
+++ b/src/vnet/interface/rx_queue.c
@@ -16,7 +16,7 @@
#include <vnet/vnet.h>
#include <vnet/devices/devices.h>
#include <vnet/interface/rx_queue_funcs.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = {
.class_name = "interface",
@@ -27,7 +27,7 @@ VLIB_REGISTER_LOG_CLASS (if_rxq_log, static) = {
#define log_err(fmt, ...) vlib_log_err (if_rxq_log.class, fmt, __VA_ARGS__)
static u32
-next_thread_index (vnet_main_t *vnm, u32 thread_index)
+next_thread_index (vnet_main_t *vnm, clib_thread_index_t thread_index)
{
vnet_device_main_t *vdm = &vnet_device_main;
if (vdm->first_worker_thread_index == 0)
@@ -62,7 +62,7 @@ vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index,
u32
vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index, u32 queue_id,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -225,7 +225,7 @@ vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm, u32 queue_index)
void
vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, queue_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rxq->hw_if_index);
diff --git a/src/vnet/interface/rx_queue_funcs.h b/src/vnet/interface/rx_queue_funcs.h
index 906d7118296..4295463f4b9 100644
--- a/src/vnet/interface/rx_queue_funcs.h
+++ b/src/vnet/interface/rx_queue_funcs.h
@@ -20,7 +20,8 @@
u32 vnet_hw_if_get_rx_queue_index_by_id (vnet_main_t *vnm, u32 hw_if_index,
u32 queue_id);
u32 vnet_hw_if_register_rx_queue (vnet_main_t *vnm, u32 hw_if_index,
- u32 queue_id, u32 thread_idnex);
+ u32 queue_id,
+ clib_thread_index_t thread_index);
void vnet_hw_if_unregister_rx_queue (vnet_main_t *vnm, u32 queue_index);
void vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index);
void vnet_hw_if_set_rx_queue_file_index (vnet_main_t *vnm, u32 queue_index,
@@ -32,7 +33,7 @@ int vnet_hw_if_set_rx_queue_mode (vnet_main_t *vnm, u32 queue_index,
vnet_hw_if_rx_mode vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm,
u32 queue_index);
void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index);
+ clib_thread_index_t thread_index);
vnet_hw_if_rxq_poll_vector_t *
vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
vlib_node_runtime_t *node);
diff --git a/src/vnet/interface/tx_queue.c b/src/vnet/interface/tx_queue.c
index 8a6cd9da304..6c7c938c576 100644
--- a/src/vnet/interface/tx_queue.c
+++ b/src/vnet/interface/tx_queue.c
@@ -107,7 +107,7 @@ vnet_hw_if_unregister_all_tx_queues (vnet_main_t *vnm, u32 hw_if_index)
void
vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, txq->hw_if_index);
@@ -122,7 +122,7 @@ vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index,
void
vnet_hw_if_tx_queue_unassign_thread (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, txq->hw_if_index);
diff --git a/src/vnet/interface/tx_queue_funcs.h b/src/vnet/interface/tx_queue_funcs.h
index 8fcf7c336a8..14792e0f023 100644
--- a/src/vnet/interface/tx_queue_funcs.h
+++ b/src/vnet/interface/tx_queue_funcs.h
@@ -13,9 +13,9 @@ u32 vnet_hw_if_register_tx_queue (vnet_main_t *vnm, u32 hw_if_index,
void vnet_hw_if_unregister_tx_queue (vnet_main_t *vnm, u32 queue_index);
void vnet_hw_if_unregister_all_tx_queues (vnet_main_t *vnm, u32 hw_if_index);
void vnet_hw_if_tx_queue_assign_thread (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index);
+ clib_thread_index_t thread_index);
void vnet_hw_if_tx_queue_unassign_thread (vnet_main_t *vnm, u32 queue_index,
- u32 thread_index);
+ clib_thread_index_t thread_index);
/* inline functions */
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index 65f3a02c86b..d835a36f46d 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -1330,7 +1330,7 @@ vl_api_sw_interface_set_tx_placement_t_handler (
size = mp->array_size;
for (u32 i = 0; i < size; i++)
{
- u32 thread_index = mp->threads[i];
+ clib_thread_index_t thread_index = mp->threads[i];
bitmap = clib_bitmap_set (bitmap, thread_index, 1);
}
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index cc214c10f6b..4d3c98f6aa5 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -1694,7 +1694,7 @@ VLIB_CLI_COMMAND (show_interface_rx_placement, static) = {
};
clib_error_t *
set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
- u32 thread_index, u8 is_main)
+ clib_thread_index_t thread_index, u8 is_main)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_device_main_t *vdm = &vnet_device_main;
@@ -1731,7 +1731,7 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input,
vnet_main_t *vnm = vnet_get_main ();
u32 hw_if_index = (u32) ~ 0;
u32 queue_id = (u32) 0;
- u32 thread_index = (u32) ~ 0;
+ clib_thread_index_t thread_index = CLIB_INVALID_THREAD_INDEX;
u8 is_main = 0;
if (!unformat_user (input, unformat_line_input, line_input))
@@ -1831,11 +1831,12 @@ set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap)
vlib_thread_main_t *vtm = vlib_get_thread_main ();
vnet_hw_if_tx_queue_t *txq;
u32 queue_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
/* highest set bit in bitmap should not exceed last worker thread index */
thread_index = clib_bitmap_last_set (bitmap);
- if ((thread_index != ~0) && (thread_index >= vtm->n_vlib_mains))
+ if ((thread_index != CLIB_INVALID_THREAD_INDEX) &&
+ (thread_index >= vtm->n_vlib_mains))
return VNET_API_ERROR_INVALID_VALUE;
queue_index =
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 511df4920e4..54e789679b0 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -426,7 +426,8 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm,
/* Set rx-placement on the interface */
clib_error_t *set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
- u32 thread_index, u8 is_main);
+ clib_thread_index_t thread_index,
+ u8 is_main);
/* Set tx-queue placement on the interface */
int set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap);
diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c
index 2d0c0ee81d1..f2889fd45fc 100644
--- a/src/vnet/interface_test.c
+++ b/src/vnet/interface_test.c
@@ -911,7 +911,7 @@ vl_api_sw_interface_tx_placement_details_t_handler (
for (u32 i = 0; i < size; i++)
{
- u32 thread_index = ntohl (mp->threads[i]);
+ clib_thread_index_t thread_index = ntohl (mp->threads[i]);
bitmap = clib_bitmap_set (bitmap, thread_index, 1);
}
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c
index 61b9e768fe5..1d8d39ddcb8 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.c
+++ b/src/vnet/ip-neighbor/ip4_neighbor.c
@@ -56,7 +56,7 @@ VLIB_REGISTER_LOG_CLASS (ip4_neighbor_log, static) = {
vlib_log_debug (ip4_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ip4_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index,
const ip4_address_t *dst)
{
ip4_address_t src;
@@ -74,7 +74,8 @@ ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
void
ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
- u32 thread_index, const ip4_address_t *addr)
+ clib_thread_index_t thread_index,
+ const ip4_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip4_main_t *i4m = &ip4_main;
@@ -142,7 +143,7 @@ ip4_arp_inline (vlib_main_t * vm,
vnet_main_t *vnm = vnet_get_main ();
u32 *from, *to_next_drop;
uword n_left_from, n_left_to_next_drop, next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u64 seed;
if (node->flags & VLIB_NODE_FLAG_TRACE)
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.h b/src/vnet/ip-neighbor/ip4_neighbor.h
index 7941ebdbced..3327c525d68 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.h
+++ b/src/vnet/ip-neighbor/ip4_neighbor.h
@@ -20,10 +20,12 @@
#include <vnet/ethernet/arp_packet.h>
#include <vnet/ip-neighbor/ip_neighbor_types.h>
-extern void ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+extern void ip4_neighbor_probe_dst (u32 sw_if_index,
+ clib_thread_index_t thread_index,
const ip4_address_t *dst);
extern void ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
- u32 sw_if_index, u32 thread_index,
+ u32 sw_if_index,
+ clib_thread_index_t thread_index,
const ip4_address_t *addr);
always_inline vlib_buffer_t *
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c
index ca8aed3d4ca..79a4a30ff53 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.c
+++ b/src/vnet/ip-neighbor/ip6_neighbor.c
@@ -32,7 +32,7 @@ VLIB_REGISTER_LOG_CLASS (ip6_neighbor_log, static) = {
#define log_debug(fmt, ...) \
vlib_log_debug (ip6_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ip6_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index,
const ip6_address_t *dst)
{
ip6_address_t src;
@@ -45,7 +45,8 @@ ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
void
ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
- u32 thread_index, const ip6_address_t *addr)
+ clib_thread_index_t thread_index,
+ const ip6_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip6_main_t *i6m = &ip6_main;
@@ -129,7 +130,7 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
u32 *from, *to_next_drop;
uword n_left_from, n_left_to_next_drop;
u64 seed;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
if (node->flags & VLIB_NODE_FLAG_TRACE)
ip6_forward_next_trace (vm, node, frame, VLIB_TX);
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.h b/src/vnet/ip-neighbor/ip6_neighbor.h
index c6e718dc2ff..31dc1eab033 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.h
+++ b/src/vnet/ip-neighbor/ip6_neighbor.h
@@ -31,15 +31,17 @@
extern vlib_packet_template_t ip6_neighbor_packet_template;
extern void ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
- u32 sw_if_index, u32 thread_index,
+ u32 sw_if_index,
+ clib_thread_index_t thread_index,
const ip6_address_t *addr);
-extern void ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+extern void ip6_neighbor_probe_dst (u32 sw_if_index,
+ clib_thread_index_t thread_index,
const ip6_address_t *dst);
always_inline vlib_buffer_t *
ip6_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
- u32 thread_index, const ip6_address_t *src,
+ clib_thread_index_t thread_index, const ip6_address_t *src,
const ip6_address_t *dst)
{
icmp6_neighbor_solicitation_header_t *h0;
diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c
index 614b78489cd..73fa0b30317 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.c
+++ b/src/vnet/ip-neighbor/ip_neighbor.c
@@ -1092,7 +1092,7 @@ ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
}
void
-ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ip_neighbor_probe_dst (u32 sw_if_index, clib_thread_index_t thread_index,
ip_address_family_t af, const ip46_address_t *dst)
{
if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h
index cc888ba2054..813c2bb1e2d 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.h
+++ b/src/vnet/ip-neighbor/ip_neighbor.h
@@ -56,7 +56,8 @@ extern void ip_neighbor_learn (const ip_neighbor_learn_t * l);
extern void ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai);
extern void ip_neighbor_probe (const ip_adjacency_t * adj);
-extern void ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+extern void ip_neighbor_probe_dst (u32 sw_if_index,
+ clib_thread_index_t thread_index,
ip_address_family_t af,
const ip46_address_t *ip);
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index fa4a0e12276..acbe06bfc1d 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -251,7 +251,7 @@ ip4_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip4_icmp_error_next_t next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index f93ebce4bf1..b37554c3d78 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -292,7 +292,7 @@ ip6_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip6_icmp_error_next_t next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 81d6cd1a0bd..cabefd81230 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -117,7 +117,7 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
{
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
u32 n_left, *from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
u16 nexts[VLIB_FRAME_SIZE], *next;
@@ -2113,7 +2113,7 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_node_get_runtime (vm, ip4_input_node.index);
n_left_from = frame->n_vectors;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_get_buffers (vm, from, bufs, n_left_from);
clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
diff --git a/src/vnet/ip/ip4_forward.h b/src/vnet/ip/ip4_forward.h
index 54150d4dab4..8122d57e653 100644
--- a/src/vnet/ip/ip4_forward.h
+++ b/src/vnet/ip/ip4_forward.h
@@ -59,7 +59,7 @@ ip4_lookup_inline (vlib_main_t * vm,
ip4_main_t *im = &ip4_main;
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
u32 n_left, *from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
vlib_buffer_t **b = bufs;
u16 nexts[VLIB_FRAME_SIZE], *next;
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index af2b89ab2ec..28b9fb8b279 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -96,7 +96,7 @@ ip4_input_check_sw_if_index (vlib_main_t * vm,
{
ip4_main_t *im = &ip4_main;
ip_lookup_main_t *lm = &im->lookup_main;
- u32 thread_index;
+ clib_thread_index_t thread_index;
if (*last_sw_if_index == sw_if_index)
{
(*cnt)++;
@@ -125,7 +125,7 @@ ip4_input_inline (vlib_main_t * vm,
{
vnet_main_t *vnm = vnet_get_main ();
u32 n_left_from, *from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
vlib_node_runtime_t *error_node =
vlib_node_get_runtime (vm, ip4_input_node.index);
vlib_simple_counter_main_t *cm;
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 00855f7db43..df70dc9edca 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -190,7 +190,7 @@ ip4_mtrie_8_init (ip4_mtrie_8_t *m)
{
ip4_mtrie_8_ply_t *root;
- pool_get (ip4_ply_pool, root);
+ pool_get_aligned (ip4_ply_pool, root, CLIB_CACHE_LINE_BYTES);
m->root_ply = root - ip4_ply_pool;
ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0);
@@ -853,13 +853,19 @@ ip4_mtrie_module_init (vlib_main_t * vm)
clib_error_t *error = NULL;
/* Burn one ply so index 0 is taken */
- pool_get (ip4_ply_pool, p);
+ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
return (error);
}
VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+void
+ip4_mtrie_pool_alloc (uword size)
+{ /* Pre-allocate the pool of plys. Uses the same CLIB_CACHE_LINE_BYTES
+   * alignment that the pool_get_aligned() calls on ip4_ply_pool in this
+   * file request.
+   * NOTE(review): size is presumably a count of plys rather than bytes -
+   * confirm against pool_alloc_aligned(). */
+ pool_alloc_aligned (ip4_ply_pool, size, CLIB_CACHE_LINE_BYTES);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 16c524745be..2631f07eb2b 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -179,6 +179,11 @@ format_function_t format_ip4_mtrie_8;
extern ip4_mtrie_8_ply_t *ip4_ply_pool;
/**
+ * @brief Pre-allocate the pool of plys
+ */
+extern void ip4_mtrie_pool_alloc (uword size);
+
+/**
* Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)
*/
always_inline u32
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index d356fd5411c..3c14a59f174 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -37,6 +37,20 @@ static u8 icmp_to_icmp6_updater_pointer_table[] =
#define frag_id_4to6(id) (id)
+/**
+ * @brief Test whether an ICMPv4 type is one of the error-message types.
+ *
+ * The set of error types is kept as a bitmap indexed by ICMP type value.
+ *
+ * @param icmp_type ICMPv4 type field of the message (full u8 range).
+ * @returns non-zero if icmp_type is in the error set below, 0 otherwise.
+ */
+always_inline u64
+icmp_type_is_error_message (u8 icmp_type)
+{
+  u64 bmp = 0;
+  bmp |= 1ULL << ICMP4_destination_unreachable;
+  bmp |= 1ULL << ICMP4_time_exceeded;
+  bmp |= 1ULL << ICMP4_parameter_problem;
+  bmp |= 1ULL << ICMP4_source_quench;
+  bmp |= 1ULL << ICMP4_redirect;
+  bmp |= 1ULL << ICMP4_alternate_host_address;
+
+  /*
+   * icmp_type comes straight from the packet and can be as large as 255.
+   * Shifting a 64-bit value by 64 or more is undefined behaviour, and on
+   * CPUs that mask the shift count it would alias high type values onto
+   * the low bits of the bitmap, misreporting them as errors.
+   */
+  if (icmp_type >= 64)
+    return 0;
+
+  return (1ULL << icmp_type) & bmp;
+}
+
/**
* @brief Get TCP/UDP port number or ICMP id from IPv4 packet.
*
@@ -70,9 +84,14 @@ ip4_get_port (ip4_header_t *ip, u8 sender)
* - outer ICMP header length (2*sizeof (icmp46_header_t))
* - inner IP header length
* - first 8 bytes of payload of original packet in case of ICMP error
+ *
+ * Also make sure we only attempt to parse payload as IP packet if it's
+ * an ICMP error.
*/
else if (clib_net_to_host_u16 (ip->length) >=
- 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) + 8)
+ 2 * sizeof (ip4_header_t) + 2 * sizeof (icmp46_header_t) +
+ 8 &&
+ icmp_type_is_error_message (icmp->type))
{
ip = (ip4_header_t *) (icmp + 2);
if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 31adc90ecab..3c1f40beff5 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -750,7 +750,7 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
{
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
u32 n_left, *from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
ip6_main_t *im = &ip6_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
u16 nexts[VLIB_FRAME_SIZE], *next;
@@ -1781,7 +1781,7 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
diff --git a/src/vnet/ip/ip6_forward.h b/src/vnet/ip/ip6_forward.h
index 8e5dd256ceb..71b6cc9ae04 100644
--- a/src/vnet/ip/ip6_forward.h
+++ b/src/vnet/ip/ip6_forward.h
@@ -60,7 +60,7 @@ ip6_lookup_inline (vlib_main_t * vm,
vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, n_left_to_next, *from, *to_next;
ip_lookup_next_t next;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index ae59b765d2e..a79a17ca64a 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -73,7 +73,7 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_node_runtime_t *error_node =
vlib_node_get_runtime (vm, ip6_input_node.index);
vlib_simple_counter_main_t *cm;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index ebabcd0b797..931d2da0fa3 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -168,7 +168,19 @@ ip6_get_port (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6,
if (dst_port)
*dst_port = ((u16 *) (icmp))[2];
}
- else if (clib_net_to_host_u16 (ip6->payload_length) >= 64)
+ /*
+ * if there is enough data and ICMP type indicates ICMP error, then parse
+ * inner packet
+ *
+ * ICMP6 errors are:
+ * 1 - destination_unreachable
+ * 2 - packet_too_big
+ * 3 - time_exceeded
+ * 4 - parameter_problem
+ */
+ else if (clib_net_to_host_u16 (ip6->payload_length) >= 64 &&
+ icmp->type >= ICMP6_destination_unreachable &&
+ icmp->type <= ICMP6_parameter_problem)
{
u16 ip6_pay_len;
ip6_header_t *inner_ip6;
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index c2490f196ef..cfc3644a1bf 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -38,6 +38,9 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/dpo/load_balance.h>
ip_main_t ip_main;
@@ -112,6 +115,39 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"flow_classify_init"),
};
+/*
+ * Startup-config handler for the "l3fib" section.
+ *
+ * Recognised options, each parsed with unformat_memory_size:
+ *   load-balance-pool-size, fib-entry-pool-size, ip4-mtrie-pool-size
+ *
+ * Any other token yields an "unknown input" error. Once the whole input
+ * has been parsed, every option that was given a non-zero value triggers
+ * the matching *_pool_alloc() call; options left at zero are ignored.
+ */
+static clib_error_t *
+ip_config_init (vlib_main_t *vm, unformat_input_t *input)
+{
+  uword n_load_balance = 0;
+  uword n_fib_entry = 0;
+  uword n_mtrie_ply = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "load-balance-pool-size %U", unformat_memory_size,
+		    &n_load_balance))
+	continue;
+
+      if (unformat (input, "fib-entry-pool-size %U", unformat_memory_size,
+		    &n_fib_entry))
+	continue;
+
+      if (unformat (input, "ip4-mtrie-pool-size %U", unformat_memory_size,
+		    &n_mtrie_ply))
+	continue;
+
+      /* nothing matched: reject the remaining input */
+      return clib_error_return (0, "unknown input `%U'",
+				format_unformat_error, input);
+    }
+
+  /* only touch the pools that were explicitly sized */
+  if (n_load_balance != 0)
+    load_balance_pool_alloc (n_load_balance);
+
+  if (n_fib_entry != 0)
+    fib_entry_pool_alloc (n_fib_entry);
+
+  if (n_mtrie_ply != 0)
+    ip4_mtrie_pool_alloc (n_mtrie_ply);
+
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (ip_config_init, "l3fib");
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index 3c46549634a..220a71ad5df 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -27,13 +27,12 @@
#include <vnet/udp/udp.h>
#include <vnet/tcp/tcp.h>
#include <vnet/ip/punt.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/uio.h>
-#include <stdlib.h>
punt_main_t punt_main;
diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c
index 6400e49c626..9898a663154 100644
--- a/src/vnet/ip/punt_node.c
+++ b/src/vnet/ip/punt_node.c
@@ -247,7 +247,7 @@ punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node,
ip_address_family_t af, ip_protocol_t protocol)
{
u32 *buffers = vlib_frame_vector_args (frame);
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
uword n_packets = frame->n_vectors;
punt_main_t *pm = &punt_main;
int i;
diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c
index bab7d479dcf..808acb03ab8 100644
--- a/src/vnet/ip/reass/ip4_full_reass.c
+++ b/src/vnet/ip/reass/ip4_full_reass.c
@@ -91,7 +91,7 @@ typedef union
struct
{
u32 reass_index;
- u32 memory_owner_thread_index;
+ clib_thread_index_t memory_owner_thread_index;
};
u64 as_u64;
} ip4_full_reass_val_t;
@@ -147,10 +147,10 @@ typedef struct
// number of fragments in this reassembly
u32 fragments_n;
// thread owning memory for this context (whose pool contains this ctx)
- u32 memory_owner_thread_index;
+ clib_thread_index_t memory_owner_thread_index;
// thread which received fragment with offset 0 and which sends out the
// completed reassembly
- u32 sendout_thread_index;
+ clib_thread_index_t sendout_thread_index;
} ip4_full_reass_t;
typedef struct
@@ -246,8 +246,8 @@ typedef struct
ip4_full_reass_range_trace_t trace_range;
u32 size_diff;
u32 op_id;
- u32 thread_id;
- u32 thread_id_to;
+ clib_thread_index_t thread_id;
+ clib_thread_index_t thread_id_to;
u32 fragment_first;
u32 fragment_last;
u32 total_data_len;
@@ -345,10 +345,10 @@ format_ip4_full_reass_trace (u8 * s, va_list * args)
}
static void
-ip4_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_t * reass, u32 bi,
+ip4_full_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_t *reass, u32 bi,
ip4_full_reass_trace_operation_e action,
- u32 size_diff, u32 thread_id_to)
+ u32 size_diff, clib_thread_index_t thread_id_to)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
@@ -917,11 +917,12 @@ ip4_full_reass_remove_range_from_chain (vlib_main_t * vm,
}
always_inline ip4_full_reass_rc_t
-ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
- ip4_full_reass_per_thread_t * rt,
- ip4_full_reass_t * reass, u32 * bi0, u32 * next0,
- u32 * error0, bool is_custom, u32 * handoff_thread_idx)
+ip4_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_main_t *rm,
+ ip4_full_reass_per_thread_t *rt,
+ ip4_full_reass_t *reass, u32 *bi0, u32 *next0,
+ u32 *error0, bool is_custom,
+ clib_thread_index_t *handoff_thread_idx)
{
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
@@ -1256,7 +1257,7 @@ ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else if (reass)
{
- u32 handoff_thread_idx;
+ clib_thread_index_t handoff_thread_idx;
u32 counter = ~0;
switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
&error0, CUSTOM == type,
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index 50b4b22eb60..6d14526e6a7 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -65,7 +65,7 @@ typedef union
struct
{
u32 reass_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
u64 as_u64;
} ip4_sv_reass_val_t;
@@ -1684,7 +1684,7 @@ static char *ip4_sv_reass_handoff_error_strings[] = {
typedef struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
} ip4_sv_reass_handoff_trace_t;
static u8 *
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index 69b27c5aa8e..b2934d99721 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -67,7 +67,7 @@ typedef union
struct
{
u32 reass_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
u64 as_u64;
} ip6_sv_reass_val_t;
@@ -1399,7 +1399,7 @@ static char *ip6_sv_reassembly_handoff_error_strings[] = {
typedef struct
{
- u32 thread_index;
+ clib_thread_index_t thread_index;
} ip6_sv_reassembly_handoff_trace_t;
static u8 *
diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c
index 4eb93520ed8..7d94b4aa6e2 100644
--- a/src/vnet/ipfix-export/flow_report.c
+++ b/src/vnet/ipfix-export/flow_report.c
@@ -312,7 +312,7 @@ vnet_flow_rewrite_generic_callback (ipfix_exporter_t *exp, flow_report_t *fr,
vlib_buffer_t *
vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
- flow_report_t *fr, u32 thread_index)
+ flow_report_t *fr, clib_thread_index_t thread_index)
{
u32 bi0;
vlib_buffer_t *b0;
@@ -343,7 +343,8 @@ vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
void
vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
flow_report_t *fr, flow_report_stream_t *stream,
- u32 thread_index, vlib_buffer_t *b0)
+ clib_thread_index_t thread_index,
+ vlib_buffer_t *b0)
{
flow_report_main_t *frm = &flow_report_main;
vlib_frame_t *f;
diff --git a/src/vnet/ipfix-export/flow_report.h b/src/vnet/ipfix-export/flow_report.h
index cd0cafb6158..1a94ce9b9a6 100644
--- a/src/vnet/ipfix-export/flow_report.h
+++ b/src/vnet/ipfix-export/flow_report.h
@@ -255,7 +255,8 @@ vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector);
*/
vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm,
ipfix_exporter_t *exp,
- flow_report_t *fr, u32 thread_index);
+ flow_report_t *fr,
+ clib_thread_index_t thread_index);
/*
* Send the provided buffer. At this stage the buffer should be populated
@@ -265,7 +266,8 @@ vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm,
void vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
flow_report_t *fr,
flow_report_stream_t *stream,
- u32 thread_index, vlib_buffer_t *b0);
+ clib_thread_index_t thread_index,
+ vlib_buffer_t *b0);
#endif /* __included_vnet_flow_report_h__ */
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index a289cc885df..ae9317f446f 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -66,7 +66,7 @@ ipip_input (vlib_main_t * vm, vlib_node_runtime_t * node,
ipip_main_t *gm = &ipip_main;
u32 n_left_from, next_index, *from, *to_next, n_left_to_next;
u32 tunnel_sw_if_index = ~0;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 len;
vnet_interface_main_t *im = &gm->vnet_main->interface_main;
diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h
index 450c9cfd6dc..08842702e76 100644
--- a/src/vnet/ipsec/ah.h
+++ b/src/vnet/ipsec/ah.h
@@ -74,8 +74,8 @@ ah_decrypt_err_to_sa_err (u32 err)
always_inline void
ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
- u32 thread_index, u32 err, u16 index, u16 *nexts,
- u16 drop_next, u32 sa_index)
+ clib_thread_index_t thread_index, u32 err,
+ u16 index, u16 *nexts, u16 drop_next, u32 sa_index)
{
ipsec_set_next_index (b, node, thread_index, err,
ah_encrypt_err_to_sa_err (err), index, nexts,
@@ -84,8 +84,8 @@ ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
always_inline void
ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
- u32 thread_index, u32 err, u16 index, u16 *nexts,
- u16 drop_next, u32 sa_index)
+ clib_thread_index_t thread_index, u32 err,
+ u16 index, u16 *nexts, u16 drop_next, u32 sa_index)
{
ipsec_set_next_index (b, node, thread_index, err,
ah_decrypt_err_to_sa_err (err), index, nexts,
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index ec4db0fed57..e95cc9e851e 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -118,7 +118,7 @@ ah_decrypt_inline (vlib_main_t * vm,
int is_ip6)
{
u32 n_left, *from;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u16 buffer_data_size = vlib_buffer_get_default_data_size (vm);
ah_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
@@ -127,7 +127,7 @@ ah_decrypt_inline (vlib_main_t * vm,
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index);
from = vlib_frame_vector_args (from_frame);
n_left = from_frame->n_vectors;
- ipsec_sa_t *sa0 = 0;
+ ipsec_sa_inb_rt_t *irt = 0;
bool anti_replay_result;
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
@@ -149,25 +149,25 @@ ah_decrypt_inline (vlib_main_t * vm,
current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
- sa0 = ipsec_sa_get (current_sa_index);
+ irt = ipsec_sa_get_inb_rt_by_index (current_sa_index);
current_sa_bytes = current_sa_pkts = 0;
vlib_prefetch_combined_counter (&ipsec_sa_counters,
thread_index, current_sa_index);
}
- if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == irt->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
* another thread */
- clib_atomic_cmp_and_swap (&sa0->thread_index, ~0,
+ clib_atomic_cmp_and_swap (&irt->thread_index, ~0,
ipsec_sa_assign_thread (thread_index));
}
- if (PREDICT_TRUE (thread_index != sa0->thread_index))
+ if (PREDICT_TRUE (thread_index != irt->thread_index))
{
- vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
+ vnet_buffer (b[0])->ipsec.thread_index = irt->thread_index;
next[0] = AH_DECRYPT_NEXT_HANDOFF;
goto next;
}
@@ -202,16 +202,8 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->seq = clib_host_to_net_u32 (ah0->seq_no);
/* anti-replay check */
- if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, ~0, false, &pd->seq_hi, true);
- }
- else
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, ~0, false, &pd->seq_hi, false);
- }
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ irt, pd->seq, ~0, false, &pd->seq_hi);
if (anti_replay_result)
{
ah_decrypt_set_next_index (b[0], node, vm->thread_index,
@@ -223,13 +215,14 @@ ah_decrypt_inline (vlib_main_t * vm,
current_sa_bytes += b[0]->current_length;
current_sa_pkts += 1;
- pd->icv_size = sa0->integ_icv_size;
+ pd->icv_size = irt->integ_icv_size;
pd->nexthdr_cached = ah0->nexthdr;
- if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
+ if (PREDICT_TRUE (irt->integ_icv_size))
{
- if (PREDICT_FALSE (ipsec_sa_is_set_USE_ESN (sa0) &&
- pd->current_data + b[0]->current_length
- + sizeof (u32) > buffer_data_size))
+ if (PREDICT_FALSE (irt->use_esn && pd->current_data +
+ b[0]->current_length +
+ sizeof (u32) >
+ buffer_data_size))
{
ah_decrypt_set_next_index (
b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE,
@@ -239,16 +232,16 @@ ah_decrypt_inline (vlib_main_t * vm,
vnet_crypto_op_t *op;
vec_add2_aligned (ptd->integ_ops, op, 1, CLIB_CACHE_LINE_BYTES);
- vnet_crypto_op_init (op, sa0->integ_op_id);
+ vnet_crypto_op_init (op, irt->integ_op_id);
op->src = (u8 *) ih4;
op->len = b[0]->current_length;
op->digest = (u8 *) ih4 - pd->icv_size;
op->flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
op->digest_len = pd->icv_size;
- op->key_index = sa0->integ_key_index;
+ op->key_index = irt->integ_key_index;
op->user_data = b - bufs;
- if (ipsec_sa_is_set_USE_ESN (sa0))
+ if (irt->use_esn)
{
u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi);
@@ -311,37 +304,21 @@ ah_decrypt_inline (vlib_main_t * vm,
if (next[0] < AH_DECRYPT_N_NEXT)
goto trace;
- sa0 = ipsec_sa_get (pd->sa_index);
+ irt = ipsec_sa_get_inb_rt_by_index (pd->sa_index);
- if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
+ if (PREDICT_TRUE (irt->integ_icv_size))
{
/* redo the anti-replay check. see esp_decrypt for details */
- if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi,
+ true, NULL))
{
- if (ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, pd->seq_hi, true, NULL, true))
- {
- ah_decrypt_set_next_index (
- b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
- next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
- goto trace;
- }
- n_lost = ipsec_sa_anti_replay_advance (
- sa0, thread_index, pd->seq, pd->seq_hi, true);
- }
- else
- {
- if (ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, pd->seq_hi, true, NULL, false))
- {
- ah_decrypt_set_next_index (
- b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
- next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
- goto trace;
- }
- n_lost = ipsec_sa_anti_replay_advance (
- sa0, thread_index, pd->seq, pd->seq_hi, false);
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_REPLAY, 0, next,
+ AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
}
+ n_lost = ipsec_sa_anti_replay_advance (irt, thread_index, pd->seq,
+ pd->seq_hi);
vlib_prefetch_simple_counter (
&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
pd->sa_index);
@@ -354,7 +331,7 @@ ah_decrypt_inline (vlib_main_t * vm,
b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
- if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_TRUE (irt->is_tunnel))
{ /* tunnel mode */
if (PREDICT_TRUE (pd->nexthdr_cached == IP_PROTOCOL_IP_IN_IP))
next[0] = AH_DECRYPT_NEXT_IP4_INPUT;
@@ -424,10 +401,10 @@ ah_decrypt_inline (vlib_main_t * vm,
trace:
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- sa0 = ipsec_sa_get (vnet_buffer (b[0])->ipsec.sad_index);
+ ipsec_sa_t *sa = ipsec_sa_get (vnet_buffer (b[0])->ipsec.sad_index);
ah_decrypt_trace_t *tr =
vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->integ_alg = sa0->integ_alg;
+ tr->integ_alg = sa->integ_alg;
tr->seq_num = pd->seq;
}
diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c
index 86694660878..1b32b8d2c7c 100644
--- a/src/vnet/ipsec/ah_encrypt.c
+++ b/src/vnet/ipsec/ah_encrypt.c
@@ -43,8 +43,7 @@ typedef struct
{
u32 sa_index;
u32 spi;
- u32 seq_lo;
- u32 seq_hi;
+ u64 seq;
ipsec_integ_alg_t integ_alg;
} ah_encrypt_trace_t;
@@ -56,9 +55,9 @@ format_ah_encrypt_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ah_encrypt_trace_t *t = va_arg (*args, ah_encrypt_trace_t *);
- s = format (s, "ah: sa-index %d spi %u (0x%08x) seq %u:%u integrity %U",
- t->sa_index, t->spi, t->spi, t->seq_hi, t->seq_lo,
- format_ipsec_integ_alg, t->integ_alg);
+ s = format (s, "ah: sa-index %d spi %u (0x%08x) seq %lu integrity %U",
+ t->sa_index, t->spi, t->spi, t->seq, format_ipsec_integ_alg,
+ t->integ_alg);
return s;
}
@@ -128,7 +127,7 @@ ah_encrypt_inline (vlib_main_t * vm,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index);
- ipsec_sa_t *sa0 = 0;
+ ipsec_sa_outb_rt_t *ort = 0;
ip4_and_ah_header_t *ih0, *oh0 = 0;
ip6_and_ah_header_t *ih6_0, *oh6_0 = 0;
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
@@ -158,7 +157,7 @@ ah_encrypt_inline (vlib_main_t * vm,
current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
- sa0 = ipsec_sa_get (current_sa_index);
+ ort = ipsec_sa_get_outb_rt_by_index (current_sa_index);
current_sa_bytes = current_sa_pkts = 0;
vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
@@ -168,23 +167,23 @@ ah_encrypt_inline (vlib_main_t * vm,
pd->sa_index = current_sa_index;
next[0] = AH_ENCRYPT_NEXT_DROP;
- if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == ort->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
* another thread */
- clib_atomic_cmp_and_swap (&sa0->thread_index, ~0,
+ clib_atomic_cmp_and_swap (&ort->thread_index, ~0,
ipsec_sa_assign_thread (thread_index));
}
- if (PREDICT_TRUE (thread_index != sa0->thread_index))
+ if (PREDICT_TRUE (thread_index != ort->thread_index))
{
- vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
+ vnet_buffer (b[0])->ipsec.thread_index = ort->thread_index;
next[0] = AH_ENCRYPT_NEXT_HANDOFF;
goto next;
}
- if (PREDICT_FALSE (esp_seq_advance (sa0)))
+ if (PREDICT_FALSE (esp_seq_advance (ort)))
{
ah_encrypt_set_next_index (b[0], node, vm->thread_index,
AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next,
@@ -199,7 +198,7 @@ ah_encrypt_inline (vlib_main_t * vm,
ssize_t adv;
ih0 = vlib_buffer_get_current (b[0]);
- if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_TRUE (ort->is_tunnel))
{
if (is_ip6)
adv = -sizeof (ip6_and_ah_header_t);
@@ -211,11 +210,11 @@ ah_encrypt_inline (vlib_main_t * vm,
adv = -sizeof (ah_header_t);
}
- icv_size = sa0->integ_icv_size;
+ icv_size = ort->integ_icv_size;
const u8 padding_len = ah_calc_icv_padding_len (icv_size, is_ip6);
adv -= padding_len;
/* transport mode save the eth header before it is overwritten */
- if (PREDICT_FALSE (!ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_FALSE (!ort->is_tunnel))
{
const u32 l2_len = vnet_buffer (b[0])->ip.save_rewrite_length;
u8 *l2_hdr_in = (u8 *) vlib_buffer_get_current (b[0]) - l2_len;
@@ -238,16 +237,16 @@ ah_encrypt_inline (vlib_main_t * vm,
oh6_0->ip6.ip_version_traffic_class_and_flow_label =
ih6_0->ip6.ip_version_traffic_class_and_flow_label;
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_FALSE (ort->is_tunnel))
{
- ip6_set_dscp_network_order (&oh6_0->ip6, sa0->tunnel.t_dscp);
- tunnel_encap_fixup_6o6 (sa0->tunnel_flags, &ih6_0->ip6,
+ ip6_set_dscp_network_order (&oh6_0->ip6, ort->t_dscp);
+ tunnel_encap_fixup_6o6 (ort->tunnel_flags, &ih6_0->ip6,
&oh6_0->ip6);
}
pd->ip_version_traffic_class_and_flow_label =
oh6_0->ip6.ip_version_traffic_class_and_flow_label;
- if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_TRUE (ort->is_tunnel))
{
next_hdr_type = IP_PROTOCOL_IPV6;
}
@@ -260,8 +259,8 @@ ah_encrypt_inline (vlib_main_t * vm,
clib_memcpy_fast (&oh6_0->ip6, &ip6_hdr_template, 8);
oh6_0->ah.reserved = 0;
oh6_0->ah.nexthdr = next_hdr_type;
- oh6_0->ah.spi = clib_net_to_host_u32 (sa0->spi);
- oh6_0->ah.seq_no = clib_net_to_host_u32 (sa0->seq);
+ oh6_0->ah.spi = ort->spi_be;
+ oh6_0->ah.seq_no = clib_net_to_host_u32 (ort->seq64);
oh6_0->ip6.payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b[0]) -
sizeof (ip6_header_t));
@@ -274,18 +273,18 @@ ah_encrypt_inline (vlib_main_t * vm,
oh0 = vlib_buffer_get_current (b[0]);
pd->ttl = ih0->ip4.ttl;
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_FALSE (ort->is_tunnel))
{
- if (sa0->tunnel.t_dscp)
- pd->tos = sa0->tunnel.t_dscp << 2;
+ if (ort->t_dscp)
+ pd->tos = ort->t_dscp << 2;
else
{
pd->tos = ih0->ip4.tos;
- if (!(sa0->tunnel_flags &
+ if (!(ort->tunnel_flags &
TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_DSCP))
pd->tos &= 0x3;
- if (!(sa0->tunnel_flags &
+ if (!(ort->tunnel_flags &
TUNNEL_ENCAP_DECAP_FLAG_ENCAP_COPY_ECN))
pd->tos &= 0xfc;
}
@@ -298,7 +297,7 @@ ah_encrypt_inline (vlib_main_t * vm,
pd->current_data = b[0]->current_data;
clib_memset (oh0, 0, sizeof (ip4_and_ah_header_t));
- if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
+ if (PREDICT_TRUE (ort->is_tunnel))
{
next_hdr_type = IP_PROTOCOL_IP_IN_IP;
}
@@ -314,57 +313,51 @@ ah_encrypt_inline (vlib_main_t * vm,
oh0->ip4.length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b[0]));
- oh0->ah.spi = clib_net_to_host_u32 (sa0->spi);
- oh0->ah.seq_no = clib_net_to_host_u32 (sa0->seq);
+ oh0->ah.spi = ort->spi_be;
+ oh0->ah.seq_no = clib_net_to_host_u32 (ort->seq64);
oh0->ah.nexthdr = next_hdr_type;
oh0->ah.hdrlen =
(sizeof (ah_header_t) + icv_size + padding_len) / 4 - 2;
}
- if (PREDICT_TRUE (!is_ip6 && ipsec_sa_is_set_IS_TUNNEL (sa0) &&
- !ipsec_sa_is_set_IS_TUNNEL_V6 (sa0)))
+ if (PREDICT_TRUE (!is_ip6 && ort->is_tunnel && !ort->is_tunnel_v6))
{
- clib_memcpy_fast (&oh0->ip4.address_pair,
- &sa0->ip4_hdr.address_pair,
+ clib_memcpy_fast (&oh0->ip4.address_pair, &ort->ip4_hdr.address_pair,
sizeof (ip4_address_pair_t));
- next[0] = sa0->dpo.dpoi_next_node;
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = sa0->dpo.dpoi_index;
+ next[0] = ort->dpo.dpoi_next_node;
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = ort->dpo.dpoi_index;
}
- else if (is_ip6 && ipsec_sa_is_set_IS_TUNNEL (sa0) &&
- ipsec_sa_is_set_IS_TUNNEL_V6 (sa0))
+ else if (is_ip6 && ort->is_tunnel && ort->is_tunnel_v6)
{
- clib_memcpy_fast (&oh6_0->ip6.src_address,
- &sa0->ip6_hdr.src_address,
+ clib_memcpy_fast (&oh6_0->ip6.src_address, &ort->ip6_hdr.src_address,
sizeof (ip6_address_t) * 2);
- next[0] = sa0->dpo.dpoi_next_node;
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = sa0->dpo.dpoi_index;
+ next[0] = ort->dpo.dpoi_next_node;
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = ort->dpo.dpoi_index;
}
- if (PREDICT_TRUE (sa0->integ_op_id))
+ if (PREDICT_TRUE (ort->integ_op_id))
{
vnet_crypto_op_t *op;
vec_add2_aligned (ptd->integ_ops, op, 1, CLIB_CACHE_LINE_BYTES);
- vnet_crypto_op_init (op, sa0->integ_op_id);
+ vnet_crypto_op_init (op, ort->integ_op_id);
op->src = vlib_buffer_get_current (b[0]);
op->len = b[0]->current_length;
op->digest = vlib_buffer_get_current (b[0]) + ip_hdr_size +
sizeof (ah_header_t);
clib_memset (op->digest, 0, icv_size);
op->digest_len = icv_size;
- op->key_index = sa0->integ_key_index;
+ op->key_index = ort->integ_key_index;
op->user_data = b - bufs;
- if (ipsec_sa_is_set_USE_ESN (sa0))
+ if (ort->use_esn)
{
- u32 seq_hi = clib_host_to_net_u32 (sa0->seq_hi);
-
- op->len += sizeof (seq_hi);
- clib_memcpy (op->src + b[0]->current_length, &seq_hi,
- sizeof (seq_hi));
+ *(u32u *) (op->src + b[0]->current_length) =
+ clib_host_to_net_u32 (ort->seq64 >> 32);
+ op->len += sizeof (u32);
}
}
- if (!ipsec_sa_is_set_IS_TUNNEL (sa0))
+ if (!ort->is_tunnel)
{
next[0] = AH_ENCRYPT_NEXT_INTERFACE_OUTPUT;
vlib_buffer_advance (b[0], -sizeof (ethernet_header_t));
@@ -373,13 +366,14 @@ ah_encrypt_inline (vlib_main_t * vm,
next:
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- sa0 = ipsec_sa_get (pd->sa_index);
+ ipsec_sa_t *sa = ipsec_sa_get (pd->sa_index);
+ ipsec_sa_outb_rt_t *ort =
+ ipsec_sa_get_outb_rt_by_index (pd->sa_index);
ah_encrypt_trace_t *tr =
vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->spi = sa0->spi;
- tr->seq_lo = sa0->seq;
- tr->seq_hi = sa0->seq_hi;
- tr->integ_alg = sa0->integ_alg;
+ tr->spi = sa->spi;
+ tr->seq = ort->seq64;
+ tr->integ_alg = sa->integ_alg;
tr->sa_index = pd->sa_index;
}
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index 1c3ce776ad2..c855843f35d 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -79,46 +79,28 @@ typedef struct esp_aead_t_
u32 data[3];
} __clib_packed esp_aead_t;
-#define ESP_SEQ_MAX (4294967295UL)
-
u8 *format_esp_header (u8 * s, va_list * args);
/* TODO seq increment should be atomic to be accessed by multiple workers */
always_inline int
-esp_seq_advance (ipsec_sa_t * sa)
+esp_seq_advance (ipsec_sa_outb_rt_t *ort)
{
- if (PREDICT_TRUE (ipsec_sa_is_set_USE_ESN (sa)))
- {
- if (PREDICT_FALSE (sa->seq == ESP_SEQ_MAX))
- {
- if (PREDICT_FALSE (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) &&
- sa->seq_hi == ESP_SEQ_MAX))
- return 1;
- sa->seq_hi++;
- }
- sa->seq++;
- }
- else
- {
- if (PREDICT_FALSE (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) &&
- sa->seq == ESP_SEQ_MAX))
- return 1;
- sa->seq++;
- }
-
+ u64 max = ort->use_esn ? CLIB_U64_MAX : CLIB_U32_MAX;
+ if (ort->seq64 == max)
+ return 1;
+ ort->seq64++;
return 0;
}
always_inline u16
-esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa,
- u32 seq_hi)
+esp_aad_fill (u8 *data, const esp_header_t *esp, int use_esn, u32 seq_hi)
{
esp_aead_t *aad;
aad = (esp_aead_t *) data;
aad->data[0] = esp->spi;
- if (ipsec_sa_is_set_USE_ESN (sa))
+ if (use_esn)
{
/* SPI, seq-hi, seq-low */
aad->data[1] = (u32) clib_host_to_net_u32 (seq_hi);
@@ -187,8 +169,8 @@ esp_decrypt_err_to_sa_err (u32 err)
always_inline void
esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
- u32 thread_index, u32 err, u16 index, u16 *nexts,
- u16 drop_next, u32 sa_index)
+ clib_thread_index_t thread_index, u32 err,
+ u16 index, u16 *nexts, u16 drop_next, u32 sa_index)
{
ipsec_set_next_index (b, node, thread_index, err,
esp_encrypt_err_to_sa_err (err), index, nexts,
@@ -197,8 +179,8 @@ esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
always_inline void
esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
- u32 thread_index, u32 err, u16 index, u16 *nexts,
- u16 drop_next, u32 sa_index)
+ clib_thread_index_t thread_index, u32 err,
+ u16 index, u16 *nexts, u16 drop_next, u32 sa_index)
{
ipsec_set_next_index (b, node, thread_index, err,
esp_decrypt_err_to_sa_err (err), index, nexts,
@@ -218,7 +200,8 @@ typedef struct
{
u8 icv_sz;
u8 iv_sz;
- ipsec_sa_flags_t flags;
+ u8 udp_sz;
+ u8 is_transport;
u32 sa_index;
};
u64 sa_data;
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index 6384bb927a8..7f7cd57488d 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -60,8 +60,7 @@ typedef enum
typedef struct
{
u32 seq;
- u32 sa_seq;
- u32 sa_seq_hi;
+ u64 sa_seq64;
u32 pkt_seq_hi;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
@@ -81,10 +80,10 @@ format_esp_decrypt_trace (u8 * s, va_list * args)
esp_decrypt_trace_t *t = va_arg (*args, esp_decrypt_trace_t *);
s = format (s,
- "esp: crypto %U integrity %U pkt-seq %d sa-seq %u sa-seq-hi %u "
+ "esp: crypto %U integrity %U pkt-seq %d sa-seq %lu "
"pkt-seq-hi %u",
format_ipsec_crypto_alg, t->crypto_alg, format_ipsec_integ_alg,
- t->integ_alg, t->seq, t->sa_seq, t->sa_seq_hi, t->pkt_seq_hi);
+ t->integ_alg, t->seq, t->sa_seq64, t->pkt_seq_hi);
return s;
}
@@ -251,11 +250,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first,
}
static_always_inline u16
-esp_insert_esn (vlib_main_t *vm, ipsec_sa_t *sa, esp_decrypt_packet_data_t *pd,
- esp_decrypt_packet_data2_t *pd2, u32 *data_len, u8 **digest,
- u16 *len, vlib_buffer_t *b, u8 *payload)
+esp_insert_esn (vlib_main_t *vm, ipsec_sa_inb_rt_t *irt,
+ esp_decrypt_packet_data_t *pd, esp_decrypt_packet_data2_t *pd2,
+ u32 *data_len, u8 **digest, u16 *len, vlib_buffer_t *b,
+ u8 *payload)
{
- if (!ipsec_sa_is_set_USE_ESN (sa))
+ if (!irt->use_esn)
return 0;
/* shift ICV by 4 bytes to insert ESN */
u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi);
@@ -288,17 +288,17 @@ esp_insert_esn (vlib_main_t *vm, ipsec_sa_t *sa, esp_decrypt_packet_data_t *pd,
}
static_always_inline u8 *
-esp_move_icv_esn (vlib_main_t * vm, vlib_buffer_t * first,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2, u16 icv_sz,
- ipsec_sa_t * sa, u8 * extra_esn, u32 * len)
+esp_move_icv_esn (vlib_main_t *vm, vlib_buffer_t *first,
+ esp_decrypt_packet_data_t *pd,
+ esp_decrypt_packet_data2_t *pd2, u16 icv_sz,
+ ipsec_sa_inb_rt_t *irt, u8 *extra_esn, u32 *len)
{
u16 dif = 0;
u8 *digest = esp_move_icv (vm, first, pd, pd2, icv_sz, &dif);
if (dif)
*len -= dif;
- if (ipsec_sa_is_set_USE_ESN (sa))
+ if (irt->use_esn)
{
u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi);
u16 space_left = vlib_buffer_space_left_at_end (vm, pd2->lb);
@@ -326,9 +326,9 @@ esp_move_icv_esn (vlib_main_t * vm, vlib_buffer_t * first,
static_always_inline int
esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
const esp_decrypt_packet_data_t *pd,
- esp_decrypt_packet_data2_t *pd2, ipsec_sa_t *sa0,
- vlib_buffer_t *b, u8 icv_sz, u8 *start_src,
- u32 start_len, u8 **digest, u16 *n_ch,
+ esp_decrypt_packet_data2_t *pd2,
+ ipsec_sa_inb_rt_t *irt, vlib_buffer_t *b, u8 icv_sz,
+ u8 *start_src, u32 start_len, u8 **digest, u16 *n_ch,
u32 *integ_total_len)
{
vnet_crypto_op_chunk_t *ch;
@@ -350,7 +350,7 @@ esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
ch->len = cb->current_length;
else
ch->len = cb->current_length - icv_sz;
- if (ipsec_sa_is_set_USE_ESN (sa0))
+ if (irt->use_esn)
{
u32 seq_hi = clib_host_to_net_u32 (pd->seq_hi);
u8 tmp[ESP_MAX_ICV_SIZE];
@@ -422,11 +422,11 @@ esp_decrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
static_always_inline u32
-esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2,
- ipsec_sa_t * sa0, vlib_buffer_t * b, u8 icv_sz,
- u8 * start, u32 start_len, u8 ** tag, u16 * n_ch)
+esp_decrypt_chain_crypto (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ esp_decrypt_packet_data_t *pd,
+ esp_decrypt_packet_data2_t *pd2,
+ ipsec_sa_inb_rt_t *irt, vlib_buffer_t *b, u8 icv_sz,
+ u8 *start, u32 start_len, u8 **tag, u16 *n_ch)
{
vnet_crypto_op_chunk_t *ch;
vlib_buffer_t *cb = b;
@@ -445,7 +445,7 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
ch->src = ch->dst = vlib_buffer_get_current (cb);
if (pd2->lb == cb)
{
- if (ipsec_sa_is_set_IS_AEAD (sa0))
+ if (irt->is_aead)
{
if (pd2->lb->current_length < icv_sz)
{
@@ -496,8 +496,9 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
static_always_inline esp_decrypt_error_t
esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
- ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz,
- u8 iv_sz, esp_decrypt_packet_data_t *pd,
+ ipsec_sa_inb_rt_t *irt, u8 *payload, u16 len,
+ u8 icv_sz, u8 iv_sz,
+ esp_decrypt_packet_data_t *pd,
esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b,
u32 index)
{
@@ -506,10 +507,10 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_op_t _op, *op = &_op;
const u8 esp_sz = sizeof (esp_header_t);
- if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
+ if (PREDICT_TRUE (irt->integ_op_id != VNET_CRYPTO_OP_NONE))
{
- vnet_crypto_op_init (op, sa0->integ_op_id);
- op->key_index = sa0->integ_key_index;
+ vnet_crypto_op_init (op, irt->integ_op_id);
+ op->key_index = irt->integ_key_index;
op->src = payload;
op->flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
op->user_data = index;
@@ -531,9 +532,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
if (pd2->lb->current_length < icv_sz)
{
u8 extra_esn = 0;
- op->digest =
- esp_move_icv_esn (vm, b, pd, pd2, icv_sz, sa0,
- &extra_esn, &op->len);
+ op->digest = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, irt,
+ &extra_esn, &op->len);
if (extra_esn)
{
@@ -558,7 +558,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
- if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz,
+ if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, irt, b, icv_sz,
payload, pd->current_length,
&op->digest, &op->n_chunks, 0) < 0)
return ESP_DECRYPT_ERROR_NO_BUFFERS;
@@ -566,7 +566,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
else
{
integ_ops = &ptd->integ_ops;
- esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
+ esp_insert_esn (vm, irt, pd, pd2, &op->len, &op->digest, &len, b,
payload);
}
out:
@@ -576,27 +576,28 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
payload += esp_sz;
len -= esp_sz;
- if (sa0->crypto_dec_op_id != VNET_CRYPTO_OP_NONE)
+ if (irt->cipher_op_id != VNET_CRYPTO_OP_NONE)
{
- vnet_crypto_op_init (op, sa0->crypto_dec_op_id);
- op->key_index = sa0->crypto_key_index;
+ vnet_crypto_op_init (op, irt->cipher_op_id);
+ op->key_index = irt->cipher_key_index;
op->iv = payload;
- if (ipsec_sa_is_set_IS_CTR (sa0))
+ if (irt->is_ctr)
{
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
(esp_ctr_nonce_t *) (payload - esp_sz - pd->hdr_sz -
sizeof (*nonce));
- if (ipsec_sa_is_set_IS_AEAD (sa0))
+ if (irt->is_aead)
{
/* construct aad in a scratch space in front of the nonce */
esp_header_t *esp0 = (esp_header_t *) (payload - esp_sz);
op->aad = (u8 *) nonce - sizeof (esp_aead_t);
- op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi);
+ op->aad_len =
+ esp_aad_fill (op->aad, esp0, irt->use_esn, pd->seq_hi);
op->tag = payload + len;
op->tag_len = 16;
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ if (PREDICT_FALSE (irt->is_null_gmac))
{
/* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
payload -= iv_sz;
@@ -607,7 +608,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
{
nonce->ctr = clib_host_to_net_u32 (1);
}
- nonce->salt = sa0->salt;
+ nonce->salt = irt->salt;
ASSERT (sizeof (u64) == iv_sz);
nonce->iv = *(u64 *) op->iv;
op->iv = (u8 *) nonce;
@@ -621,9 +622,9 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
/* buffer is chained */
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
- esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz,
- payload, len - pd->iv_sz + pd->icv_sz,
- &op->tag, &op->n_chunks);
+ esp_decrypt_chain_crypto (vm, ptd, pd, pd2, irt, b, icv_sz, payload,
+ len - pd->iv_sz + pd->icv_sz, &op->tag,
+ &op->n_chunks);
crypto_ops = &ptd->chained_crypto_ops;
}
else
@@ -639,8 +640,9 @@ esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
static_always_inline esp_decrypt_error_t
esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
- vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0,
- u8 *payload, u16 len, u8 icv_sz, u8 iv_sz,
+ vnet_crypto_async_frame_t *f,
+ ipsec_sa_inb_rt_t *irt, u8 *payload, u16 len,
+ u8 icv_sz, u8 iv_sz,
esp_decrypt_packet_data_t *pd,
esp_decrypt_packet_data2_t *pd2, u32 bi,
vlib_buffer_t *b, u16 async_next)
@@ -649,17 +651,17 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data;
esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b);
u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0;
- const u32 key_index = sa0->crypto_key_index;
+ const u32 key_index = irt->cipher_key_index;
u32 crypto_len, integ_len = 0;
i16 crypto_start_offset, integ_start_offset = 0;
u8 flags = 0;
- if (!ipsec_sa_is_set_IS_AEAD (sa0))
+ if (!irt->is_aead)
{
/* linked algs */
integ_start_offset = payload - b->data;
integ_len = len;
- if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
+ if (PREDICT_TRUE (irt->integ_op_id != VNET_CRYPTO_OP_NONE))
flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
if (pd->is_chain)
@@ -674,8 +676,8 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
if (pd2->lb->current_length < icv_sz)
{
u8 extra_esn = 0;
- tag = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, sa0,
- &extra_esn, &integ_len);
+ tag = esp_move_icv_esn (vm, b, pd, pd2, icv_sz, irt, &extra_esn,
+ &integ_len);
if (extra_esn)
{
@@ -698,7 +700,7 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
tag = vlib_buffer_get_tail (pd2->lb) - icv_sz;
flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
- if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz,
+ if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, irt, b, icv_sz,
payload, pd->current_length, &tag, 0,
&integ_len) < 0)
{
@@ -707,7 +709,7 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
}
else
- esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload);
+ esp_insert_esn (vm, irt, pd, pd2, &integ_len, &tag, &len, b, payload);
}
out:
@@ -716,19 +718,19 @@ out:
len -= esp_sz;
iv = payload;
- if (ipsec_sa_is_set_IS_CTR (sa0))
+ if (irt->is_ctr)
{
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
(esp_ctr_nonce_t *) (payload - esp_sz - pd->hdr_sz - sizeof (*nonce));
- if (ipsec_sa_is_set_IS_AEAD (sa0))
+ if (irt->is_aead)
{
/* construct aad in a scratch space in front of the nonce */
esp_header_t *esp0 = (esp_header_t *) (payload - esp_sz);
aad = (u8 *) nonce - sizeof (esp_aead_t);
- esp_aad_fill (aad, esp0, sa0, pd->seq_hi);
+ esp_aad_fill (aad, esp0, irt->use_esn, pd->seq_hi);
tag = payload + len;
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ if (PREDICT_FALSE (irt->is_null_gmac))
{
/* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
payload -= iv_sz;
@@ -739,7 +741,7 @@ out:
{
nonce->ctr = clib_host_to_net_u32 (1);
}
- nonce->salt = sa0->salt;
+ nonce->salt = irt->salt;
ASSERT (sizeof (u64) == iv_sz);
nonce->iv = *(u64 *) iv;
iv = (u8 *) nonce;
@@ -753,10 +755,9 @@ out:
/* buffer is chained */
flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
- crypto_len = esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz,
- payload,
- len - pd->iv_sz + pd->icv_sz,
- &tag, 0);
+ crypto_len =
+ esp_decrypt_chain_crypto (vm, ptd, pd, pd2, irt, b, icv_sz, payload,
+ len - pd->iv_sz + pd->icv_sz, &tag, 0);
}
*async_pd = *pd;
@@ -779,10 +780,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *b, u16 *next, int is_ip6, int is_tun,
int is_async)
{
- ipsec_sa_t *sa0 = ipsec_sa_get (pd->sa_index);
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt_by_index (pd->sa_index);
vlib_buffer_t *lb = b;
const u8 esp_sz = sizeof (esp_header_t);
- const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6;
u8 pad_length = 0, next_header = 0;
u16 icv_sz;
u64 n_lost;
@@ -809,32 +809,16 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
* a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any
* implementation, sequential or batching, from decrypting these.
*/
- if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ if (ipsec_sa_anti_replay_and_sn_advance (irt, pd->seq, pd->seq_hi, true,
+ NULL))
{
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
- NULL, true))
- {
- esp_decrypt_set_next_index (b, node, vm->thread_index,
- ESP_DECRYPT_ERROR_REPLAY, 0, next,
- ESP_DECRYPT_NEXT_DROP, pd->sa_index);
- return;
- }
- n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
- pd->seq_hi, true);
- }
- else
- {
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
- NULL, false))
- {
- esp_decrypt_set_next_index (b, node, vm->thread_index,
- ESP_DECRYPT_ERROR_REPLAY, 0, next,
- ESP_DECRYPT_NEXT_DROP, pd->sa_index);
- return;
- }
- n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
- pd->seq_hi, false);
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
}
+ n_lost =
+ ipsec_sa_anti_replay_advance (irt, vm->thread_index, pd->seq, pd->seq_hi);
vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
vm->thread_index, pd->sa_index);
@@ -899,10 +883,9 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
b->flags &=
~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
- if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */
+ if (pd->is_transport && !is_tun) /* transport mode */
{
- u8 udp_sz = (is_ip6 == 0 && pd->flags & IPSEC_SA_FLAG_UDP_ENCAP) ?
- sizeof (udp_header_t) : 0;
+ u8 udp_sz = is_ip6 ? 0 : pd->udp_sz;
u16 ip_hdr_sz = pd->hdr_sz - udp_sz;
u8 *old_ip = b->data + pd->current_data - ip_hdr_sz - udp_sz;
u8 *ip = old_ip + adv + udp_sz;
@@ -1012,7 +995,7 @@ esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_tun)
{
- if (ipsec_sa_is_set_IS_PROTECT (sa0))
+ if (irt->is_protect)
{
/*
* There are two encap possibilities
@@ -1085,7 +1068,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
ipsec_main_t *im = &ipsec_main;
const u16 *next_by_next_header = im->next_header_registrations;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u16 len;
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index);
u32 *from = vlib_frame_vector_args (from_frame);
@@ -1101,21 +1084,18 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
esp_decrypt_packet_data_t cpd = { };
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
const u8 esp_sz = sizeof (esp_header_t);
- ipsec_sa_t *sa0 = 0;
+ ipsec_sa_inb_rt_t *irt = 0;
bool anti_replay_result;
- int is_async = im->async_mode;
+ int is_async = 0;
vnet_crypto_op_id_t async_op = ~0;
vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS];
esp_decrypt_error_t err;
vlib_get_buffers (vm, from, b, n_left);
- if (!is_async)
- {
- vec_reset_length (ptd->crypto_ops);
- vec_reset_length (ptd->integ_ops);
- vec_reset_length (ptd->chained_crypto_ops);
- vec_reset_length (ptd->chained_integ_ops);
- }
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->integ_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chained_integ_ops);
vec_reset_length (ptd->async_frames);
vec_reset_length (ptd->chunks);
clib_memset (sync_nexts, -1, sizeof (sync_nexts));
@@ -1157,29 +1137,28 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
current_sa_index);
- sa0 = ipsec_sa_get (current_sa_index);
+ irt = ipsec_sa_get_inb_rt_by_index (current_sa_index);
- /* fetch the second cacheline ASAP */
- clib_prefetch_load (sa0->cacheline1);
- cpd.icv_sz = sa0->integ_icv_size;
- cpd.iv_sz = sa0->crypto_iv_size;
- cpd.flags = sa0->flags;
+ cpd.icv_sz = irt->integ_icv_size;
+ cpd.iv_sz = irt->cipher_iv_size;
+ cpd.udp_sz = irt->udp_sz;
+ cpd.is_transport = irt->is_transport;
cpd.sa_index = current_sa_index;
- is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
+ is_async = irt->is_async;
}
- if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == irt->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
* another thread */
- clib_atomic_cmp_and_swap (&sa0->thread_index, ~0,
+ clib_atomic_cmp_and_swap (&irt->thread_index, ~0,
ipsec_sa_assign_thread (thread_index));
}
- if (PREDICT_FALSE (thread_index != sa0->thread_index))
+ if (PREDICT_FALSE (thread_index != irt->thread_index))
{
- vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
+ vnet_buffer (b[0])->ipsec.thread_index = irt->thread_index;
err = ESP_DECRYPT_ERROR_HANDOFF;
esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
noop_nexts, ESP_DECRYPT_NEXT_HANDOFF,
@@ -1209,16 +1188,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
pd->current_length = b[0]->current_length;
/* anti-reply check */
- if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, ~0, false, &pd->seq_hi, true);
- }
- else
- {
- anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
- sa0, pd->seq, ~0, false, &pd->seq_hi, false);
- }
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ irt, pd->seq, ~0, false, &pd->seq_hi);
if (anti_replay_result)
{
@@ -1244,7 +1215,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_async)
{
- async_op = sa0->crypto_async_dec_op_id;
+ async_op = irt->async_op_id;
/* get a frame for this op if we don't yet have one or it's full
*/
@@ -1267,7 +1238,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
err = esp_decrypt_prepare_async_frame (
- vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz,
+ vm, ptd, async_frames[async_op], irt, payload, len, cpd.icv_sz,
cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node);
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
@@ -1278,7 +1249,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else
{
- err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len,
+ err = esp_decrypt_prepare_sync_op (vm, ptd, irt, payload, len,
cpd.icv_sz, cpd.iv_sz, pd, pd2,
b[0], n_sync);
if (err != ESP_DECRYPT_ERROR_RX_PKTS)
@@ -1391,12 +1362,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
esp_decrypt_trace_t *tr;
tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
- sa0 = ipsec_sa_get (current_sa_index);
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
+ ipsec_sa_t *sa = ipsec_sa_get (current_sa_index);
+ ipsec_sa_inb_rt_t *irt =
+ ipsec_sa_get_inb_rt_by_index (current_sa_index);
+ tr->crypto_alg = sa->crypto_alg;
+ tr->integ_alg = sa->integ_alg;
tr->seq = pd->seq;
- tr->sa_seq = sa0->seq;
- tr->sa_seq_hi = sa0->seq_hi;
+ tr->sa_seq64 = irt->seq64;
tr->pkt_seq_hi = pd->seq_hi;
}
@@ -1456,18 +1428,19 @@ esp_decrypt_post_inline (vlib_main_t * vm,
/*trace: */
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- ipsec_sa_t *sa0 = ipsec_sa_get (pd->sa_index);
+ ipsec_sa_t *sa;
+ ipsec_sa_inb_rt_t *irt;
esp_decrypt_trace_t *tr;
esp_decrypt_packet_data_t *async_pd =
&(esp_post_data (b[0]))->decrypt_data;
tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
- sa0 = ipsec_sa_get (async_pd->sa_index);
+ sa = ipsec_sa_get (async_pd->sa_index);
+ irt = ipsec_sa_get_inb_rt_by_index (async_pd->sa_index);
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
+ tr->crypto_alg = sa->crypto_alg;
+ tr->integ_alg = sa->integ_alg;
tr->seq = pd->seq;
- tr->sa_seq = sa0->seq;
- tr->sa_seq_hi = sa0->seq_hi;
+ tr->sa_seq64 = irt->seq64;
}
n_left--;
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index 4338cb01e5d..1f2cc243074 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -49,8 +49,7 @@ typedef struct
{
u32 sa_index;
u32 spi;
- u32 seq;
- u32 sa_seq_hi;
+ u64 seq;
u8 udp_encap;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
@@ -71,13 +70,11 @@ format_esp_encrypt_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
- s =
- format (s,
- "esp: sa-index %d spi %u (0x%08x) seq %u sa-seq-hi %u crypto %U integrity %U%s",
- t->sa_index, t->spi, t->spi, t->seq, t->sa_seq_hi,
- format_ipsec_crypto_alg,
- t->crypto_alg, format_ipsec_integ_alg, t->integ_alg,
- t->udp_encap ? " udp-encap-enabled" : "");
+ s = format (
+ s, "esp: sa-index %d spi %u (0x%08x) seq %lu crypto %U integrity %U%s",
+ t->sa_index, t->spi, t->spi, t->seq, format_ipsec_crypto_alg,
+ t->crypto_alg, format_ipsec_integ_alg, t->integ_alg,
+ t->udp_encap ? " udp-encap-enabled" : "");
return s;
}
@@ -162,9 +159,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp)
}
static_always_inline void
-esp_fill_udp_hdr (ipsec_sa_t * sa, udp_header_t * udp, u16 len)
+esp_fill_udp_hdr (ipsec_sa_outb_rt_t *ort, udp_header_t *udp, u16 len)
{
- clib_memcpy_fast (udp, &sa->udp_hdr, sizeof (udp_header_t));
+ clib_memcpy_fast (udp, &ort->udp_hdr, sizeof (udp_header_t));
udp->length = clib_net_to_host_u16 (len);
}
@@ -223,12 +220,12 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
* message. You can refer to NIST SP800-38a and NIST SP800-38d for more
* details. */
static_always_inline void *
-esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz)
+esp_generate_iv (ipsec_sa_outb_rt_t *ort, void *payload, int iv_sz)
{
ASSERT (iv_sz >= sizeof (u64));
u64 *iv = (u64 *) (payload - iv_sz);
clib_memset_u8 (iv, 0, iv_sz);
- *iv = clib_pcg64i_random_r (&sa->iv_prng);
+ *iv = clib_pcg64i_random_r (&ort->iv_prng);
return iv;
}
@@ -294,10 +291,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
}
static_always_inline u32
-esp_encrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
- ipsec_sa_t * sa0, vlib_buffer_t * b,
- vlib_buffer_t * lb, u8 icv_sz, u8 * start,
- u32 start_len, u16 * n_ch)
+esp_encrypt_chain_crypto (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 icv_sz,
+ u8 *start, u32 start_len, u16 *n_ch)
{
vnet_crypto_op_chunk_t *ch;
vlib_buffer_t *cb = b;
@@ -331,10 +327,10 @@ esp_encrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
}
static_always_inline u32
-esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
- ipsec_sa_t * sa0, vlib_buffer_t * b,
- vlib_buffer_t * lb, u8 icv_sz, u8 * start,
- u32 start_len, u8 * digest, u16 * n_ch)
+esp_encrypt_chain_integ (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ ipsec_sa_outb_rt_t *ort, vlib_buffer_t *b,
+ vlib_buffer_t *lb, u8 icv_sz, u8 *start,
+ u32 start_len, u8 *digest, u16 *n_ch)
{
vnet_crypto_op_chunk_t *ch;
vlib_buffer_t *cb = b;
@@ -352,12 +348,11 @@ esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
if (lb == cb)
{
total_len += ch->len = cb->current_length - icv_sz;
- if (ipsec_sa_is_set_USE_ESN (sa0))
+ if (ort->use_esn)
{
- u32 seq_hi = clib_net_to_host_u32 (sa0->seq_hi);
- clib_memcpy_fast (digest, &seq_hi, sizeof (seq_hi));
- ch->len += sizeof (seq_hi);
- total_len += sizeof (seq_hi);
+ *(u32u *) digest = clib_net_to_host_u32 (ort->seq64 >> 32);
+ ch->len += sizeof (u32);
+ total_len += sizeof (u32);
}
}
else
@@ -379,16 +374,16 @@ esp_encrypt_chain_integ (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
always_inline void
esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_op_t **crypto_ops,
- vnet_crypto_op_t **integ_ops, ipsec_sa_t *sa0, u32 seq_hi,
- u8 *payload, u16 payload_len, u8 iv_sz, u8 icv_sz, u32 bi,
- vlib_buffer_t **b, vlib_buffer_t *lb, u32 hdr_len,
- esp_header_t *esp)
+ vnet_crypto_op_t **integ_ops, ipsec_sa_outb_rt_t *ort,
+ u32 seq_hi, u8 *payload, u16 payload_len, u8 iv_sz,
+ u8 icv_sz, u32 bi, vlib_buffer_t **b, vlib_buffer_t *lb,
+ u32 hdr_len, esp_header_t *esp)
{
- if (sa0->crypto_enc_op_id)
+ if (ort->cipher_op_id)
{
vnet_crypto_op_t *op;
vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
- vnet_crypto_op_init (op, sa0->crypto_enc_op_id);
+ vnet_crypto_op_init (op, ort->cipher_op_id);
u8 *crypto_start = payload;
/* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always
* have enough space for ESP header and footer which includes ICV */
@@ -396,24 +391,24 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
u16 crypto_len = payload_len - icv_sz;
/* generate the IV in front of the payload */
- void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz);
+ void *pkt_iv = esp_generate_iv (ort, payload, iv_sz);
- op->key_index = sa0->crypto_key_index;
+ op->key_index = ort->cipher_key_index;
op->user_data = bi;
- if (ipsec_sa_is_set_IS_CTR (sa0))
+ if (ort->is_ctr)
{
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
(esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
- if (ipsec_sa_is_set_IS_AEAD (sa0))
+ if (ort->is_aead)
{
/* constuct aad in a scratch space in front of the nonce */
op->aad = (u8 *) nonce - sizeof (esp_aead_t);
- op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi);
+ op->aad_len = esp_aad_fill (op->aad, esp, ort->use_esn, seq_hi);
op->tag = payload + crypto_len;
op->tag_len = 16;
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ if (PREDICT_FALSE (ort->is_null_gmac))
{
/* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
crypto_start -= iv_sz;
@@ -425,7 +420,7 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
nonce->ctr = clib_host_to_net_u32 (1);
}
- nonce->salt = sa0->salt;
+ nonce->salt = ort->salt;
nonce->iv = *(u64 *) pkt_iv;
op->iv = (u8 *) nonce;
}
@@ -445,9 +440,8 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
op->tag = vlib_buffer_get_tail (lb) - icv_sz;
- esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz,
- crypto_start, crypto_len + icv_sz,
- &op->n_chunks);
+ esp_encrypt_chain_crypto (vm, ptd, b[0], lb, icv_sz, crypto_start,
+ crypto_len + icv_sz, &op->n_chunks);
}
else
{
@@ -457,14 +451,14 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
}
- if (sa0->integ_op_id)
+ if (ort->integ_op_id)
{
vnet_crypto_op_t *op;
vec_add2_aligned (integ_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
- vnet_crypto_op_init (op, sa0->integ_op_id);
+ vnet_crypto_op_init (op, ort->integ_op_id);
op->src = payload - iv_sz - sizeof (esp_header_t);
op->digest = payload + payload_len - icv_sz;
- op->key_index = sa0->integ_key_index;
+ op->key_index = ort->integ_key_index;
op->digest_len = icv_sz;
op->len = payload_len - icv_sz + iv_sz + sizeof (esp_header_t);
op->user_data = bi;
@@ -476,13 +470,12 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
op->chunk_index = vec_len (ptd->chunks);
op->digest = vlib_buffer_get_tail (lb) - icv_sz;
- esp_encrypt_chain_integ (vm, ptd, sa0, b[0], lb, icv_sz,
+ esp_encrypt_chain_integ (vm, ptd, ort, b[0], lb, icv_sz,
payload - iv_sz - sizeof (esp_header_t),
- payload_len + iv_sz +
- sizeof (esp_header_t), op->digest,
- &op->n_chunks);
+ payload_len + iv_sz + sizeof (esp_header_t),
+ op->digest, &op->n_chunks);
}
- else if (ipsec_sa_is_set_USE_ESN (sa0))
+ else if (ort->use_esn)
{
u32 tmp = clib_net_to_host_u32 (seq_hi);
clib_memcpy_fast (op->digest, &tmp, sizeof (seq_hi));
@@ -494,15 +487,15 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
static_always_inline void
esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_async_frame_t *async_frame,
- ipsec_sa_t *sa, vlib_buffer_t *b, esp_header_t *esp,
- u8 *payload, u32 payload_len, u8 iv_sz, u8 icv_sz,
- u32 bi, u16 next, u32 hdr_len, u16 async_next,
- vlib_buffer_t *lb)
+ ipsec_sa_outb_rt_t *ort, vlib_buffer_t *b,
+ esp_header_t *esp, u8 *payload, u32 payload_len,
+ u8 iv_sz, u8 icv_sz, u32 bi, u16 next, u32 hdr_len,
+ u16 async_next, vlib_buffer_t *lb)
{
esp_post_data_t *post = esp_post_data (b);
u8 *tag, *iv, *aad = 0;
u8 flag = 0;
- const u32 key_index = sa->crypto_key_index;
+ const u32 key_index = ort->cipher_key_index;
i16 crypto_start_offset, integ_start_offset;
u16 crypto_total_len, integ_total_len;
@@ -514,19 +507,19 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
tag = payload + crypto_total_len;
/* generate the IV in front of the payload */
- void *pkt_iv = esp_generate_iv (sa, payload, iv_sz);
+ void *pkt_iv = esp_generate_iv (ort, payload, iv_sz);
- if (ipsec_sa_is_set_IS_CTR (sa))
+ if (ort->is_ctr)
{
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
(esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
- if (ipsec_sa_is_set_IS_AEAD (sa))
+ if (ort->is_aead)
{
/* constuct aad in a scratch space in front of the nonce */
aad = (u8 *) nonce - sizeof (esp_aead_t);
- esp_aad_fill (aad, esp, sa, sa->seq_hi);
- if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa)))
+ esp_aad_fill (aad, esp, ort->use_esn, ort->seq64 >> 32);
+ if (PREDICT_FALSE (ort->is_null_gmac))
{
/* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
crypto_start_offset -= iv_sz;
@@ -538,7 +531,7 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
nonce->ctr = clib_host_to_net_u32 (1);
}
- nonce->salt = sa->salt;
+ nonce->salt = ort->salt;
nonce->iv = *(u64 *) pkt_iv;
iv = (u8 *) nonce;
}
@@ -558,11 +551,11 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
tag = vlib_buffer_get_tail (lb) - icv_sz;
crypto_total_len = esp_encrypt_chain_crypto (
- vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset,
+ vm, ptd, b, lb, icv_sz, b->data + crypto_start_offset,
crypto_total_len + icv_sz, 0);
}
- if (sa->integ_op_id)
+ if (ort->integ_op_id)
{
integ_start_offset -= iv_sz + sizeof (esp_header_t);
integ_total_len += iv_sz + sizeof (esp_header_t);
@@ -570,15 +563,14 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
if (b != lb)
{
integ_total_len = esp_encrypt_chain_integ (
- vm, ptd, sa, b, lb, icv_sz,
+ vm, ptd, ort, b, lb, icv_sz,
payload - iv_sz - sizeof (esp_header_t),
payload_len + iv_sz + sizeof (esp_header_t), tag, 0);
}
- else if (ipsec_sa_is_set_USE_ESN (sa))
+ else if (ort->use_esn)
{
- u32 seq_hi = clib_net_to_host_u32 (sa->seq_hi);
- clib_memcpy_fast (tag, &seq_hi, sizeof (seq_hi));
- integ_total_len += sizeof (seq_hi);
+ *(u32u *) tag = clib_net_to_host_u32 (ort->seq64 >> 32);
+ integ_total_len += sizeof (u32);
}
}
@@ -615,18 +607,17 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 *from = vlib_frame_vector_args (frame);
u32 n_left = frame->n_vectors;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u16 buffer_data_size = vlib_buffer_get_default_data_size (vm);
u32 current_sa_index = ~0, current_sa_packets = 0;
u32 current_sa_bytes = 0, spi = 0;
u8 esp_align = 4, iv_sz = 0, icv_sz = 0;
- ipsec_sa_t *sa0 = 0;
- u8 sa_drop_no_crypto = 0;
+ ipsec_sa_outb_rt_t *ort = 0;
vlib_buffer_t *lb;
vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
vnet_crypto_op_t **integ_ops = &ptd->integ_ops;
vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_N_OP_IDS];
- int is_async = im->async_mode;
+ int is_async = 0;
vnet_crypto_op_id_t async_op = ~0;
u16 drop_next =
(lt == VNET_LINK_IP6 ? ESP_ENCRYPT_NEXT_DROP6 :
@@ -708,27 +699,20 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
current_sa_packets, current_sa_bytes);
current_sa_packets = current_sa_bytes = 0;
- sa0 = ipsec_sa_get (sa_index0);
+ ort = ipsec_sa_get_outb_rt_by_index (sa_index0);
current_sa_index = sa_index0;
- sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
- sa0->integ_alg == IPSEC_INTEG_ALG_NONE) &&
- !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0));
-
vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
current_sa_index);
- /* fetch the second cacheline ASAP */
- clib_prefetch_load (sa0->cacheline1);
-
- spi = clib_net_to_host_u32 (sa0->spi);
- esp_align = sa0->esp_block_align;
- icv_sz = sa0->integ_icv_size;
- iv_sz = sa0->crypto_iv_size;
- is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
+ spi = ort->spi_be;
+ icv_sz = ort->integ_icv_size;
+ esp_align = ort->esp_block_align;
+ iv_sz = ort->cipher_iv_size;
+ is_async = ort->is_async;
}
- if (PREDICT_FALSE (sa_drop_no_crypto != 0))
+ if (PREDICT_FALSE (ort->drop_no_crypto != 0))
{
err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION;
esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
@@ -736,18 +720,18 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto trace;
}
- if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == ort->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
* another thread */
- clib_atomic_cmp_and_swap (&sa0->thread_index, ~0,
+ clib_atomic_cmp_and_swap (&ort->thread_index, ~0,
ipsec_sa_assign_thread (thread_index));
}
- if (PREDICT_FALSE (thread_index != sa0->thread_index))
+ if (PREDICT_FALSE (thread_index != ort->thread_index))
{
- vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
+ vnet_buffer (b[0])->ipsec.thread_index = ort->thread_index;
err = ESP_ENCRYPT_ERROR_HANDOFF;
esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
noop_nexts, handoff_next,
@@ -772,7 +756,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lb = vlib_get_buffer (vm, lb->next_buffer);
}
- if (PREDICT_FALSE (esp_seq_advance (sa0)))
+ if (PREDICT_FALSE (esp_seq_advance (ort)))
{
err = ESP_ENCRYPT_ERROR_SEQ_CYCLED;
esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
@@ -783,7 +767,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* space for IV */
hdr_len = iv_sz;
- if (ipsec_sa_is_set_IS_TUNNEL (sa0))
+ if (ort->is_tunnel)
{
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
@@ -806,40 +790,39 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
esp = (esp_header_t *) (payload - hdr_len);
/* optional UDP header */
- if (ipsec_sa_is_set_UDP_ENCAP (sa0))
+ if (ort->udp_encap)
{
hdr_len += sizeof (udp_header_t);
- esp_fill_udp_hdr (sa0, (udp_header_t *) (payload - hdr_len),
+ esp_fill_udp_hdr (ort, (udp_header_t *) (payload - hdr_len),
payload_len_total + hdr_len);
}
/* IP header */
- if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa0))
+ if (ort->is_tunnel_v6)
{
ip6_header_t *ip6;
u16 len = sizeof (ip6_header_t);
hdr_len += len;
ip6 = (ip6_header_t *) (payload - hdr_len);
- clib_memcpy_fast (ip6, &sa0->ip6_hdr, sizeof (ip6_header_t));
+ clib_memcpy_fast (ip6, &ort->ip6_hdr, sizeof (ip6_header_t));
if (VNET_LINK_IP6 == lt)
{
*next_hdr_ptr = IP_PROTOCOL_IPV6;
- tunnel_encap_fixup_6o6 (sa0->tunnel_flags,
- (const ip6_header_t *) payload,
- ip6);
+ tunnel_encap_fixup_6o6 (ort->tunnel_flags,
+ (const ip6_header_t *) payload, ip6);
}
else if (VNET_LINK_IP4 == lt)
{
*next_hdr_ptr = IP_PROTOCOL_IP_IN_IP;
- tunnel_encap_fixup_4o6 (sa0->tunnel_flags, b[0],
+ tunnel_encap_fixup_4o6 (ort->tunnel_flags, b[0],
(const ip4_header_t *) payload, ip6);
}
else if (VNET_LINK_MPLS == lt)
{
*next_hdr_ptr = IP_PROTOCOL_MPLS_IN_IP;
tunnel_encap_fixup_mplso6 (
- sa0->tunnel_flags, b[0],
+ ort->tunnel_flags, b[0],
(const mpls_unicast_header_t *) payload, ip6);
}
else
@@ -855,27 +838,25 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 len = sizeof (ip4_header_t);
hdr_len += len;
ip4 = (ip4_header_t *) (payload - hdr_len);
- clib_memcpy_fast (ip4, &sa0->ip4_hdr, sizeof (ip4_header_t));
+ clib_memcpy_fast (ip4, &ort->ip4_hdr, sizeof (ip4_header_t));
if (VNET_LINK_IP6 == lt)
{
*next_hdr_ptr = IP_PROTOCOL_IPV6;
- tunnel_encap_fixup_6o4_w_chksum (sa0->tunnel_flags,
- (const ip6_header_t *)
- payload, ip4);
+ tunnel_encap_fixup_6o4_w_chksum (
+ ort->tunnel_flags, (const ip6_header_t *) payload, ip4);
}
else if (VNET_LINK_IP4 == lt)
{
*next_hdr_ptr = IP_PROTOCOL_IP_IN_IP;
- tunnel_encap_fixup_4o4_w_chksum (sa0->tunnel_flags,
- (const ip4_header_t *)
- payload, ip4);
+ tunnel_encap_fixup_4o4_w_chksum (
+ ort->tunnel_flags, (const ip4_header_t *) payload, ip4);
}
else if (VNET_LINK_MPLS == lt)
{
*next_hdr_ptr = IP_PROTOCOL_MPLS_IN_IP;
tunnel_encap_fixup_mplso4_w_chksum (
- sa0->tunnel_flags, (const mpls_unicast_header_t *) payload,
+ ort->tunnel_flags, (const mpls_unicast_header_t *) payload,
ip4);
}
else
@@ -885,8 +866,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
esp_update_ip4_hdr (ip4, len, /* is_transport */ 0, 0);
}
- if (ipsec_sa_is_set_UDP_ENCAP (sa0) &&
- ipsec_sa_is_set_IS_TUNNEL_V6 (sa0))
+ if (ort->udp_encap && ort->is_tunnel_v6)
{
i16 l3_off = b[0]->current_data - hdr_len;
i16 l4_off = l3_off + sizeof (ip6_header_t);
@@ -894,7 +874,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
set_ip6_udp_cksum_offload (b[0], l3_off, l4_off);
}
- dpo = &sa0->dpo;
+ dpo = &ort->dpo;
if (!is_tun)
{
sync_next[0] = dpo->dpoi_next_node;
@@ -953,7 +933,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
esp = (esp_header_t *) (payload - hdr_len);
/* optional UDP header */
- if (ipsec_sa_is_set_UDP_ENCAP (sa0))
+ if (ort->udp_encap)
{
hdr_len += sizeof (udp_header_t);
udp = (udp_header_t *) (payload - hdr_len);
@@ -1010,7 +990,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (udp)
{
udp_len = len - ip_len;
- esp_fill_udp_hdr (sa0, udp, udp_len);
+ esp_fill_udp_hdr (ort, udp, udp_len);
}
if (udp && (VNET_LINK_IP6 == lt))
@@ -1036,11 +1016,11 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
esp->spi = spi;
- esp->seq = clib_net_to_host_u32 (sa0->seq);
+ esp->seq = clib_net_to_host_u32 (ort->seq64);
if (is_async)
{
- async_op = sa0->crypto_async_enc_op_id;
+ async_op = ort->async_op_id;
/* get a frame for this op if we don't yet have one or it's full
*/
@@ -1063,15 +1043,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
- esp_prepare_async_frame (vm, ptd, async_frames[async_op], sa0, b[0],
+ esp_prepare_async_frame (vm, ptd, async_frames[async_op], ort, b[0],
esp, payload, payload_len, iv_sz, icv_sz,
from[b - bufs], sync_next[0], hdr_len,
async_next_node, lb);
}
else
- esp_prepare_sync_op (vm, ptd, crypto_ops, integ_ops, sa0, sa0->seq_hi,
- payload, payload_len, iv_sz, icv_sz, n_sync, b,
- lb, hdr_len, esp);
+ esp_prepare_sync_op (vm, ptd, crypto_ops, integ_ops, ort,
+ ort->seq64 >> 32, payload, payload_len, iv_sz,
+ icv_sz, n_sync, b, lb, hdr_len, esp);
vlib_buffer_advance (b[0], 0LL - hdr_len);
@@ -1087,13 +1067,13 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
clib_memset_u8 (tr, 0xff, sizeof (*tr));
else
{
+ ipsec_sa_t *sa = ipsec_sa_get (sa_index0);
tr->sa_index = sa_index0;
- tr->spi = sa0->spi;
- tr->seq = sa0->seq;
- tr->sa_seq_hi = sa0->seq_hi;
- tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0);
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
+ tr->spi = sa->spi;
+ tr->seq = ort->seq64;
+ tr->udp_encap = ort->udp_encap;
+ tr->crypto_alg = sa->crypto_alg;
+ tr->integ_alg = sa->integ_alg;
}
}
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index a1d4d56768c..b95b65dfeea 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -312,9 +312,9 @@ clib_error_t *
ipsec_rsc_in_use (ipsec_main_t * im)
{
/* return an error is crypto resource are in use */
- if (pool_elts (ipsec_sa_pool) > 0)
+ if (pool_elts (im->sa_pool) > 0)
return clib_error_return (0, "%d SA entries configured",
- pool_elts (ipsec_sa_pool));
+ pool_elts (im->sa_pool));
if (ipsec_itf_count () > 0)
return clib_error_return (0, "%d IPSec interface configured",
ipsec_itf_count ());
@@ -384,7 +384,7 @@ ipsec_set_async_mode (u32 is_enabled)
im->async_mode = is_enabled;
/* change SA crypto op data */
- pool_foreach (sa, ipsec_sa_pool)
+ pool_foreach (sa, im->sa_pool)
ipsec_sa_set_async_mode (sa, is_enabled);
}
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 3409d0e4fb9..bdc4ee4d455 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -118,6 +118,9 @@ typedef struct
const u8 iv_size;
const u8 block_align;
const u8 icv_size;
+ const u8 is_aead : 1;
+ const u8 is_ctr : 1;
+ const u8 is_null_gmac : 1;
} ipsec_main_crypto_alg_t;
typedef struct
@@ -263,6 +266,10 @@ typedef struct
u8 async_mode;
u16 msg_id_base;
+
+ ipsec_sa_t *sa_pool;
+ ipsec_sa_inb_rt_t **inb_sa_runtimes;
+ ipsec_sa_outb_rt_t **outb_sa_runtimes;
} ipsec_main_t;
typedef enum ipsec_format_flags_t_
@@ -354,8 +361,9 @@ ipsec_spinlock_unlock (i32 *lock)
*/
always_inline void
ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
- u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index,
- u16 *nexts, u16 drop_next, u32 sa_index)
+ clib_thread_index_t thread_index, u32 err,
+ u32 ipsec_sa_err, u16 index, u16 *nexts, u16 drop_next,
+ u32 sa_index)
{
nexts[index] = drop_next;
b->error = node->errors[err];
@@ -396,6 +404,8 @@ extern clib_error_t *ipsec_register_next_header (vlib_main_t *vm,
u8 next_header,
const char *next_node);
+#include <vnet/ipsec/ipsec_funcs.h>
+
#endif /* __IPSEC_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 21216b1a614..e5b38992de2 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -40,6 +40,26 @@
#define REPLY_MSG_ID_BASE ipsec_main.msg_id_base
#include <vlibapi/api_helper_macros.h>
+static inline u64
+ipsec_sa_get_inb_seq (ipsec_sa_t *sa)
+{
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+ u64 seq = irt->seq64;
+ if (!ipsec_sa_is_set_USE_ESN (sa))
+ seq = (u32) seq;
+ return seq;
+}
+
+static inline u64
+ipsec_sa_get_outb_seq (ipsec_sa_t *sa)
+{
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
+ u64 seq;
+
+ seq = ort->seq64;
+ return seq;
+}
+
static void
vl_api_ipsec_spd_add_del_t_handler (vl_api_ipsec_spd_add_del_t * mp)
{
@@ -950,6 +970,8 @@ ipsec_sa_dump_match_sa (index_t itpi, void *arg)
static walk_rc_t
send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_sa_details_t *mp;
@@ -975,7 +997,7 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
if (ipsec_sa_is_set_IS_PROTECT (sa))
{
ipsec_sa_dump_match_ctx_t ctx = {
- .sai = sa - ipsec_sa_pool,
+ .sai = sa - im->sa_pool,
.sw_if_index = ~0,
};
ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
@@ -992,22 +1014,16 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
}
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- mp->entry.udp_src_port = sa->udp_hdr.src_port;
- mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port);
+ mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port);
}
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
- mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
- if (ipsec_sa_is_set_USE_ESN (sa))
- {
- mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- }
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- {
- mp->replay_window =
- clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
- }
+ mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa));
+ mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa));
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt)
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1036,6 +1052,8 @@ vl_api_ipsec_sa_dump_t_handler (vl_api_ipsec_sa_dump_t * mp)
static walk_rc_t
send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_sa_v2_details_t *mp;
@@ -1061,7 +1079,7 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
if (ipsec_sa_is_set_IS_PROTECT (sa))
{
ipsec_sa_dump_match_ctx_t ctx = {
- .sai = sa - ipsec_sa_pool,
+ .sai = sa - im->sa_pool,
.sw_if_index = ~0,
};
ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
@@ -1078,26 +1096,20 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
}
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- mp->entry.udp_src_port = sa->udp_hdr.src_port;
- mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port);
+ mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port);
}
mp->entry.tunnel_flags =
tunnel_encap_decap_flags_encode (sa->tunnel.t_encap_decap_flags);
mp->entry.dscp = ip_dscp_encode (sa->tunnel.t_dscp);
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
- mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
- if (ipsec_sa_is_set_USE_ESN (sa))
- {
- mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- }
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- {
- mp->replay_window =
- clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
- }
+ mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa));
+ mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa));
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt)
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1126,6 +1138,8 @@ vl_api_ipsec_sa_v2_dump_t_handler (vl_api_ipsec_sa_v2_dump_t *mp)
static walk_rc_t
send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_sa_v3_details_t *mp;
@@ -1150,7 +1164,7 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_IS_PROTECT (sa))
{
ipsec_sa_dump_match_ctx_t ctx = {
- .sai = sa - ipsec_sa_pool,
+ .sai = sa - im->sa_pool,
.sw_if_index = ~0,
};
ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
@@ -1165,22 +1179,16 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- mp->entry.udp_src_port = sa->udp_hdr.src_port;
- mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port);
+ mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port);
}
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
- mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
- if (ipsec_sa_is_set_USE_ESN (sa))
- {
- mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- }
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- {
- mp->replay_window =
- clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
- }
+ mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa));
+ mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa));
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt)
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1209,8 +1217,12 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp)
static walk_rc_t
send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_sa_v4_details_t *mp;
+ clib_thread_index_t thread_index = 0;
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
@@ -1233,7 +1245,7 @@ send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_IS_PROTECT (sa))
{
ipsec_sa_dump_match_ctx_t ctx = {
- .sai = sa - ipsec_sa_pool,
+ .sai = sa - im->sa_pool,
.sw_if_index = ~0,
};
ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
@@ -1248,24 +1260,23 @@ send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- mp->entry.udp_src_port = sa->udp_hdr.src_port;
- mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port);
+ mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port);
}
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
- mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
- if (ipsec_sa_is_set_USE_ESN (sa))
- {
- mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- }
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- {
- mp->replay_window =
- clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
- }
+ mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa));
+ mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa));
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt)
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
+
+ if (ort)
+ thread_index = ort->thread_index;
+ else if (irt)
+ thread_index = irt->thread_index;
- mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->thread_index = clib_host_to_net_u32 (thread_index);
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -1293,8 +1304,12 @@ vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp)
static walk_rc_t
send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_sa_v5_details_t *mp;
+ clib_thread_index_t thread_index = 0;
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
@@ -1317,7 +1332,7 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_IS_PROTECT (sa))
{
ipsec_sa_dump_match_ctx_t ctx = {
- .sai = sa - ipsec_sa_pool,
+ .sai = sa - im->sa_pool,
.sw_if_index = ~0,
};
ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
@@ -1332,27 +1347,27 @@ send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- mp->entry.udp_src_port = sa->udp_hdr.src_port;
- mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ mp->entry.udp_src_port = clib_host_to_net_u16 (sa->udp_src_port);
+ mp->entry.udp_dst_port = clib_host_to_net_u16 (sa->udp_dst_port);
}
- mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
- mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
- if (ipsec_sa_is_set_USE_ESN (sa))
- {
- mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
- }
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ mp->seq_outbound = clib_host_to_net_u64 (ipsec_sa_get_outb_seq (sa));
+ mp->last_seq_inbound = clib_host_to_net_u64 (ipsec_sa_get_inb_seq (sa));
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && irt)
{
mp->replay_window =
- clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
-
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (irt));
mp->entry.anti_replay_window_size =
- clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ clib_host_to_net_u32 (irt->anti_replay_window_size);
}
- mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ if (ort)
+ thread_index = ort->thread_index;
+ else if (irt)
+ thread_index = irt->thread_index;
+
+ mp->thread_index = clib_host_to_net_u32 (thread_index);
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -1427,11 +1442,11 @@ vl_api_ipsec_select_backend_t_handler (vl_api_ipsec_select_backend_t * mp)
vl_api_ipsec_select_backend_reply_t *rmp;
ipsec_protocol_t protocol;
int rv = 0;
- if (pool_elts (ipsec_sa_pool) > 0)
- {
- rv = VNET_API_ERROR_INSTANCE_IN_USE;
- goto done;
- }
+ if (pool_elts (im->sa_pool) > 0)
+ {
+ rv = VNET_API_ERROR_INSTANCE_IN_USE;
+ goto done;
+ }
rv = ipsec_proto_decode (mp->protocol, &protocol);
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index 07d9df8f204..77a29d263eb 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -473,7 +473,7 @@ ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail)
{
u32 sai;
- pool_foreach_index (sai, ipsec_sa_pool)
+ pool_foreach_index (sai, im->sa_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_sa, sai,
(detail ? IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF));
@@ -583,6 +583,7 @@ static clib_error_t *
clear_ipsec_sa_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
+ ipsec_main_t *im = &ipsec_main;
u32 sai = ~0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -595,14 +596,14 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
if (~0 == sai)
{
- pool_foreach_index (sai, ipsec_sa_pool)
+ pool_foreach_index (sai, im->sa_pool)
{
ipsec_sa_clear (sai);
}
}
else
{
- if (pool_is_free_index (ipsec_sa_pool, sai))
+ if (pool_is_free_index (im->sa_pool, sai))
return clib_error_return (0, "unknown SA index: %d", sai);
else
ipsec_sa_clear (sai);
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index e421a0d96b4..e27892185e7 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -441,19 +441,24 @@ format_ipsec_sa_flags (u8 * s, va_list * args)
u8 *
format_ipsec_sa (u8 * s, va_list * args)
{
+ ipsec_main_t *im = &ipsec_main;
u32 sai = va_arg (*args, u32);
ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t);
vlib_counter_t counts;
counter_t errors;
ipsec_sa_t *sa;
+ ipsec_sa_inb_rt_t *irt;
+ ipsec_sa_outb_rt_t *ort;
- if (pool_is_free_index (ipsec_sa_pool, sai))
+ if (pool_is_free_index (im->sa_pool, sai))
{
s = format (s, "No such SA index: %d", sai);
goto done;
}
sa = ipsec_sa_get (sai);
+ irt = ipsec_sa_get_inb_rt (sa);
+ ort = ipsec_sa_get_outb_rt (sa);
s = format (s, "[%d] sa %u (0x%x) spi %u (0x%08x) protocol:%s flags:[%U]",
sai, sa->id, sa->id, sa->spi, sa->spi,
@@ -464,12 +469,20 @@ format_ipsec_sa (u8 * s, va_list * args)
s = format (s, "\n locks %d", sa->node.fn_locks);
s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt));
- s = format (s, "\n thread-index:%d", sa->thread_index);
- s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi);
- s = format (s, "\n window-size: %llu",
- IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
- s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
- ipsec_sa_anti_replay_get_64b_window (sa));
+ if (irt)
+ s = format (s, "\n inbound thread-index:%d", irt->thread_index);
+ if (ort)
+ s = format (s, "\n outbound thread-index:%d", ort->thread_index);
+ if (irt)
+ s = format (s, "\n inbound seq %lu", irt->seq64);
+ if (ort)
+ s = format (s, "\n outbound seq %lu", ort->seq64);
+ if (irt)
+ {
+ s = format (s, "\n window-size: %llu", irt->anti_replay_window_size);
+ s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
+ ipsec_sa_anti_replay_get_64b_window (irt));
+ }
s =
format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg);
if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE))
@@ -482,9 +495,8 @@ format_ipsec_sa (u8 * s, va_list * args)
s = format (s, " key %U", format_ipsec_key, &sa->integ_key);
else
s = format (s, " key [redacted]");
- s = format (s, "\n UDP:[src:%d dst:%d]",
- clib_host_to_net_u16 (sa->udp_hdr.src_port),
- clib_host_to_net_u16 (sa->udp_hdr.dst_port));
+ s =
+ format (s, "\n UDP:[src:%d dst:%d]", sa->udp_src_port, sa->udp_dst_port);
vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts);
s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets,
diff --git a/src/vnet/ipsec/ipsec_funcs.h b/src/vnet/ipsec/ipsec_funcs.h
new file mode 100644
index 00000000000..29788b3d765
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_funcs.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __IPSEC_FUNCS_H__
+#define __IPSEC_FUNCS_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ipsec/ipsec.h>
+
+/* Inline accessors for the SA pool and the per-SA inbound/outbound
+ * runtime tables held in ipsec_main. */
+
+/* Return the SA stored at sa_index in ipsec_main.sa_pool. */
+static_always_inline ipsec_sa_t *
+ipsec_sa_get (u32 sa_index)
+{
+  return (pool_elt_at_index (ipsec_main.sa_pool, sa_index));
+}
+
+/* Return the outbound runtime slot for sa_index.  The table is indexed
+ * directly; callers test the returned pointer against NULL. */
+static_always_inline ipsec_sa_outb_rt_t *
+ipsec_sa_get_outb_rt_by_index (u32 sa_index)
+{
+  return ipsec_main.outb_sa_runtimes[sa_index];
+}
+
+/* Return the inbound runtime slot for sa_index.  The table is indexed
+ * directly; callers test the returned pointer against NULL. */
+static_always_inline ipsec_sa_inb_rt_t *
+ipsec_sa_get_inb_rt_by_index (u32 sa_index)
+{
+  return ipsec_main.inb_sa_runtimes[sa_index];
+}
+
+/* Outbound runtime of sa.  The index is derived by pointer subtraction,
+ * so sa must point into ipsec_main.sa_pool. */
+static_always_inline ipsec_sa_outb_rt_t *
+ipsec_sa_get_outb_rt (ipsec_sa_t *sa)
+{
+  return ipsec_sa_get_outb_rt_by_index (sa - ipsec_main.sa_pool);
+}
+
+/* Inbound runtime of sa.  The index is derived by pointer subtraction,
+ * so sa must point into ipsec_main.sa_pool. */
+static_always_inline ipsec_sa_inb_rt_t *
+ipsec_sa_get_inb_rt (ipsec_sa_t *sa)
+{
+  return ipsec_sa_get_inb_rt_by_index (sa - ipsec_main.sa_pool);
+}
+
+#endif /* __IPSEC_FUNCS_H__ */
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 6a25f6c583c..d649c705bb0 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -211,6 +211,39 @@ ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da,
return 0;
}
+always_inline uword
+ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua)
+{
+  /* Inclusive test la <= a <= ua using a byte-wise compare of both u64s. */
+  const uword n_bytes = 2 * sizeof (u64);
+  int below = memcmp (a->as_u64, la->as_u64, n_bytes) < 0;
+  int above = !below && memcmp (a->as_u64, ua->as_u64, n_bytes) > 0;
+  return (below || above) ? 0 : 1;
+}
+
+always_inline ipsec_policy_t *
+ipsec6_input_policy_match (ipsec_spd_t *spd, ip6_address_t *sa,
+                           ip6_address_t *da,
+                           ipsec_spd_policy_type_t policy_type)
+{
+  /* Linear scan of the SPD's list for policy_type: the first policy whose
+   * remote range holds sa and whose local range holds da is returned. */
+  u32 *policy_index;
+
+  vec_foreach (policy_index, spd->policies[policy_type])
+    {
+      ipsec_policy_t *candidate =
+        pool_elt_at_index (ipsec_main.policies, *policy_index);
+
+      if (ip6_addr_match_range (sa, &candidate->raddr.start.ip6,
+                                &candidate->raddr.stop.ip6) &&
+          ip6_addr_match_range (da, &candidate->laddr.start.ip6,
+                                &candidate->laddr.stop.ip6))
+        return candidate;
+    }
+  return 0;
+}
+
always_inline ipsec_policy_t *
ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
{
@@ -263,16 +296,6 @@ ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
return 0;
}
-always_inline uword
-ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
- ip6_address_t * ua)
-{
- if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
- (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
- return 1;
- return 0;
-}
-
always_inline void
ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node,
vlib_main_t *vm, ip4_header_t *ip0, ah_header_t *ah0,
@@ -295,10 +318,11 @@ ipsec_collect_ah_trace (vlib_buffer_t **b, vlib_node_runtime_t *node,
always_inline void
ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
- ah_header_t *ah0, u32 thread_index, ipsec_spd_t *spd0,
- vlib_buffer_t **b, vlib_node_runtime_t *node,
- u64 *ipsec_bypassed, u64 *ipsec_dropped,
- u64 *ipsec_matched, u64 *ipsec_unprocessed, u16 *next)
+ ah_header_t *ah0, clib_thread_index_t thread_index,
+ ipsec_spd_t *spd0, vlib_buffer_t **b,
+ vlib_node_runtime_t *node, u64 *ipsec_bypassed,
+ u64 *ipsec_dropped, u64 *ipsec_matched,
+ u64 *ipsec_unprocessed, u16 *next)
{
ipsec_policy_t *p0 = NULL;
@@ -429,7 +453,7 @@ ipsec_ah_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
always_inline void
ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
udp_header_t *udp0, esp_header_t *esp0,
- u32 thread_index, ipsec_spd_t *spd0,
+ clib_thread_index_t thread_index, ipsec_spd_t *spd0,
vlib_buffer_t **b, vlib_node_runtime_t *node,
u64 *ipsec_bypassed, u64 *ipsec_dropped,
u64 *ipsec_matched, u64 *ipsec_unprocessed,
@@ -514,7 +538,7 @@ udp_or_esp:
has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
(clib_address_t) ip0);
- if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ if (PREDICT_TRUE ((p0 != NULL) && (has_space0)))
{
*ipsec_matched += 1;
@@ -740,8 +764,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
spd0, b, node, &ipsec_bypassed,
&ipsec_dropped, &ipsec_matched,
&ipsec_unprocessed, next);
- if (ipsec_bypassed > 0)
- goto ipsec_bypassed;
}
}
else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
@@ -751,8 +773,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
spd0, b, node, &ipsec_bypassed,
&ipsec_dropped, &ipsec_matched,
&ipsec_unprocessed, next);
- if (ipsec_bypassed > 0)
- goto ipsec_bypassed;
}
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
{
@@ -764,7 +784,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
}
else
{
- ipsec_bypassed:
ipsec_unprocessed += 1;
}
n_left_from -= 1;
@@ -813,6 +832,142 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = {
extern vlib_node_registration_t ipsec6_input_node;
+always_inline void
+ipsec6_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im,
+ ip6_header_t *ip0, esp_header_t *esp0,
+ clib_thread_index_t thread_index, ipsec_spd_t *spd0,
+ vlib_buffer_t **b, vlib_node_runtime_t *node,
+ u64 *ipsec_bypassed, u64 *ipsec_dropped,
+ u64 *ipsec_matched, u64 *ipsec_unprocessed,
+ u32 *next)
+/* Classify one IPv6 ESP packet: PROTECT, then BYPASS, then DISCARD. */
+{
+ ipsec_policy_t *p0 = NULL;
+ u32 pi0 = ~0;
+ u8 has_space0 = 0;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
+ /* Pre-build the lookup tuple only when the fast-path SPD is usable. */
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ ipsec_fp_in_5tuple_from_ip6_range (
+ &tuples[0], &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi), IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
+ /* The PROTECT lookup is attempted only for a non-zero SPI. */
+ if (esp0->spi != 0)
+ {
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies,
+ 1);
+ p0 = policies[0];
+ }
+ else /* linear search if fast path is not enabled */
+ {
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi));
+ }
+ has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
+ (clib_address_t) ip0);
+
+ if (PREDICT_TRUE ((p0 != NULL) && (has_space0)))
+ {
+ *ipsec_matched += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+ /* Hand off to ESP decrypt with the buffer moved to the ESP header. */
+ vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
+ next[0] = im->esp6_decrypt_next_index;
+ vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
+ goto trace0;
+ }
+ }
+ /* No PROTECT hit (or SPI 0): try the BYPASS policies next. */
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+ {
+ p0 =
+ ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address,
+ IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_bypassed += 1;
+ /* next[0] is not written here; it keeps the value the caller set. */
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+ goto trace0;
+ }
+ else
+ {
+ p0 = NULL;
+ pi0 = ~0;
+ }
+ /* Still unmatched: try the DISCARD policies. */
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+ {
+ p0 =
+ ipsec6_input_policy_match (spd0, &ip0->src_address, &ip0->dst_address,
+ IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_dropped += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length));
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ }
+
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+
+trace0:
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ /* spi/seq are read from the packet only when has_space0 was set. */
+ tr->proto = ip0->protocol;
+ tr->sa_id = p0 ? p0->sa_id : ~0;
+ tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
+ tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+}
VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -822,9 +977,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
ipsec_main_t *im = &ipsec_main;
u32 ipsec_unprocessed = 0;
u32 ipsec_matched = 0;
- ipsec_policy_t *policies[1];
- ipsec_fp_5tuple_t tuples[1];
- bool ip_v6 = true;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -843,12 +995,13 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
u32 bi0, next0, pi0 = ~0;
vlib_buffer_t *b0;
ip6_header_t *ip0;
- esp_header_t *esp0;
+ esp_header_t *esp0 = NULL;
ip4_ipsec_config_t *c0;
ipsec_spd_t *spd0;
ipsec_policy_t *p0 = 0;
- ah_header_t *ah0;
u32 header_size = sizeof (ip0[0]);
+ u64 ipsec_unprocessed = 0, ipsec_matched = 0;
+ u64 ipsec_dropped = 0, ipsec_bypassed = 0;
bi0 = to_next[0] = from[0];
from += 1;
@@ -864,113 +1017,76 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
spd0 = pool_elt_at_index (im->spds, c0->spd_index);
ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ {
+ udp_header_t *udp0 = (udp_header_t *) ((u8 *) ip0 + header_size);
+
+ /* RFC5996 Section 2.23: "Port 4500 is reserved for
+ * UDP-encapsulated ESP and IKE."
+ * RFC5996 Section 3.1: "IKE messages use UDP ports 500 and/or
+ 4500"
+ */
+ if ((clib_host_to_net_u16 (500) == udp0->dst_port) ||
+ (clib_host_to_net_u16 (4500) == udp0->dst_port))
+ esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t));
+ }
+ else if (ip0->protocol == IP_PROTOCOL_IPSEC_ESP)
esp0 = (esp_header_t *) ((u8 *) ip0 + header_size);
- ah0 = (ah_header_t *) ((u8 *) ip0 + header_size);
- if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
- {
-#if 0
- clib_warning
- ("packet received from %U to %U spi %u size %u spd_id %u",
- format_ip6_address, &ip0->src_address, format_ip6_address,
- &ip0->dst_address, clib_net_to_host_u32 (esp0->spi),
- clib_net_to_host_u16 (ip0->payload_length) + header_size,
- spd0->id);
-#endif
- if (im->fp_spd_ipv6_in_is_enabled &&
- PREDICT_TRUE (INDEX_INVALID !=
- spd0->fp_spd.ip6_in_lookup_hash_idx))
- {
- ipsec_fp_in_5tuple_from_ip6_range (
- &tuples[0], &ip0->src_address, &ip0->dst_address,
- clib_net_to_host_u32 (esp0->spi),
- IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
- ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples,
- policies, 1);
- p0 = policies[0];
- }
- else
- p0 = ipsec6_input_protect_policy_match (
- spd0, &ip0->src_address, &ip0->dst_address,
- clib_net_to_host_u32 (esp0->spi));
-
- if (PREDICT_TRUE (p0 != 0))
- {
- ipsec_matched += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->payload_length) +
- header_size);
-
- vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
- next0 = im->esp6_decrypt_next_index;
- vlib_buffer_advance (b0, header_size);
- /* TODO Add policy matching for bypass and discard policy
- * type */
- goto trace0;
- }
- else
- {
- pi0 = ~0;
- ipsec_unprocessed += 1;
- next0 = IPSEC_INPUT_NEXT_DROP;
- }
- }
+ if (esp0 != NULL)
+ {
+ ipsec6_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0,
+ &b0, node, &ipsec_bypassed,
+ &ipsec_dropped, &ipsec_matched,
+ &ipsec_unprocessed, &next0);
+ }
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
{
- p0 = ipsec6_input_protect_policy_match (spd0,
- &ip0->src_address,
- &ip0->dst_address,
- clib_net_to_host_u32
- (ah0->spi));
-
- if (PREDICT_TRUE (p0 != 0))
- {
- ipsec_matched += 1;
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->payload_length) +
- header_size);
-
- vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
- next0 = im->ah6_decrypt_next_index;
- goto trace0;
- }
- else
- {
- pi0 = ~0;
- ipsec_unprocessed += 1;
- next0 = IPSEC_INPUT_NEXT_DROP;
- }
+ ah_header_t *ah0 = (ah_header_t *) ((u8 *) ip0 + header_size);
+
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (ah0->spi));
+
+ if (PREDICT_TRUE (p0 != 0))
+ {
+ ipsec_matched += 1;
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (
+ &ipsec_spd_policy_counters, thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->payload_length) + header_size);
+
+ vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
+ next0 = im->ah6_decrypt_next_index;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+ if (p0)
+ {
+ tr->sa_id = p0->sa_id;
+ tr->policy_type = p0->type;
+ }
+
+ tr->proto = ip0->protocol;
+ tr->spi = clib_net_to_host_u32 (ah0->spi);
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+ }
+ else
+ {
+ pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
+ }
}
else
{
- ipsec_unprocessed += 1;
- }
-
- trace0:
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ipsec_input_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
-
- if (p0)
- {
- tr->sa_id = p0->sa_id;
- tr->policy_type = p0->type;
- }
-
- tr->proto = ip0->protocol;
- tr->spi = clib_net_to_host_u32 (esp0->spi);
- tr->seq = clib_net_to_host_u32 (esp0->seq);
- tr->spd = spd0->id;
- tr->policy_index = pi0;
+ ipsec_unprocessed += 1;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index d37d89d5e3e..2347a00f052 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -33,8 +33,6 @@ vlib_combined_counter_main_t ipsec_sa_counters = {
/* Per-SA error counters */
vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
-ipsec_sa_t *ipsec_sa_pool;
-
static clib_error_t *
ipsec_call_add_del_callbacks (ipsec_main_t * im, ipsec_sa_t * sa,
u32 sa_index, int is_add)
@@ -77,39 +75,71 @@ static void
ipsec_sa_stack (ipsec_sa_t * sa)
{
ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
dpo_id_t tmp = DPO_INVALID;
tunnel_contribute_forwarding (&sa->tunnel, &tmp);
if (IPSEC_PROTOCOL_AH == sa->protocol)
dpo_stack_from_node ((ipsec_sa_is_set_IS_TUNNEL_V6 (sa) ?
- im->ah6_encrypt_node_index :
- im->ah4_encrypt_node_index), &sa->dpo, &tmp);
+ im->ah6_encrypt_node_index :
+ im->ah4_encrypt_node_index),
+ &ort->dpo, &tmp);
else
dpo_stack_from_node ((ipsec_sa_is_set_IS_TUNNEL_V6 (sa) ?
- im->esp6_encrypt_node_index :
- im->esp4_encrypt_node_index), &sa->dpo, &tmp);
+ im->esp6_encrypt_node_index :
+ im->esp4_encrypt_node_index),
+ &ort->dpo, &tmp);
dpo_reset (&tmp);
}
void
ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled)
{
+ u32 cipher_key_index, integ_key_index;
+ vnet_crypto_op_id_t inb_cipher_op_id, outb_cipher_op_id, integ_op_id;
+ u32 is_async;
if (is_enabled)
{
- sa->crypto_key_index = sa->crypto_async_key_index;
- sa->crypto_enc_op_id = sa->crypto_async_enc_op_id;
- sa->crypto_dec_op_id = sa->crypto_async_dec_op_id;
- sa->integ_key_index = ~0;
- sa->integ_op_id = ~0;
+ if (sa->linked_key_index != ~0)
+ cipher_key_index = sa->linked_key_index;
+ else
+ cipher_key_index = sa->crypto_sync_key_index;
+
+ outb_cipher_op_id = sa->crypto_async_enc_op_id;
+ inb_cipher_op_id = sa->crypto_async_dec_op_id;
+ integ_key_index = ~0;
+ integ_op_id = ~0;
+ is_async = 1;
}
else
{
- sa->crypto_key_index = sa->crypto_sync_key_index;
- sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id;
- sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id;
- sa->integ_key_index = sa->integ_sync_key_index;
- sa->integ_op_id = sa->integ_sync_op_id;
+ cipher_key_index = sa->crypto_sync_key_index;
+ outb_cipher_op_id = sa->crypto_sync_enc_op_id;
+ inb_cipher_op_id = sa->crypto_sync_dec_op_id;
+ integ_key_index = sa->integ_sync_key_index;
+ integ_op_id = sa->integ_sync_op_id;
+ is_async = 0;
+ }
+
+ if (ipsec_sa_get_inb_rt (sa))
+ {
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+ irt->cipher_key_index = cipher_key_index;
+ irt->integ_key_index = integ_key_index;
+ irt->cipher_op_id = inb_cipher_op_id;
+ irt->integ_op_id = integ_op_id;
+ irt->is_async = is_async;
+ }
+
+ if (ipsec_sa_get_outb_rt (sa))
+ {
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
+ ort->cipher_key_index = cipher_key_index;
+ ort->integ_key_index = integ_key_index;
+ ort->cipher_op_id = outb_cipher_op_id;
+ ort->integ_op_id = integ_op_id;
+ ort->is_async = is_async;
}
}
@@ -117,32 +147,11 @@ void
ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_main_t *im = &ipsec_main;
+ ipsec_main_crypto_alg_t *alg = im->crypto_algs + crypto_alg;
sa->crypto_alg = crypto_alg;
- sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size;
- sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align);
- sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
- sa->crypto_sync_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
- sa->crypto_calg = im->crypto_algs[crypto_alg].alg;
- ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE);
- ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE);
- if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg) ||
- IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS (crypto_alg))
- {
- sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
- ipsec_sa_set_IS_CTR (sa);
- ipsec_sa_set_IS_AEAD (sa);
- }
- else if (IPSEC_CRYPTO_ALG_IS_CTR (crypto_alg))
- {
- ipsec_sa_set_IS_CTR (sa);
- }
- else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg))
- {
- sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
- ipsec_sa_set_IS_CTR (sa);
- ipsec_sa_set_IS_AEAD (sa);
- ipsec_sa_set_IS_NULL_GMAC (sa);
- }
+ sa->crypto_sync_enc_op_id = alg->enc_op_id;
+ sa->crypto_sync_dec_op_id = alg->dec_op_id;
+ sa->crypto_calg = alg->alg;
}
void
@@ -150,14 +159,12 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
{
ipsec_main_t *im = &ipsec_main;
sa->integ_alg = integ_alg;
- sa->integ_icv_size = im->integ_algs[integ_alg].icv_size;
sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id;
sa->integ_calg = im->integ_algs[integ_alg].alg;
- ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE);
}
-void
-ipsec_sa_set_async_op_ids (ipsec_sa_t * sa)
+static void
+ipsec_sa_set_async_op_ids (ipsec_sa_t *sa)
{
if (ipsec_sa_is_set_USE_ESN (sa))
{
@@ -191,12 +198,89 @@ ipsec_sa_set_async_op_ids (ipsec_sa_t * sa)
#undef _
}
+static void
+ipsec_sa_init_runtime (ipsec_sa_t *sa)
+{
+  ipsec_main_t *im = &ipsec_main;
+  ipsec_main_crypto_alg_t *alg = im->crypto_algs + sa->crypto_alg;
+  ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+  ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
+  u8 integ_icv_size;
+
+  /* An AEAD cipher supplies the ICV size itself; otherwise it comes from
+   * the integrity algorithm. */
+  integ_icv_size =
+    alg->is_aead ? alg->icv_size : im->integ_algs[sa->integ_alg].icv_size;
+  ASSERT (integ_icv_size <= ESP_MAX_ICV_SIZE);
+
+  /* Inbound runtime: flags, IV/ICV sizes, salt and the async decrypt op. */
+  if (irt)
+    {
+      irt->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa);
+      irt->use_esn = ipsec_sa_is_set_USE_ESN (sa);
+      irt->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa);
+      irt->is_transport =
+        !(ipsec_sa_is_set_IS_TUNNEL (sa) || ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+      irt->udp_sz = ipsec_sa_is_set_UDP_ENCAP (sa) ? sizeof (udp_header_t) : 0;
+      irt->is_ctr = alg->is_ctr;
+      irt->is_aead = alg->is_aead;
+      irt->is_null_gmac = alg->is_null_gmac;
+      irt->cipher_iv_size = alg->iv_size;
+      irt->integ_icv_size = integ_icv_size;
+      irt->salt = sa->salt;
+      irt->async_op_id = sa->crypto_async_dec_op_id;
+      ASSERT (irt->cipher_iv_size <= ESP_MAX_IV_SIZE);
+    }
+
+  /* Outbound runtime: also block alignment, big-endian SPI, tunnel data. */
+  if (ort)
+    {
+      ort->use_anti_replay = ipsec_sa_is_set_USE_ANTI_REPLAY (sa);
+      ort->use_esn = ipsec_sa_is_set_USE_ESN (sa);
+      ort->is_ctr = alg->is_ctr;
+      ort->is_aead = alg->is_aead;
+      ort->is_null_gmac = alg->is_null_gmac;
+      ort->is_tunnel = ipsec_sa_is_set_IS_TUNNEL (sa);
+      ort->is_tunnel_v6 = ipsec_sa_is_set_IS_TUNNEL_V6 (sa);
+      ort->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa);
+      ort->esp_block_align = clib_max (4, alg->block_align);
+      ort->cipher_iv_size = alg->iv_size;
+      ort->integ_icv_size = integ_icv_size;
+      ort->salt = sa->salt;
+      ort->spi_be = clib_host_to_net_u32 (sa->spi);
+      ort->tunnel_flags = sa->tunnel.t_encap_decap_flags;
+      ort->async_op_id = sa->crypto_async_enc_op_id;
+      ort->t_dscp = sa->tunnel.t_dscp;
+      ASSERT (ort->cipher_iv_size <= ESP_MAX_IV_SIZE);
+      ASSERT (ort->esp_block_align <= ESP_MAX_BLOCK_SIZE);
+    }
+  ipsec_sa_update_runtime (sa);
+}
+
+void
+ipsec_sa_update_runtime (ipsec_sa_t *sa)
+{
+  /* Recompute runtime fields from the SA's current flags and algorithms. */
+  ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+  ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
+
+  if (irt)
+    irt->is_protect = ipsec_sa_is_set_IS_PROTECT (sa);
+
+  /* Set when no cipher and no integ alg, and NO_ALGO_NO_DROP is clear. */
+  if (ort)
+    ort->drop_no_crypto = sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
+                          sa->integ_alg == IPSEC_INTEG_ALG_NONE &&
+                          !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa);
+}
+
int
ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
bool is_tun)
{
ipsec_main_t *im = &ipsec_main;
ipsec_sa_t *sa;
+ ipsec_sa_outb_rt_t *ort;
u32 sa_index;
uword *p;
int rv;
@@ -206,7 +290,8 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
return VNET_API_ERROR_NO_SUCH_ENTRY;
sa = ipsec_sa_get (p[0]);
- sa_index = sa - ipsec_sa_pool;
+ ort = ipsec_sa_get_outb_rt (sa);
+ sa_index = sa - im->sa_pool;
if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) &&
(ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 ||
@@ -267,16 +352,16 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
tunnel_copy (tun, &sa->tunnel);
if (!ipsec_sa_is_set_IS_INBOUND (sa))
{
- dpo_reset (&sa->dpo);
+ dpo_reset (&ort->dpo);
- sa->tunnel_flags = sa->tunnel.t_encap_decap_flags;
+ ort->tunnel_flags = sa->tunnel.t_encap_decap_flags;
rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index);
if (rv)
{
hash_unset (im->sa_index_by_sa_id, sa->id);
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return rv;
}
ipsec_sa_stack (sa);
@@ -285,39 +370,42 @@ ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
{
tunnel_build_v6_hdr (&sa->tunnel,
(ipsec_sa_is_set_UDP_ENCAP (sa) ?
- IP_PROTOCOL_UDP :
- IP_PROTOCOL_IPSEC_ESP),
- &sa->ip6_hdr);
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &ort->ip6_hdr);
}
else
{
tunnel_build_v4_hdr (&sa->tunnel,
(ipsec_sa_is_set_UDP_ENCAP (sa) ?
- IP_PROTOCOL_UDP :
- IP_PROTOCOL_IPSEC_ESP),
- &sa->ip4_hdr);
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &ort->ip4_hdr);
}
}
}
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
- if (dst_port != IPSEC_UDP_PORT_NONE &&
- dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port))
+ if (dst_port != IPSEC_UDP_PORT_NONE && dst_port != sa->udp_dst_port)
{
if (ipsec_sa_is_set_IS_INBOUND (sa))
{
- ipsec_unregister_udp_port (
- clib_net_to_host_u16 (sa->udp_hdr.dst_port),
- !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ ipsec_unregister_udp_port (sa->udp_dst_port,
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
ipsec_register_udp_port (dst_port,
!ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
}
- sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ sa->udp_dst_port = dst_port;
+ if (ort)
+ ort->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ }
+ if (src_port != IPSEC_UDP_PORT_NONE && src_port != (sa->udp_src_port))
+ {
+ sa->udp_src_port = src_port;
+ if (ort)
+ ort->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
}
- if (src_port != IPSEC_UDP_PORT_NONE &&
- src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port))
- sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
}
return (0);
}
@@ -332,9 +420,13 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
{
vlib_main_t *vm = vlib_get_main ();
ipsec_main_t *im = &ipsec_main;
+ ipsec_main_crypto_alg_t *alg = im->crypto_algs + crypto_alg;
+ ipsec_sa_inb_rt_t *irt;
+ ipsec_sa_outb_rt_t *ort;
clib_error_t *err;
ipsec_sa_t *sa;
- u32 sa_index;
+ u32 sa_index, irt_sz;
+ clib_thread_index_t thread_index = (vlib_num_workers ()) ? ~0 : 0;
u64 rand[2];
uword *p;
int rv;
@@ -346,13 +438,42 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (getrandom (rand, sizeof (rand), 0) != sizeof (rand))
return VNET_API_ERROR_INIT_FAILED;
- pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES);
+ pool_get_aligned_zero (im->sa_pool, sa, CLIB_CACHE_LINE_BYTES);
+ sa_index = sa - im->sa_pool;
+ sa->flags = flags;
+
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
+ /* window size rounded up to next power of 2 */
+ anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
+ else
+ anti_replay_window_size = 64;
+
+ vec_validate (im->inb_sa_runtimes, sa_index);
+ vec_validate (im->outb_sa_runtimes, sa_index);
+
+ irt_sz = sizeof (ipsec_sa_inb_rt_t);
+ irt_sz += anti_replay_window_size / 8;
+ irt_sz = round_pow2 (irt_sz, CLIB_CACHE_LINE_BYTES);
+
+ irt = clib_mem_alloc_aligned (irt_sz, alignof (ipsec_sa_inb_rt_t));
+ ort = clib_mem_alloc_aligned (sizeof (ipsec_sa_outb_rt_t),
+ alignof (ipsec_sa_outb_rt_t));
+ im->inb_sa_runtimes[sa_index] = irt;
+ im->outb_sa_runtimes[sa_index] = ort;
+
+ *irt = (ipsec_sa_inb_rt_t){
+ .thread_index = thread_index,
+ .anti_replay_window_size = anti_replay_window_size,
+ };
- clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]);
+ *ort = (ipsec_sa_outb_rt_t){
+ .thread_index = thread_index,
+ };
+
+ clib_pcg64i_srandom_r (&ort->iv_prng, rand[0], rand[1]);
fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA);
fib_node_lock (&sa->node);
- sa_index = sa - ipsec_sa_pool;
vlib_validate_combined_counter (&ipsec_sa_counters, sa_index);
vlib_zero_combined_counter (&ipsec_sa_counters, sa_index);
@@ -367,9 +488,8 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
sa->spi = spi;
sa->stat_index = sa_index;
sa->protocol = proto;
- sa->flags = flags;
sa->salt = salt;
- sa->thread_index = (vlib_num_workers ()) ? ~0 : 0;
+
if (integ_alg != IPSEC_INTEG_ALG_NONE)
{
ipsec_sa_set_integ_alg (sa, integ_alg);
@@ -378,9 +498,6 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_set_crypto_alg (sa, crypto_alg);
ipsec_sa_set_async_op_ids (sa);
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
- ipsec_sa_set_ANTI_REPLAY_HUGE (sa);
-
clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key));
if (crypto_alg != IPSEC_CRYPTO_ALG_NONE)
@@ -389,7 +506,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len);
if (~0 == sa->crypto_sync_key_index)
{
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
}
}
@@ -400,17 +517,17 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len);
if (~0 == sa->integ_sync_key_index)
{
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
}
}
- if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa))
- sa->crypto_async_key_index =
+ if (sa->crypto_async_enc_op_id && alg->is_aead == 0)
+ sa->linked_key_index =
vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index,
sa->integ_sync_key_index); // AES-CBC & HMAC
else
- sa->crypto_async_key_index = sa->crypto_sync_key_index;
+ sa->linked_key_index = ~0;
if (im->async_mode)
{
@@ -429,14 +546,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (err)
{
clib_warning ("%v", err->what);
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return VNET_API_ERROR_UNIMPLEMENTED;
}
err = ipsec_call_add_del_callbacks (im, sa, sa_index, 1);
if (err)
{
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return VNET_API_ERROR_SYSCALL_ERROR_1;
}
@@ -446,13 +563,12 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
{
- sa->tunnel_flags = sa->tunnel.t_encap_decap_flags;
rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index);
if (rv)
{
- pool_put (ipsec_sa_pool, sa);
+ pool_put (im->sa_pool, sa);
return rv;
}
ipsec_sa_stack (sa);
@@ -464,7 +580,7 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
(ipsec_sa_is_set_UDP_ENCAP (sa) ?
IP_PROTOCOL_UDP :
IP_PROTOCOL_IPSEC_ESP),
- &sa->ip6_hdr);
+ &ort->ip6_hdr);
}
else
{
@@ -472,44 +588,38 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
(ipsec_sa_is_set_UDP_ENCAP (sa) ?
IP_PROTOCOL_UDP :
IP_PROTOCOL_IPSEC_ESP),
- &sa->ip4_hdr);
+ &ort->ip4_hdr);
}
}
if (ipsec_sa_is_set_UDP_ENCAP (sa))
{
if (dst_port == IPSEC_UDP_PORT_NONE)
- sa->udp_hdr.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
- else
- sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
-
+ dst_port = UDP_DST_PORT_ipsec;
if (src_port == IPSEC_UDP_PORT_NONE)
- sa->udp_hdr.src_port = clib_host_to_net_u16 (UDP_DST_PORT_ipsec);
- else
- sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
+ src_port = UDP_DST_PORT_ipsec;
+ sa->udp_dst_port = dst_port;
+ sa->udp_src_port = src_port;
+ if (ort)
+ {
+ ort->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
+ ort->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ }
if (ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port),
- !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ ipsec_register_udp_port (dst_port, !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
}
- /* window size rounded up to next power of 2 */
- if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- {
- anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
- sa->replay_window_huge =
- clib_bitmap_set_region (0, 0, 1, anti_replay_window_size);
- }
- else
- {
- sa->replay_window = ~0;
- }
+ for (u32 i = 0; i < anti_replay_window_size / uword_bits; i++)
+ irt->replay_window[i] = ~0ULL;
hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
if (sa_out_index)
*sa_out_index = sa_index;
+ ipsec_sa_init_runtime (sa);
+
return (0);
}
@@ -519,33 +629,37 @@ ipsec_sa_del (ipsec_sa_t * sa)
vlib_main_t *vm = vlib_get_main ();
ipsec_main_t *im = &ipsec_main;
u32 sa_index;
+ ipsec_sa_inb_rt_t *irt = ipsec_sa_get_inb_rt (sa);
+ ipsec_sa_outb_rt_t *ort = ipsec_sa_get_outb_rt (sa);
- sa_index = sa - ipsec_sa_pool;
+ sa_index = sa - im->sa_pool;
hash_unset (im->sa_index_by_sa_id, sa->id);
tunnel_unresolve (&sa->tunnel);
/* no recovery possible when deleting an SA */
(void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0);
- if (ipsec_sa_is_set_IS_ASYNC (sa))
- {
- if (!ipsec_sa_is_set_IS_AEAD (sa))
- vnet_crypto_key_del (vm, sa->crypto_async_key_index);
- }
+ if (sa->linked_key_index != ~0)
+ vnet_crypto_key_del (vm, sa->linked_key_index);
if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ ipsec_unregister_udp_port (sa->udp_dst_port,
!ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
- dpo_reset (&sa->dpo);
+ dpo_reset (&ort->dpo);
if (sa->crypto_alg != IPSEC_CRYPTO_ALG_NONE)
vnet_crypto_key_del (vm, sa->crypto_sync_key_index);
if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
vnet_crypto_key_del (vm, sa->integ_sync_key_index);
- if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- clib_bitmap_free (sa->replay_window_huge);
- pool_put (ipsec_sa_pool, sa);
+ foreach_pointer (p, irt, ort)
+ if (p)
+ clib_mem_free (p);
+
+ im->inb_sa_runtimes[sa_index] = 0;
+ im->outb_sa_runtimes[sa_index] = 0;
+
+ pool_put (im->sa_pool, sa);
}
int
@@ -554,23 +668,33 @@ ipsec_sa_bind (u32 id, u32 worker, bool bind)
ipsec_main_t *im = &ipsec_main;
uword *p;
ipsec_sa_t *sa;
+ ipsec_sa_inb_rt_t *irt;
+ ipsec_sa_outb_rt_t *ort;
+ clib_thread_index_t thread_index;
p = hash_get (im->sa_index_by_sa_id, id);
if (!p)
return VNET_API_ERROR_INVALID_VALUE;
sa = ipsec_sa_get (p[0]);
+ irt = ipsec_sa_get_inb_rt (sa);
+ ort = ipsec_sa_get_outb_rt (sa);
if (!bind)
{
- sa->thread_index = ~0;
- return 0;
+ thread_index = ~0;
+ goto done;
}
if (worker >= vlib_num_workers ())
return VNET_API_ERROR_INVALID_WORKER;
- sa->thread_index = vlib_get_worker_thread_index (worker);
+ thread_index = vlib_get_worker_thread_index (worker);
+done:
+ if (irt)
+ irt->thread_index = thread_index;
+ if (ort)
+ ort->thread_index = thread_index;
return 0;
}
@@ -646,9 +770,10 @@ ipsec_sa_clear (index_t sai)
void
ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx)
{
+ ipsec_main_t *im = &ipsec_main;
ipsec_sa_t *sa;
- pool_foreach (sa, ipsec_sa_pool)
+ pool_foreach (sa, im->sa_pool)
{
if (WALK_CONTINUE != cb (sa, ctx))
break;
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index 640d9288a42..330043809ae 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -52,24 +52,6 @@ typedef enum
IPSEC_CRYPTO_N_ALG,
} __clib_packed ipsec_crypto_alg_t;
-#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \
- ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256))
-
-#define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \
- (((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_GCM_256)))
-
-#define IPSEC_CRYPTO_ALG_IS_CTR(_alg) \
- (((_alg == IPSEC_CRYPTO_ALG_AES_CTR_128) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_CTR_192) || \
- (_alg == IPSEC_CRYPTO_ALG_AES_CTR_256)))
-
-#define IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS(_alg) \
- (_alg == IPSEC_CRYPTO_ALG_CHACHA20_POLY1305)
-
#define foreach_ipsec_integ_alg \
_ (0, NONE, "none") \
_ (1, MD5_96, "md5-96") /* RFC2403 */ \
@@ -117,12 +99,8 @@ typedef struct ipsec_key_t_
_ (16, UDP_ENCAP, "udp-encap") \
_ (32, IS_PROTECT, "Protect") \
_ (64, IS_INBOUND, "inbound") \
- _ (128, IS_AEAD, "aead") \
- _ (256, IS_CTR, "ctr") \
_ (512, IS_ASYNC, "async") \
- _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \
- _ (2048, IS_NULL_GMAC, "null-gmac") \
- _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge")
+ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop")
typedef enum ipsec_sad_flags_t_
{
@@ -165,51 +143,82 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u16 is_aead : 1;
+ u16 is_ctr : 1;
+ u16 is_null_gmac : 1;
+ u16 use_esn : 1; /* selects the ESN branches of the anti-replay helpers */
+ u16 use_anti_replay : 1; /* when clear the anti-replay helpers skip the window check */
+ u16 is_protect : 1; /* mirrors the IS_PROTECT SA flag (ipsec_sa_update_runtime) */
+ u16 is_tunnel : 1;
+ u16 is_transport : 1;
+ u16 is_async : 1;
+ u16 cipher_op_id;
+ u16 integ_op_id;
+ u8 cipher_iv_size;
+ u8 integ_icv_size;
+ u8 udp_sz;
+ clib_thread_index_t thread_index; /* ~0 at creation when workers exist, else 0; overwritten by ipsec_sa_bind */
+ u32 salt;
+ u64 seq64; /* low 32 bits: sequence number, high 32 bits: ESN high-order part (split as exp_lo / exp_hi by the helpers) */
+ u16 async_op_id;
+ vnet_crypto_key_index_t cipher_key_index;
+ vnet_crypto_key_index_t integ_key_index;
+ u32 anti_replay_window_size; /* in bits: 64, or the requested size rounded up to a power of two (ipsec_sa_add_and_lock) */
+ uword replay_window[]; /* circular bitmap indexed by seq & (window_size - 1); all bits set at SA creation */
+} ipsec_sa_inb_rt_t; /* per-SA inbound runtime; ipsec_sa_add_and_lock allocates it with anti_replay_window_size / 8 extra bytes for replay_window */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u16 is_aead : 1; /* is_aead, is_ctr, is_null_gmac are copied from the crypto algorithm descriptor */
+ u16 is_ctr : 1;
+ u16 is_null_gmac : 1;
+ u16 is_tunnel : 1; /* is_tunnel .. use_anti_replay mirror the corresponding SA flags */
+ u16 is_tunnel_v6 : 1;
+ u16 udp_encap : 1;
+ u16 use_esn : 1;
+ u16 use_anti_replay : 1;
+ u16 drop_no_crypto : 1; /* no cipher, no integrity alg and NO_ALGO_NO_DROP not set (ipsec_sa_update_runtime) */
+ u16 is_async : 1;
+ u16 cipher_op_id;
+ u16 integ_op_id;
+ u8 cipher_iv_size; /* iv_size of the SA's crypto alg; asserted <= ESP_MAX_IV_SIZE */
+ u8 esp_block_align; /* clib_max (4, block_align of the crypto alg); asserted <= ESP_MAX_BLOCK_SIZE */
+ u8 integ_icv_size;
+ ip_dscp_t t_dscp; /* copy of sa->tunnel.t_dscp */
+ tunnel_encap_decap_flags_t tunnel_flags; /* copy of sa->tunnel.t_encap_decap_flags */
+ clib_thread_index_t thread_index; /* ~0 at creation when workers exist, else 0; overwritten by ipsec_sa_bind */
+ u16 async_op_id; /* copy of sa->crypto_async_enc_op_id */
+ u32 salt; /* copy of sa->salt */
+ u32 spi_be; /* sa->spi converted with clib_host_to_net_u32 */
+ u64 seq64;
+ dpo_id_t dpo; /* reset for outbound tunnel SAs in ipsec_sa_update and ipsec_sa_del */
clib_pcg64i_random_t iv_prng;
-
+ vnet_crypto_key_index_t cipher_key_index;
+ vnet_crypto_key_index_t integ_key_index;
union
{
- u64 replay_window;
- clib_bitmap_t *replay_window_huge;
+ ip4_header_t ip4_hdr; /* filled by tunnel_build_v4_hdr */
+ ip6_header_t ip6_hdr; /* filled by tunnel_build_v6_hdr */
};
- dpo_id_t dpo;
+ udp_header_t udp_hdr; /* src/dst ports stored via clib_host_to_net_u16; host-order copies live in sa->udp_src_port / sa->udp_dst_port */
+} ipsec_sa_outb_rt_t; /* per-SA outbound runtime, allocated in ipsec_sa_add_and_lock */
- vnet_crypto_key_index_t crypto_key_index;
- vnet_crypto_key_index_t integ_key_index;
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 spi;
- u32 seq;
- u32 seq_hi;
- u16 crypto_enc_op_id;
- u16 crypto_dec_op_id;
- u16 integ_op_id;
ipsec_sa_flags_t flags;
- u16 thread_index;
-
- u16 integ_icv_size : 6;
- u16 crypto_iv_size : 5;
- u16 esp_block_align : 5;
-
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- union
- {
- ip4_header_t ip4_hdr;
- ip6_header_t ip6_hdr;
- };
- udp_header_t udp_hdr;
+ u16 udp_src_port;
+ u16 udp_dst_port;
/* Salt used in CTR modes (incl. GCM) - stored in network byte order */
u32 salt;
ipsec_protocol_t protocol;
- tunnel_encap_decap_flags_t tunnel_flags;
- u8 __pad[2];
-
- /* data accessed by dataplane code should be above this comment */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* Elements with u64 size multiples */
tunnel_t tunnel;
@@ -222,7 +231,7 @@ typedef struct
vnet_crypto_alg_t crypto_calg;
u32 crypto_sync_key_index;
u32 integ_sync_key_index;
- u32 crypto_async_key_index;
+ u32 linked_key_index;
/* elements with u16 size */
u16 crypto_sync_enc_op_id;
@@ -243,13 +252,6 @@ STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow");
STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow");
STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow");
STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow");
-STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES);
-STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES);
-
-/**
- * Pool of IPSec SAs
- */
-extern ipsec_sa_t *ipsec_sa_pool;
/*
* Ensure that the IPsec data does not overlap with the IP data in
@@ -291,6 +293,7 @@ extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len);
extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port,
const tunnel_t *tun, bool is_tun);
+extern void ipsec_sa_update_runtime (ipsec_sa_t *sa);
extern int ipsec_sa_add_and_lock (
u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg,
const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
@@ -323,85 +326,35 @@ extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args);
#define IPSEC_UDP_PORT_NONE ((u16) ~0)
-/*
- * Anti Replay definitions
- */
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \
- (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
- clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
- BITS (_sa->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \
- (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
- BITS (_sa->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \
- (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
- clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
- count_set_bits (_sa->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \
- (u64) (_is_huge ? clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
- count_set_bits (_sa->replay_window))
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \
- (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1)
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \
- (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1)
-
-/*
- * sequence number less than the lower bound are outside of the window
- * From RFC4303 Appendix A:
- * Bl = Tl - W + 1
- */
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \
- (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1)
-
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \
- (u32) (_sa->seq - \
- IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1)
-
always_inline u64
-ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa)
+ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_inb_rt_t *irt) /* returns the 64 window bits ending at the slot of the current sequence number, that slot in bit 63 */
{
- if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
- return sa->replay_window;
-
u64 w;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa);
- u32 tl_win_index = sa->seq & (window_size - 1);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 tl_win_index = irt->seq64 & (window_size - 1); /* slot of the current sequence number in the circular bitmap */
+ uword *bmp = (uword *) irt->replay_window; /* cast drops const; the bitmap is only read here */
if (PREDICT_TRUE (tl_win_index >= 63))
- return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63,
- 64);
+ return uword_bitmap_get_multiple (bmp, tl_win_index - 63, 64); /* no wrap: bits [tl_win_index - 63, tl_win_index] */
- w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0,
- tl_win_index + 1)
+ w = uword_bitmap_get_multiple_no_check (bmp, 0, tl_win_index + 1) /* wrap: the tl_win_index + 1 bits at the bitmap start become the top of the result */
<< (63 - tl_win_index);
- w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge,
- window_size - 63 + tl_win_index,
- 63 - tl_win_index);
+ w |= uword_bitmap_get_multiple_no_check ( /* the remaining 63 - tl_win_index bits come from the end of the bitmap */
+ bmp, window_size - 63 + tl_win_index, 63 - tl_win_index);
return w;
}
always_inline int
-ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge)
+ipsec_sa_anti_replay_check (const ipsec_sa_inb_rt_t *irt, u32 window_size,
+ u32 seq) /* returns whether the window bit for seq is already set; window_size is used as a power-of-two mask */
{
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
-
/* we assume that the packet is in the window.
* if the packet falls left (sa->seq - seq >= window size),
* the result is wrong */
- if (ar_huge)
- return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1));
- else
- return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1;
-
- return 0;
+ return uword_bitmap_is_bit_set ((uword *) irt->replay_window, /* cast drops const; read-only lookup */
+ seq & (window_size - 1));
}
/*
@@ -419,36 +372,37 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge)
* the high sequence number is set.
*/
always_inline int
-ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
+ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_inb_rt_t *irt, u32 seq,
u32 hi_seq_used, bool post_decrypt,
- u32 *hi_seq_req, bool ar_huge)
+ u32 *hi_seq_req)
{
ASSERT ((post_decrypt == false) == (hi_seq_req != 0));
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
- u32 window_lower_bound =
- IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 exp_lo = irt->seq64;
+ u32 exp_hi = irt->seq64 >> 32;
+ u32 window_lower_bound = exp_lo - window_size + 1;
- if (!ipsec_sa_is_set_USE_ESN (sa))
+ if (!irt->use_esn)
{
if (hi_seq_req)
/* no ESN, therefore the hi-seq is always 0 */
*hi_seq_req = 0;
- if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ if (!irt->use_anti_replay)
return 0;
- if (PREDICT_TRUE (seq > sa->seq))
+ if (PREDICT_TRUE (seq > exp_lo))
return 0;
/* does the packet fall out on the left of the window */
- if (sa->seq >= seq + window_size)
+ if (exp_lo >= seq + window_size)
return 1;
- return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
- if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ if (!irt->use_anti_replay)
{
/* there's no AR configured for this SA, but in order
* to know whether a packet has wrapped the hi ESN we need
@@ -463,20 +417,20 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
if (hi_seq_req)
{
- if (seq >= sa->seq)
+ if (seq >= exp_lo)
/* The packet's sequence number is larger that the SA's.
* that can't be a warp - unless we lost more than
* 2^32 packets ... how could we know? */
- *hi_seq_req = sa->seq_hi;
+ *hi_seq_req = exp_hi;
else
{
/* The packet's SN is less than the SAs, so either the SN has
* wrapped or the SN is just old. */
- if (sa->seq - seq > (1 << 30))
+ if (exp_lo - seq > (1 << 30))
/* It's really really really old => it wrapped */
- *hi_seq_req = sa->seq_hi + 1;
+ *hi_seq_req = exp_hi + 1;
else
- *hi_seq_req = sa->seq_hi;
+ *hi_seq_req = exp_hi;
}
}
/*
@@ -486,7 +440,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
return 0;
}
- if (PREDICT_TRUE (window_size > 0 && sa->seq >= window_size - 1))
+ if (PREDICT_TRUE (exp_lo >= window_size - 1))
{
/*
* the last sequence number VPP received is more than one
@@ -503,7 +457,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
if (post_decrypt)
{
- if (hi_seq_used == sa->seq_hi)
+ if (hi_seq_used == exp_hi)
/* the high sequence number used to succesfully decrypt this
* packet is the same as the last-sequence number of the SA.
* that means this packet did not cause a wrap.
@@ -520,7 +474,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
/* pre-decrypt it might be the packet that causes a wrap, we
* need to decrypt it to find out */
if (hi_seq_req)
- *hi_seq_req = sa->seq_hi + 1;
+ *hi_seq_req = exp_hi + 1;
return 0;
}
}
@@ -531,13 +485,13 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* end of the window.
*/
if (hi_seq_req)
- *hi_seq_req = sa->seq_hi;
- if (seq <= sa->seq)
+ *hi_seq_req = exp_hi;
+ if (seq <= exp_lo)
/*
* The received seq number is within bounds of the window
* check if it's a duplicate
*/
- return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
else
/*
* The received sequence number is greater than the window
@@ -562,15 +516,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
/*
* the sequence number is less than the lower bound.
*/
- if (seq <= sa->seq)
+ if (seq <= exp_lo)
{
/*
* the packet is within the window upper bound.
* check for duplicates.
*/
if (hi_seq_req)
- *hi_seq_req = sa->seq_hi;
- return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
+ *hi_seq_req = exp_hi;
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
else
{
@@ -584,7 +538,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* we've lost close to 2^32 packets.
*/
if (hi_seq_req)
- *hi_seq_req = sa->seq_hi;
+ *hi_seq_req = exp_hi;
return 0;
}
}
@@ -597,8 +551,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* received packet, the SA has moved on to a higher sequence number.
*/
if (hi_seq_req)
- *hi_seq_req = sa->seq_hi - 1;
- return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
+ *hi_seq_req = exp_hi - 1;
+ return ipsec_sa_anti_replay_check (irt, window_size, seq);
}
}
@@ -608,120 +562,97 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
always_inline u32
-ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
+ipsec_sa_anti_replay_window_shift (ipsec_sa_inb_rt_t *irt, u32 window_size,
+ u32 inc)
{
+ uword *window = irt->replay_window;
+ u32 window_mask = window_size - 1;
u32 n_lost = 0;
u32 seen = 0;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
if (inc < window_size)
{
- if (ar_huge)
- {
- /* the number of packets we saw in this section of the window */
- clib_bitmap_t *window = sa->replay_window_huge;
- u32 window_lower_bound = (sa->seq + 1) & (window_size - 1);
- u32 window_next_lower_bound =
- (window_lower_bound + inc) & (window_size - 1);
+ /* the number of packets we saw in this section of the window */
+ u32 window_lower_bound = (irt->seq64 + 1) & window_mask;
+ u32 window_next_lower_bound = (window_lower_bound + inc) & window_mask;
- uword i_block, i_word_start, i_word_end, full_words;
- uword n_blocks = window_size >> log2_uword_bits;
- uword mask;
+ uword i_block, i_word_start, i_word_end, full_words;
+ uword n_blocks = window_size >> log2_uword_bits;
+ uword mask;
- i_block = window_lower_bound >> log2_uword_bits;
+ i_block = window_lower_bound >> log2_uword_bits;
- i_word_start = window_lower_bound & (uword_bits - 1);
- i_word_end = window_next_lower_bound & (uword_bits - 1);
+ i_word_start = window_lower_bound & (uword_bits - 1);
+ i_word_end = window_next_lower_bound & (uword_bits - 1);
- /* We stay in the same word */
- if (i_word_start + inc <= uword_bits)
- {
- mask = pow2_mask (inc) << i_word_start;
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
- }
- else
+ /* We stay in the same word */
+ if (i_word_start + inc <= uword_bits)
+ {
+ mask = pow2_mask (inc) << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+ else
+ {
+ full_words =
+ (inc + i_word_start - uword_bits - i_word_end) >> log2_uword_bits;
+
+ /* count set bits in the first word */
+ mask = (uword) ~0 << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ i_block = (i_block + 1) & (n_blocks - 1);
+
+ /* count set bits in the next full words */
+ /* even if the last word need to be fully counted, we treat it
+ * apart */
+ while (full_words >= 8)
{
- full_words = (inc + i_word_start - uword_bits - i_word_end) >>
- log2_uword_bits;
-
- /* count set bits in the first word */
- mask = (uword) ~0 << i_word_start;
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
- i_block = (i_block + 1) & (n_blocks - 1);
-
- /* count set bits in the next full words */
- /* even if the last word need to be fully counted, we treat it
- * apart */
- while (full_words >= 8)
- {
- if (full_words >= 16)
- {
- /* prefect the next 8 blocks (64 bytes) */
- clib_prefetch_store (
- &window[(i_block + 8) & (n_blocks - 1)]);
- }
-
- seen += count_set_bits (window[i_block]);
- seen +=
- count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
- seen +=
- count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
- window[i_block] = 0;
- window[(i_block + 1) & (n_blocks - 1)] = 0;
- window[(i_block + 2) & (n_blocks - 1)] = 0;
- window[(i_block + 3) & (n_blocks - 1)] = 0;
- window[(i_block + 4) & (n_blocks - 1)] = 0;
- window[(i_block + 5) & (n_blocks - 1)] = 0;
- window[(i_block + 6) & (n_blocks - 1)] = 0;
- window[(i_block + 7) & (n_blocks - 1)] = 0;
-
- i_block = (i_block + 8) & (n_blocks - 1);
- full_words -= 8;
- }
- while (full_words > 0)
+ if (full_words >= 16)
{
- // last word is treated after the loop
- seen += count_set_bits (window[i_block]);
- window[i_block] = 0;
- i_block = (i_block + 1) & (n_blocks - 1);
- full_words--;
+ /* prefetch the next 8 blocks (64 bytes) */
+ clib_prefetch_store (
+ &window[(i_block + 8) & (n_blocks - 1)]);
}
- /* the last word */
- mask = pow2_mask (i_word_end);
- seen += count_set_bits (window[i_block] & mask);
- window[i_block] &= ~mask;
+ seen += count_set_bits (window[i_block]);
+ seen += count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
+ seen += count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
+ window[i_block] = 0;
+ window[(i_block + 1) & (n_blocks - 1)] = 0;
+ window[(i_block + 2) & (n_blocks - 1)] = 0;
+ window[(i_block + 3) & (n_blocks - 1)] = 0;
+ window[(i_block + 4) & (n_blocks - 1)] = 0;
+ window[(i_block + 5) & (n_blocks - 1)] = 0;
+ window[(i_block + 6) & (n_blocks - 1)] = 0;
+ window[(i_block + 7) & (n_blocks - 1)] = 0;
+
+ i_block = (i_block + 8) & (n_blocks - 1);
+ full_words -= 8;
+ }
+ while (full_words > 0)
+ {
+ // last word is treated after the loop
+ seen += count_set_bits (window[i_block]);
+ window[i_block] = 0;
+ i_block = (i_block + 1) & (n_blocks - 1);
+ full_words--;
}
- clib_bitmap_set_no_check (window,
- (sa->seq + inc) & (window_size - 1), 1);
- }
- else
- {
- /*
- * count how many holes there are in the portion
- * of the window that we will right shift of the end
- * as a result of this increments
- */
- u64 old = sa->replay_window & pow2_mask (inc);
- /* the number of packets we saw in this section of the window */
- seen = count_set_bits (old);
- sa->replay_window =
- ((sa->replay_window) >> inc) | (1ULL << (window_size - 1));
+ /* the last word */
+ mask = pow2_mask (i_word_end);
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
}
+ uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask,
+ 1);
+
/*
* the number we missed is the size of the window section
* minus the number we saw.
@@ -730,24 +661,17 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
}
else
{
+ u32 n_uwords = window_size / uword_bits;
/* holes in the replay window are lost packets */
- n_lost = window_size -
- IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge);
+ n_lost = window_size - uword_bitmap_count_set_bits (window, n_uwords);
/* any sequence numbers that now fall outside the window
* are forever lost */
n_lost += inc - window_size;
- if (PREDICT_FALSE (ar_huge))
- {
- clib_bitmap_zero (sa->replay_window_huge);
- clib_bitmap_set_no_check (sa->replay_window_huge,
- (sa->seq + inc) & (window_size - 1), 1);
- }
- else
- {
- sa->replay_window = 1ULL << (window_size - 1);
- }
+ uword_bitmap_clear (window, n_uwords);
+ uword_bitmap_set_bits_at_index (window, (irt->seq64 + inc) & window_mask,
+ 1);
}
return n_lost;
@@ -763,66 +687,46 @@ ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
* the branch cost.
*/
always_inline u64
-ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
- u32 hi_seq, bool ar_huge)
+ipsec_sa_anti_replay_advance (ipsec_sa_inb_rt_t *irt,
+ clib_thread_index_t thread_index, u32 seq,
+ u32 hi_seq)
{
u64 n_lost = 0;
- u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+ u32 window_size = irt->anti_replay_window_size;
+ u32 masked_seq = seq & (window_size - 1);
+ u32 exp_lo = irt->seq64;
+ u32 exp_hi = irt->seq64 >> 32;
u32 pos;
- if (ipsec_sa_is_set_USE_ESN (sa))
+ if (irt->use_esn)
{
- int wrap = hi_seq - sa->seq_hi;
+ int wrap = hi_seq - exp_hi;
- if (wrap == 0 && seq > sa->seq)
+ if (wrap == 0 && seq > exp_lo)
{
- pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
- sa->seq = seq;
+ pos = seq - exp_lo;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) exp_hi << 32 | seq;
}
else if (wrap > 0)
{
- pos = seq + ~sa->seq + 1;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
- sa->seq = seq;
- sa->seq_hi = hi_seq;
- }
- else if (wrap < 0)
- {
- pos = ~seq + sa->seq + 1;
- if (ar_huge)
- clib_bitmap_set_no_check (sa->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- sa->replay_window |= (1ULL << (window_size - 1 - pos));
+ pos = seq + ~exp_lo + 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) hi_seq << 32 | seq;
}
else
- {
- pos = sa->seq - seq;
- if (ar_huge)
- clib_bitmap_set_no_check (sa->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- sa->replay_window |= (1ULL << (window_size - 1 - pos));
- }
+ uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1);
}
else
{
- if (seq > sa->seq)
+ if (seq > exp_lo)
{
- pos = seq - sa->seq;
- n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
- sa->seq = seq;
+ pos = seq - exp_lo;
+ n_lost = ipsec_sa_anti_replay_window_shift (irt, window_size, pos);
+ irt->seq64 = (u64) exp_hi << 32 | seq;
}
else
- {
- pos = sa->seq - seq;
- if (ar_huge)
- clib_bitmap_set_no_check (sa->replay_window_huge,
- seq & (window_size - 1), 1);
- else
- sa->replay_window |= (1ULL << (window_size - 1 - pos));
- }
+ uword_bitmap_set_bits_at_index (irt->replay_window, masked_seq, 1);
}
return n_lost;
@@ -840,12 +744,6 @@ ipsec_sa_assign_thread (u16 thread_id)
: (unix_time_now_nsec () % vlib_num_workers ()) + 1);
}
-always_inline ipsec_sa_t *
-ipsec_sa_get (u32 sa_index)
-{
- return (pool_elt_at_index (ipsec_sa_pool, sa_index));
-}
-
#endif /* __IPSEC_SPD_SA_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c
index 5fb07b3ba09..28702bdec47 100644
--- a/src/vnet/ipsec/ipsec_tun.c
+++ b/src/vnet/ipsec/ipsec_tun.c
@@ -470,6 +470,7 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
if (!(itp->itp_flags & IPSEC_PROTECT_ITF))
{
ipsec_sa_set_IS_PROTECT (sa);
+ ipsec_sa_update_runtime (sa);
itp->itp_flags |= IPSEC_PROTECT_ENCAPED;
}
}
@@ -497,7 +498,11 @@ ipsec_tun_protect_config (ipsec_main_t * im,
ipsec_sa_lock (itp->itp_out_sa);
if (itp->itp_flags & IPSEC_PROTECT_ITF)
- ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
+ {
+ ipsec_sa_t *sa = ipsec_sa_get (itp->itp_out_sa);
+ ipsec_sa_set_NO_ALGO_NO_DROP (sa);
+ ipsec_sa_update_runtime (sa);
+ }
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
@@ -523,12 +528,16 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
ipsec_sa_unset_IS_PROTECT (sa);
+ ipsec_sa_update_runtime (sa);
}));
ipsec_tun_protect_rx_db_remove (im, itp);
ipsec_tun_protect_tx_db_remove (itp);
- ipsec_sa_unset_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
+ sa = ipsec_sa_get (itp->itp_out_sa);
+ ipsec_sa_unset_NO_ALGO_NO_DROP (sa);
+ ipsec_sa_update_runtime (sa);
+
ipsec_sa_unlock(itp->itp_out_sa);
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c
index 3dde084cb24..38f6baf3d2e 100644
--- a/src/vnet/ipsec/ipsec_tun_in.c
+++ b/src/vnet/ipsec/ipsec_tun_in.c
@@ -114,7 +114,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_interface_main_t *vim = &vnm->interface_main;
int is_trace = node->flags & VLIB_NODE_FLAG_TRACE;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 n_left_from, *from;
u16 nexts[VLIB_FRAME_SIZE], *next;
diff --git a/src/vnet/ipsec/main.c b/src/vnet/ipsec/main.c
index e17d1dc5cfe..0a01797e066 100644
--- a/src/vnet/ipsec/main.c
+++ b/src/vnet/ipsec/main.c
@@ -61,6 +61,7 @@ ipsec_main_t ipsec_main = {
.alg = VNET_CRYPTO_ALG_AES_128_CTR,
.iv_size = 8,
.block_align = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_AES_CTR_192] = {
@@ -69,6 +70,7 @@ ipsec_main_t ipsec_main = {
.alg = VNET_CRYPTO_ALG_AES_192_CTR,
.iv_size = 8,
.block_align = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_AES_CTR_256] = {
@@ -77,6 +79,7 @@ ipsec_main_t ipsec_main = {
.alg = VNET_CRYPTO_ALG_AES_256_CTR,
.iv_size = 8,
.block_align = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_AES_GCM_128] = {
@@ -86,6 +89,8 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_aead = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_AES_GCM_192] = {
@@ -95,6 +100,8 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_aead = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_AES_GCM_256] = {
@@ -104,6 +111,8 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_aead = 1,
+ .is_ctr = 1,
},
[IPSEC_CRYPTO_ALG_CHACHA20_POLY1305] = {
@@ -112,6 +121,8 @@ ipsec_main_t ipsec_main = {
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
.iv_size = 8,
.icv_size = 16,
+ .is_ctr = 1,
+ .is_aead = 1,
},
[IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128] = {
@@ -121,6 +132,9 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_ctr = 1,
+ .is_aead = 1,
+ .is_null_gmac = 1,
},
[IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192] = {
@@ -130,6 +144,9 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_ctr = 1,
+ .is_aead = 1,
+ .is_null_gmac = 1,
},
[IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256] = {
@@ -139,6 +156,9 @@ ipsec_main_t ipsec_main = {
.iv_size = 8,
.block_align = 1,
.icv_size = 16,
+ .is_ctr = 1,
+ .is_aead = 1,
+ .is_null_gmac = 1,
},
},
.integ_algs = {
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
index f8cb3cb5687..c06bf5c636b 100644
--- a/src/vnet/l2/l2_flood.c
+++ b/src/vnet/l2/l2_flood.c
@@ -141,7 +141,7 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm,
u32 n_left_from, *from, *to_next;
l2flood_next_t next_index;
l2flood_main_t *msm = &l2flood_main;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c
index 76b94809eb3..58a541756da 100644
--- a/src/vnet/l2/l2_input_node.c
+++ b/src/vnet/l2/l2_input_node.c
@@ -215,7 +215,10 @@ classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0)
vnet_buffer (b0)->sw_if_index[VLIB_TX] = config->output_sw_if_index;
}
else
- feat_mask = L2INPUT_FEAT_DROP;
+ {
+ *next0 = L2INPUT_NEXT_DROP;
+ return;
+ }
/* mask out features from bitmap using packet type and bd config */
u32 feature_bitmap = config->feature_bitmap & feat_mask;
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 0505d9a1829..79da5e53cba 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -75,7 +75,7 @@ mpls_input_inline (vlib_main_t * vm,
{
u32 n_left_from, next_index, * from, * to_next;
mpls_main_t * mm = &mpls_main;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
vlib_simple_counter_main_t * cm;
vnet_main_t * vnm = vnet_get_main();
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index a5ac56534a5..5b0e9413ea3 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -61,7 +61,7 @@ VLIB_NODE_FN (mpls_lookup_node) (vlib_main_t * vm,
vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
u32 n_left_from, next_index, * from, * to_next;
mpls_main_t * mm = &mpls_main;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -493,7 +493,7 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
{
vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
u32 n_left_from, n_left_to_next, * from, * to_next;
- u32 thread_index = vlib_get_thread_index();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
u32 next;
from = vlib_frame_vector_args (frame);
diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h
index 8f126e22175..c1b8096ed41 100644
--- a/src/vnet/policer/police.h
+++ b/src/vnet/policer/police.h
@@ -91,7 +91,8 @@ typedef struct
u32 current_bucket; // MOD
u32 extended_limit;
u32 extended_bucket; // MOD
- u32 thread_index; // Tie policer to a thread, rather than lock
+ clib_thread_index_t
+ thread_index; // Tie policer to a thread, rather than lock
u64 last_update_time; // MOD
u8 *name;
} policer_t;
diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h
index 08000b9a303..7b7e19171d6 100644
--- a/src/vnet/policer/police_inlines.h
+++ b/src/vnet/policer/police_inlines.h
@@ -74,7 +74,7 @@ vnet_policer_police (vlib_main_t *vm, vlib_buffer_t *b, u32 policer_index,
if (handoff)
{
- if (PREDICT_FALSE (pol->thread_index == ~0))
+ if (PREDICT_FALSE (pol->thread_index == CLIB_INVALID_THREAD_INDEX))
/*
* This is the first packet to use this policer. Set the
* thread index in the policer to this thread and any
diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c
index 3424a914e35..8875585f199 100644
--- a/src/vnet/qos/qos_store.c
+++ b/src/vnet/qos/qos_store.c
@@ -181,7 +181,7 @@ qos_store_cli (vlib_main_t * vm,
enable = 1;
else if (unformat (input, "disable"))
enable = 0;
- else if (unformat (input, "value &d", &value))
+ else if (unformat (input, "value %d", &value))
;
else
break;
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index eacc1adf941..1a2509e6356 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -490,7 +490,7 @@ vlib_node_registration_t appsl_rx_mqs_input_node;
VLIB_NODE_FN (appsl_rx_mqs_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- u32 thread_index = vm->thread_index, n_msgs = 0;
+ clib_thread_index_t thread_index = vm->thread_index, n_msgs = 0;
app_rx_mq_elt_t *elt, *next;
app_main_t *am = &app_main;
session_worker_t *wrk;
@@ -581,7 +581,7 @@ app_rx_mqs_epoll_add (application_t *app, app_rx_mq_elt_t *mqe)
{
clib_file_t template = { 0 };
app_rx_mq_handle_t handle;
- u32 thread_index;
+ clib_thread_index_t thread_index;
int fd;
thread_index = mqe - app->rx_mqs;
@@ -603,7 +603,7 @@ app_rx_mqs_epoll_add (application_t *app, app_rx_mq_elt_t *mqe)
static void
app_rx_mqs_epoll_del (application_t *app, app_rx_mq_elt_t *mqe)
{
- u32 thread_index = mqe - app->rx_mqs;
+ clib_thread_index_t thread_index = mqe - app->rx_mqs;
app_main_t *am = &app_main;
appsl_wrk_t *aw;
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index c68a911230f..d748eae9cd5 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -175,7 +175,7 @@ typedef struct app_rx_mq_handle_
struct
{
u32 app_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
u64 as_u64;
};
@@ -368,9 +368,11 @@ int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s,
session_ft_action_t act, u32 len);
void app_worker_add_event (app_worker_t *app_wrk, session_t *s,
session_evt_type_t evt_type);
-void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+void app_worker_add_event_custom (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index,
session_event_t *evt);
-int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index);
+int app_wrk_flush_wrk_events (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index);
void app_worker_del_all_events (app_worker_t *app_wrk);
segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *,
session_t *);
@@ -386,10 +388,12 @@ void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
u32 msg_len, int fd);
void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
u32 msg_len);
-u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index);
-void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index);
+u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index);
+void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index);
void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
- u32 thread_index);
+ clib_thread_index_t thread_index);
session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto,
u8 transport_proto);
void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index);
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index a62f914d43a..c9aaceb330d 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -23,9 +23,11 @@
/**
* unformat a vnet URI
*
- * transport-proto://[hostname]ip46-addr:port
- * eg. tcp://ip46-addr:port
- * tls://[testtsl.fd.io]ip46-addr:port
+ * transport-proto://[hostname]ip4-addr:port
+ * eg. tcp://ip4-addr:port
+ * https://[ip6]:port
+ * http://ip4:port
+ * tls://[testtsl.fd.io]ip4-addr:port
*
* u8 ip46_address[16];
* u16 port_in_host_byte_order;
@@ -38,35 +40,75 @@
*
*/
uword
-unformat_vnet_uri (unformat_input_t * input, va_list * args)
+unformat_vnet_uri (unformat_input_t *input, va_list *args)
{
session_endpoint_cfg_t *sep = va_arg (*args, session_endpoint_cfg_t *);
u32 transport_proto = 0, port;
- if (unformat (input, "%U://%U/%d", unformat_transport_proto,
- &transport_proto, unformat_ip4_address, &sep->ip.ip4, &port))
+ /* Step 1, scheme: a transport name followed by ':'. If neither form
+ * below matches, sep->transport_proto is not written by this function */
+ if (unformat (input, "%U:", unformat_transport_proto, &transport_proto))
{
sep->transport_proto = transport_proto;
- sep->port = clib_host_to_net_u16 (port);
+ }
+ /* transport name with an 's' suffix: same transport, marked as secure */
+ else if (unformat (input, "%Us:", unformat_transport_proto,
+ &transport_proto))
+ {
+ sep->flags |= SESSION_ENDPT_CFG_F_SECURE;
+ sep->transport_proto = transport_proto;
+ }
+
+ /* Step 2, address: ip4, ip6 (optionally in brackets) or a parent session
+ * handle. The address forms only differ in the separator that follows */
+ if (unformat (input, "//%U:", unformat_ip4_address, &sep->ip.ip4))
+ {
+ sep->is_ip4 = 1;
+ }
+ /* deprecated */
+ else if (unformat (input, "//%U/", unformat_ip4_address, &sep->ip.ip4))
+ {
+ sep->is_ip4 = 1;
+ }
+ else if (unformat (input, "//%U", unformat_ip4_address, &sep->ip.ip4))
+ {
sep->is_ip4 = 1;
+ }
+ /* deprecated */
+ else if (unformat (input, "//%U/", unformat_ip6_address, &sep->ip.ip6))
+ {
+ sep->is_ip4 = 0;
+ }
+ else if (unformat (input, "//[%U]:", unformat_ip6_address, &sep->ip.ip6))
+ {
+ sep->is_ip4 = 0;
+ }
+ /* deprecated */
+ else if (unformat (input, "//[%U]/", unformat_ip6_address, &sep->ip.ip6))
+ {
+ sep->is_ip4 = 0;
+ }
+ else if (unformat (input, "//[%U]", unformat_ip6_address, &sep->ip.ip6))
+ {
+ sep->is_ip4 = 0;
+ }
+ else if (unformat (input, "//session/%lu", &sep->parent_handle))
+ {
+ /* session handle form is complete here, no port is parsed */
+ sep->ip.ip4.as_u32 = 1; /* ip need to be non zero in vnet */
return 1;
}
- else if (unformat (input, "%U://%U/%d", unformat_transport_proto,
- &transport_proto, unformat_ip6_address, &sep->ip.ip6,
- &port))
+
+ /* Step 3, port: explicit value, stored in network byte order, otherwise
+ * a per-transport default */
+ if (unformat (input, "%d", &port))
{
- sep->transport_proto = transport_proto;
sep->port = clib_host_to_net_u16 (port);
- sep->is_ip4 = 0;
return 1;
}
- else if (unformat (input, "%U://session/%lu", unformat_transport_proto,
- &transport_proto, &sep->parent_handle))
+ /* NOTE(review): a scheme matched through the 's' suffix keeps the base
+ * transport, so an "https" uri without a port appears to end up with 80
+ * here - confirm this is intended */
+ else if (sep->transport_proto == TRANSPORT_PROTO_HTTP)
{
- sep->transport_proto = transport_proto;
- sep->ip.ip4.as_u32 = 1; /* ip need to be non zero in vnet */
+ sep->port = clib_host_to_net_u16 (80);
return 1;
}
+ else if (sep->transport_proto == TRANSPORT_PROTO_TLS)
+ {
+ sep->port = clib_host_to_net_u16 (443);
+ return 1;
+ }
+
+ /* no port in the input and no default for this transport */
return 0;
}
@@ -106,6 +148,45 @@ parse_uri (char *uri, session_endpoint_cfg_t *sep)
return 0;
}
+/**
+ * Split the request target (path) off a URI. Use before 'parse_uri()'.
+ *
+ * Scans *uri for its third '/' (the first two belong to "scheme://"). When
+ * found, everything from that '/' to the end of the string is stored in
+ * *target and *uri is cut right after the '/'. When there is no third '/',
+ * *uri is left untouched and, if *target is still NULL, it is set to "/".
+ *
+ * @param uri    in/out, writable NUL terminated URI string
+ * @param target in/out, vector (may be NULL) that receives the NUL
+ *               terminated target, grown as needed
+ * @return 0 on success, SESSION_E_INVALID if an argument is NULL
+ */
+session_error_t
+parse_target (char **uri, char **target)
+{
+  u32 uri_len, i;
+  u8 n_slashes = 0;
+
+  if (!uri || !*uri || !target)
+    return SESSION_E_INVALID;
+
+  /* measure once instead of on every loop iteration */
+  uri_len = strlen (*uri);
+
+  for (i = 0; i < uri_len; i++)
+    {
+      if ((*uri)[i] != '/' || ++n_slashes < 3)
+	continue;
+
+      /* Replace, rather than overlay, whatever the vector held before so
+       * that a shorter target does not keep the tail of an older, longer
+       * one and no unwritten vector element ends up in the string */
+      vec_reset_length (*target);
+      vec_add (*target, *uri + i, uri_len - i);
+      (*uri)[i + 1] = '\0';
+      break;
+    }
+
+  /* no target in the URI and none supplied by the caller */
+  if (!*target)
+    vec_add1 (*target, '/');
+
+  vec_terminate_c_string (*target);
+
+  return 0;
+}
+
session_error_t
vnet_bind_uri (vnet_listen_args_t *a)
{
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 21ed97998f2..33b61187fe3 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -281,6 +281,7 @@ typedef enum session_fd_flag_
} session_fd_flag_t;
session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t parse_target (char **uri, char **target);
session_error_t vnet_bind_uri (vnet_listen_args_t *);
session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
session_error_t vnet_connect_uri (vnet_connect_args_t *a);
diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c
index 18ea77dc8a8..f22e3647b7f 100644
--- a/src/vnet/session/application_local.c
+++ b/src/vnet/session/application_local.c
@@ -76,13 +76,13 @@ typedef struct ct_main_
static ct_main_t ct_main;
static inline ct_worker_t *
-ct_worker_get (u32 thread_index)
+ct_worker_get (clib_thread_index_t thread_index)
{
return &ct_main.wrk[thread_index];
}
static ct_connection_t *
-ct_connection_alloc (u32 thread_index)
+ct_connection_alloc (clib_thread_index_t thread_index)
{
ct_worker_t *wrk = ct_worker_get (thread_index);
ct_connection_t *ct;
@@ -99,7 +99,7 @@ ct_connection_alloc (u32 thread_index)
}
static ct_connection_t *
-ct_connection_get (u32 ct_index, u32 thread_index)
+ct_connection_get (u32 ct_index, clib_thread_index_t thread_index)
{
ct_worker_t *wrk = ct_worker_get (thread_index);
@@ -659,7 +659,7 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
}
static void
-ct_accept_one (u32 thread_index, u32 ho_index)
+ct_accept_one (clib_thread_index_t thread_index, u32 ho_index)
{
ct_connection_t *sct, *cct, *ho;
transport_connection_t *ll_ct;
@@ -768,7 +768,7 @@ ct_accept_one (u32 thread_index, u32 ho_index)
static void
ct_accept_rpc_wrk_handler (void *rpc_args)
{
- u32 thread_index, n_connects, i, n_pending;
+ clib_thread_index_t thread_index, n_connects, i, n_pending;
const u32 max_connects = 32;
ct_worker_t *wrk;
u8 need_rpc = 0;
@@ -805,7 +805,7 @@ ct_accept_rpc_wrk_handler (void *rpc_args)
static void
ct_fwrk_flush_connects (void *rpc_args)
{
- u32 thread_index, fwrk_index, n_workers;
+ clib_thread_index_t thread_index, fwrk_index, n_workers;
ct_main_t *cm = &ct_main;
ct_worker_t *wrk;
u8 need_rpc;
@@ -851,7 +851,7 @@ static void
ct_program_connect_to_wrk (u32 ho_index)
{
ct_main_t *cm = &ct_main;
- u32 thread_index;
+ clib_thread_index_t thread_index;
/* Simple round-robin policy for spreading sessions over workers. We skip
* thread index 0, i.e., offset the index by 1, when we have workers as it
@@ -946,7 +946,7 @@ ct_session_half_open_get (u32 ct_index)
}
static void
-ct_session_cleanup (u32 conn_index, u32 thread_index)
+ct_session_cleanup (u32 conn_index, clib_thread_index_t thread_index)
{
ct_connection_t *ct, *peer_ct;
@@ -1173,7 +1173,7 @@ ct_program_cleanup (ct_connection_t *ct)
}
static void
-ct_session_close (u32 ct_index, u32 thread_index)
+ct_session_close (u32 ct_index, clib_thread_index_t thread_index)
{
ct_connection_t *ct, *peer_ct;
session_t *s;
@@ -1204,7 +1204,7 @@ ct_session_close (u32 ct_index, u32 thread_index)
}
static void
-ct_session_reset (u32 ct_index, u32 thread_index)
+ct_session_reset (u32 ct_index, clib_thread_index_t thread_index)
{
ct_connection_t *ct;
ct = ct_connection_get (ct_index, thread_index);
@@ -1213,7 +1213,7 @@ ct_session_reset (u32 ct_index, u32 thread_index)
}
static transport_connection_t *
-ct_session_get (u32 ct_index, u32 thread_index)
+ct_session_get (u32 ct_index, clib_thread_index_t thread_index)
{
return (transport_connection_t *) ct_connection_get (ct_index,
thread_index);
@@ -1331,7 +1331,7 @@ static u8 *
format_ct_session (u8 * s, va_list * args)
{
u32 ct_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
ct_connection_t *ct;
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index ad0b18e8d75..a5b1e1f4ea4 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -471,7 +471,8 @@ app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED,
.as_u64[0] = s ? s->session_index : ~0,
.as_u64[1] = (u64) opaque << 32 | (u32) err };
- u32 thread_index = s ? s->thread_index : vlib_get_thread_index ();
+ clib_thread_index_t thread_index =
+ s ? s->thread_index : vlib_get_thread_index ();
app_worker_add_event_custom (app_wrk, thread_index, &evt);
return 0;
@@ -782,7 +783,8 @@ app_worker_add_event (app_worker_t *app_wrk, session_t *s,
}
void
-app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+app_worker_add_event_custom (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index,
session_event_t *evt)
{
clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt);
@@ -832,13 +834,15 @@ app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
}
u8
-app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index)
+app_worker_mq_wrk_is_congested (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index)
{
return app_wrk->wrk_mq_congested[thread_index] > 0;
}
void
-app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
+app_worker_set_mq_wrk_congested (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index)
{
ASSERT (thread_index == vlib_get_thread_index ());
if (!app_wrk->wrk_mq_congested[thread_index])
@@ -849,7 +853,8 @@ app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
}
void
-app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index)
+app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index)
{
clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1);
ASSERT (thread_index == vlib_get_thread_index ());
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index 8c8b904c33d..341b70086d1 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -702,7 +702,8 @@ segment_manager_del_sessions_filter (segment_manager_t *sm,
}
int
-segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index,
+segment_manager_try_alloc_fifos (fifo_segment_t *fs,
+ clib_thread_index_t thread_index,
u32 rx_fifo_size, u32 tx_fifo_size,
svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo)
{
@@ -740,8 +741,8 @@ segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index,
static inline int
sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm,
segment_manager_props_t *props,
- u32 thread_index, svm_fifo_t **rx_fifo,
- svm_fifo_t **tx_fifo)
+ clib_thread_index_t thread_index,
+ svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo)
{
uword free_bytes, max_free_bytes;
fifo_segment_t *cur, *fs = 0;
@@ -771,7 +772,8 @@ sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm,
static int
sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm,
segment_manager_props_t *props,
- u32 thread_index, svm_fifo_t **rx_fifo,
+ clib_thread_index_t thread_index,
+ svm_fifo_t **rx_fifo,
svm_fifo_t **tx_fifo)
{
int new_fs_index, rv;
@@ -814,10 +816,10 @@ done:
}
int
-segment_manager_alloc_session_fifos (segment_manager_t * sm,
- u32 thread_index,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo)
+segment_manager_alloc_session_fifos (segment_manager_t *sm,
+ clib_thread_index_t thread_index,
+ svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
{
segment_manager_props_t *props;
int rv;
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index 86ca23bc9c7..0fb957a0912 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -146,15 +146,15 @@ u64 segment_manager_segment_handle (segment_manager_t * sm,
fifo_segment_t * segment);
void segment_manager_segment_reader_unlock (segment_manager_t * sm);
-int segment_manager_alloc_session_fifos (segment_manager_t * sm,
- u32 thread_index,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo);
-int segment_manager_try_alloc_fifos (fifo_segment_t * fs,
- u32 thread_index,
+int segment_manager_alloc_session_fifos (segment_manager_t *sm,
+ clib_thread_index_t thread_index,
+ svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo);
+int segment_manager_try_alloc_fifos (fifo_segment_t *fs,
+ clib_thread_index_t thread_index,
u32 rx_fifo_size, u32 tx_fifo_size,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo);
+ svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo);
void segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo,
svm_fifo_t * tx_fifo);
void segment_manager_detach_fifo (segment_manager_t *sm, svm_fifo_t **f);
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 2a6ac283fb9..7eb6181adb9 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -28,9 +28,18 @@
session_main_t session_main;
+/* Event family, tells session_send_evt_to_thread () how to interpret its
+ * 'data' argument when filling in the event */
+typedef enum
+{
+ /* data and args are stored as the rpc function pointer and its argument */
+ SESSION_EVT_RPC,
+ /* data points to a u32 session index */
+ SESSION_EVT_IO,
+ /* data is a session_t pointer, the event carries its session handle */
+ SESSION_EVT_SESSION,
+} session_evt_family_t;
+
static inline int
-session_send_evt_to_thread (void *data, void *args, u32 thread_index,
- session_evt_type_t evt_type)
+session_send_evt_to_thread (void *data, void *args,
+ clib_thread_index_t thread_index,
+ session_evt_type_t evt_type,
+ session_evt_family_t family)
{
session_worker_t *wrk = session_main_get_worker (thread_index);
session_event_t *evt;
@@ -45,30 +54,33 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
svm_msg_q_unlock (mq);
return -2;
}
- switch (evt_type)
+ switch (family)
{
- case SESSION_CTRL_EVT_RPC:
+ case SESSION_EVT_RPC:
+ ASSERT (evt_type == SESSION_CTRL_EVT_RPC);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->rpc_args.fp = data;
evt->rpc_args.arg = args;
break;
- case SESSION_IO_EVT_RX:
- case SESSION_IO_EVT_TX:
- case SESSION_IO_EVT_TX_FLUSH:
- case SESSION_IO_EVT_BUILTIN_RX:
+ case SESSION_EVT_IO:
+ ASSERT (evt_type == SESSION_IO_EVT_RX || evt_type == SESSION_IO_EVT_TX ||
+ evt_type == SESSION_IO_EVT_TX_FLUSH ||
+ evt_type == SESSION_IO_EVT_BUILTIN_RX);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_TX_MAIN:
- case SESSION_CTRL_EVT_CLOSE:
- case SESSION_CTRL_EVT_RESET:
+ case SESSION_EVT_SESSION:
+ ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE ||
+ evt_type == SESSION_CTRL_EVT_HALF_CLOSE ||
+ evt_type == SESSION_CTRL_EVT_RESET);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_handle = session_handle ((session_t *) data);
break;
default:
+ ASSERT (0);
clib_warning ("evt unhandled!");
svm_msg_q_unlock (mq);
return -1;
@@ -88,22 +100,26 @@ int
session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type)
{
return session_send_evt_to_thread (&f->vpp_session_index, 0,
- f->master_thread_index, evt_type);
+ f->master_thread_index, evt_type,
+ SESSION_EVT_IO);
}
/* Deprecated, use session_program_* functions */
int
-session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
+session_send_io_evt_to_thread_custom (void *data,
+ clib_thread_index_t thread_index,
session_evt_type_t evt_type)
{
- return session_send_evt_to_thread (data, 0, thread_index, evt_type);
+ return session_send_evt_to_thread (data, 0, thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
{
return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index, evt_type);
+ (u32) sh.thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
@@ -116,9 +132,9 @@ session_program_rx_io_evt (session_handle_tu_t sh)
}
else
{
- return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index,
- SESSION_IO_EVT_BUILTIN_RX);
+ return session_send_evt_to_thread (
+ (void *) &sh.session_index, 0, (u32) sh.thread_index,
+ SESSION_IO_EVT_BUILTIN_RX, SESSION_EVT_IO);
}
}
@@ -127,29 +143,29 @@ session_program_transport_io_evt (session_handle_tu_t sh,
session_evt_type_t evt_type)
{
return session_send_evt_to_thread ((void *) &sh.session_index, 0,
- (u32) sh.thread_index, evt_type);
+ (u32) sh.thread_index, evt_type,
+ SESSION_EVT_IO);
}
int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
- ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE ||
- evt_type == SESSION_CTRL_EVT_HALF_CLOSE ||
- evt_type == SESSION_CTRL_EVT_RESET);
- return session_send_evt_to_thread (s, 0, s->thread_index, evt_type);
+ return session_send_evt_to_thread (s, 0, s->thread_index, evt_type,
+ SESSION_EVT_SESSION);
}
void
-session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
- void *rpc_args)
+session_send_rpc_evt_to_thread_force (clib_thread_index_t thread_index,
+ void *fp, void *rpc_args)
{
- session_send_evt_to_thread (fp, rpc_args, thread_index,
- SESSION_CTRL_EVT_RPC);
+ session_send_evt_to_thread (fp, rpc_args, thread_index, SESSION_CTRL_EVT_RPC,
+ SESSION_EVT_RPC);
}
void
-session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args)
+session_send_rpc_evt_to_thread (clib_thread_index_t thread_index, void *fp,
+ void *rpc_args)
{
if (thread_index != vlib_get_thread_index ())
session_send_rpc_evt_to_thread_force (thread_index, fp, rpc_args);
@@ -212,7 +228,7 @@ sesssion_reschedule_tx (transport_connection_t * tc)
static void
session_program_transport_ctrl_evt (session_t * s, session_evt_type_t evt)
{
- u32 thread_index = vlib_get_thread_index ();
+ clib_thread_index_t thread_index = vlib_get_thread_index ();
session_evt_elt_t *elt;
session_worker_t *wrk;
@@ -234,7 +250,7 @@ session_program_transport_ctrl_evt (session_t * s, session_evt_type_t evt)
}
session_t *
-session_alloc (u32 thread_index)
+session_alloc (clib_thread_index_t thread_index)
{
session_worker_t *wrk = &session_main.wrk[thread_index];
session_t *s;
@@ -453,7 +469,7 @@ session_t *
session_alloc_for_connection (transport_connection_t * tc)
{
session_t *s;
- u32 thread_index = tc->thread_index;
+ clib_thread_index_t thread_index = tc->thread_index;
ASSERT (thread_index == vlib_get_thread_index ()
|| transport_protocol_is_cl (tc->proto));
@@ -480,115 +496,6 @@ session_alloc_for_half_open (transport_connection_t *tc)
return s;
}
-/**
- * Discards bytes from buffer chain
- *
- * It discards n_bytes_to_drop starting at first buffer after chain_b
- */
-always_inline void
-session_enqueue_discard_chain_bytes (vlib_main_t * vm, vlib_buffer_t * b,
- vlib_buffer_t ** chain_b,
- u32 n_bytes_to_drop)
-{
- vlib_buffer_t *next = *chain_b;
- u32 to_drop = n_bytes_to_drop;
- ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
- while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- next = vlib_get_buffer (vm, next->next_buffer);
- if (next->current_length > to_drop)
- {
- vlib_buffer_advance (next, to_drop);
- to_drop = 0;
- }
- else
- {
- to_drop -= next->current_length;
- next->current_length = 0;
- }
- }
- *chain_b = next;
-
- if (to_drop == 0)
- b->total_length_not_including_first_buffer -= n_bytes_to_drop;
-}
-
-/**
- * Enqueue buffer chain tail
- */
-always_inline int
-session_enqueue_chain_tail (session_t * s, vlib_buffer_t * b,
- u32 offset, u8 is_in_order)
-{
- vlib_buffer_t *chain_b;
- u32 chain_bi, len, diff;
- vlib_main_t *vm = vlib_get_main ();
- u8 *data;
- u32 written = 0;
- int rv = 0;
-
- if (is_in_order && offset)
- {
- diff = offset - b->current_length;
- if (diff > b->total_length_not_including_first_buffer)
- return 0;
- chain_b = b;
- session_enqueue_discard_chain_bytes (vm, b, &chain_b, diff);
- chain_bi = vlib_get_buffer_index (vm, chain_b);
- }
- else
- chain_bi = b->next_buffer;
-
- do
- {
- chain_b = vlib_get_buffer (vm, chain_bi);
- data = vlib_buffer_get_current (chain_b);
- len = chain_b->current_length;
- if (!len)
- continue;
- if (is_in_order)
- {
- rv = svm_fifo_enqueue (s->rx_fifo, len, data);
- if (rv == len)
- {
- written += rv;
- }
- else if (rv < len)
- {
- return (rv > 0) ? (written + rv) : written;
- }
- else if (rv > len)
- {
- written += rv;
-
- /* written more than what was left in chain */
- if (written > b->total_length_not_including_first_buffer)
- return written;
-
- /* drop the bytes that have already been delivered */
- session_enqueue_discard_chain_bytes (vm, b, &chain_b, rv - len);
- }
- }
- else
- {
- rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data);
- if (rv)
- {
- clib_warning ("failed to enqueue multi-buffer seg");
- return -1;
- }
- offset += len;
- }
- }
- while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT)
- ? chain_b->next_buffer : 0));
-
- if (is_in_order)
- return written;
-
- return 0;
-}
-
void
session_fifo_tuning (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 len)
@@ -734,7 +641,7 @@ session_dequeue_notify (session_t *s)
*/
void
session_main_flush_enqueue_events (transport_proto_t transport_proto,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
session_worker_t *wrk = session_main_get_worker (thread_index);
session_handle_t *handles;
@@ -760,154 +667,6 @@ session_main_flush_enqueue_events (transport_proto_t transport_proto,
wrk->session_to_enqueue[transport_proto] = handles;
}
-/*
- * Enqueue data for delivery to app. If requested, it queues app notification
- * event for later delivery.
- *
- * @param tc Transport connection which is to be enqueued data
- * @param b Buffer to be enqueued
- * @param offset Offset at which to start enqueueing if out-of-order
- * @param queue_event Flag to indicate if peer is to be notified or if event
- * is to be queued. The former is useful when more data is
- * enqueued and only one event is to be generated.
- * @param is_in_order Flag to indicate if data is in order
- * @return Number of bytes enqueued or a negative value if enqueueing failed.
- */
-int
-session_enqueue_stream_connection (transport_connection_t * tc,
- vlib_buffer_t * b, u32 offset,
- u8 queue_event, u8 is_in_order)
-{
- session_t *s;
- int enqueued = 0, rv, in_order_off;
-
- s = session_get (tc->s_index, tc->thread_index);
-
- if (is_in_order)
- {
- enqueued = svm_fifo_enqueue (s->rx_fifo,
- b->current_length,
- vlib_buffer_get_current (b));
- if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT)
- && enqueued >= 0))
- {
- in_order_off = enqueued > b->current_length ? enqueued : 0;
- rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
- if (rv > 0)
- enqueued += rv;
- }
- }
- else
- {
- rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset,
- b->current_length,
- vlib_buffer_get_current (b));
- if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
- session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
- /* if something was enqueued, report even this as success for ooo
- * segment handling */
- return rv;
- }
-
- if (queue_event)
- {
- /* Queue RX event on this fifo. Eventually these will need to be
- * flushed by calling @ref session_main_flush_enqueue_events () */
- if (!(s->flags & SESSION_F_RX_EVT))
- {
- session_worker_t *wrk = session_main_get_worker (s->thread_index);
- ASSERT (s->thread_index == vlib_get_thread_index ());
- s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
- }
-
- return enqueued;
-}
-
-always_inline int
-session_enqueue_dgram_connection_inline (session_t *s,
- session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event, u32 is_cl)
-{
- int rv;
-
- ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo)
- >= b->current_length + sizeof (*hdr));
-
- if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
- {
- svm_fifo_seg_t segs[2] = {
- { (u8 *) hdr, sizeof (*hdr) },
- { vlib_buffer_get_current (b), b->current_length }
- };
-
- rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
- 0 /* allow_partial */ );
- }
- else
- {
- vlib_main_t *vm = vlib_get_main ();
- svm_fifo_seg_t *segs = 0, *seg;
- vlib_buffer_t *it = b;
- u32 n_segs = 1;
-
- vec_add2 (segs, seg, 1);
- seg->data = (u8 *) hdr;
- seg->len = sizeof (*hdr);
- while (it)
- {
- vec_add2 (segs, seg, 1);
- seg->data = vlib_buffer_get_current (it);
- seg->len = it->current_length;
- n_segs++;
- if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT))
- break;
- it = vlib_get_buffer (vm, it->next_buffer);
- }
- rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs,
- 0 /* allow partial */ );
- vec_free (segs);
- }
-
- if (queue_event && rv > 0)
- {
- /* Queue RX event on this fifo. Eventually these will need to be
- * flushed by calling @ref session_main_flush_enqueue_events () */
- if (!(s->flags & SESSION_F_RX_EVT))
- {
- u32 thread_index =
- is_cl ? vlib_get_thread_index () : s->thread_index;
- session_worker_t *wrk = session_main_get_worker (thread_index);
- ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
- s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
- }
- return rv > 0 ? rv : 0;
-}
-
-int
-session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto, u8 queue_event)
-{
- return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
- queue_event, 0 /* is_cl */);
-}
-
-int
-session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto, u8 queue_event)
-{
- return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
- queue_event, 1 /* is_cl */);
-}
-
int
session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
vlib_buffer_t *b, u8 proto,
@@ -1016,7 +775,7 @@ session_switch_pool_closed_rpc (void *arg)
typedef struct _session_switch_pool_args
{
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 new_thread_index;
u32 new_session_index;
} session_switch_pool_args_t;
@@ -1308,8 +1067,8 @@ session_stream_accept_notify (transport_connection_t * tc)
* Accept a stream session. Optionally ping the server by callback.
*/
int
-session_stream_accept (transport_connection_t * tc, u32 listener_index,
- u32 thread_index, u8 notify)
+session_stream_accept (transport_connection_t *tc, u32 listener_index,
+ clib_thread_index_t thread_index, u8 notify)
{
session_t *s;
int rv;
@@ -1343,8 +1102,8 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index,
}
int
-session_dgram_accept (transport_connection_t * tc, u32 listener_index,
- u32 thread_index)
+session_dgram_accept (transport_connection_t *tc, u32 listener_index,
+ clib_thread_index_t thread_index)
{
app_worker_t *app_wrk;
session_t *s;
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index daa3bf97f56..d5402b3571e 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -151,6 +151,8 @@ typedef struct session_worker_
/** Per-app-worker bitmap of pending notifications */
uword *app_wrks_pending_ntf;
+ svm_fifo_seg_t *rx_segs;
+
int config_index;
u8 dma_enabled;
session_dma_transfer *dma_trans;
@@ -323,6 +325,67 @@ typedef struct _session_enable_disable_args_t
#define TRANSPORT_PROTO_INVALID (session_main.last_transport_proto_type + 1)
#define TRANSPORT_N_PROTOS (session_main.last_transport_proto_type + 1)
+/*
+ * Session layer functions
+ */
+
+always_inline session_main_t *
+vnet_get_session_main ()
+{
+ return &session_main;
+}
+
+always_inline session_worker_t *
+session_main_get_worker (clib_thread_index_t thread_index)
+{
+ return vec_elt_at_index (session_main.wrk, thread_index);
+}
+
+/**
+ * Get the session worker for a thread, validating the index first.
+ *
+ * @param thread_index Index into session_main.wrk
+ * @return Pointer to the worker, or 0 if thread_index is not a valid index
+ *         into session_main.wrk
+ */
+static inline session_worker_t *
+session_main_get_worker_if_valid (clib_thread_index_t thread_index)
+{
+  /* Valid indices are [0, vec_len); an index equal to vec_len is already one
+   * past the last element, so it must be rejected as well. */
+  if (thread_index >= vec_len (session_main.wrk))
+    return 0;
+  return session_main_get_worker (thread_index);
+}
+
+always_inline svm_msg_q_t *
+session_main_get_vpp_event_queue (clib_thread_index_t thread_index)
+{
+ return session_main_get_worker (thread_index)->vpp_event_queue;
+}
+
+always_inline u8
+session_main_is_enabled ()
+{
+ return session_main.is_enabled == 1;
+}
+
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
+#define session_cli_return_if_not_enabled() \
+ do \
+ { \
+ if (!session_main.is_enabled) \
+ return clib_error_return (0, "session layer is not enabled"); \
+ } \
+ while (0)
+
static inline void
session_evt_add_old (session_worker_t * wrk, session_evt_elt_t * elt)
{
@@ -392,7 +455,7 @@ session_evt_alloc_old (session_worker_t * wrk)
int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq);
-session_t *session_alloc (u32 thread_index);
+session_t *session_alloc (clib_thread_index_t thread_index);
void session_free (session_t * s);
void session_cleanup (session_t *s);
void session_program_cleanup (session_t *s);
@@ -400,14 +463,14 @@ void session_cleanup_half_open (session_handle_t ho_handle);
u8 session_is_valid (u32 si, u8 thread_index);
always_inline session_t *
-session_get (u32 si, u32 thread_index)
+session_get (u32 si, clib_thread_index_t thread_index)
{
ASSERT (session_is_valid (si, thread_index));
return pool_elt_at_index (session_main.wrk[thread_index].sessions, si);
}
always_inline session_t *
-session_get_if_valid (u64 si, u32 thread_index)
+session_get_if_valid (u64 si, clib_thread_index_t thread_index)
{
if (thread_index >= vec_len (session_main.wrk))
return 0;
@@ -455,7 +518,7 @@ session_get_from_handle_safe (session_handle_tu_t handle)
}
always_inline session_t *
-session_clone_safe (u32 session_index, u32 thread_index)
+session_clone_safe (u32 session_index, clib_thread_index_t thread_index)
{
u32 current_thread_index = vlib_get_thread_index (), new_index;
session_t *old_s, *new_s;
@@ -487,17 +550,18 @@ int session_enqueue_notify_cl (session_t *s);
/* Deprecated, use session_program_* functions */
int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type);
/* Deprecated, use session_program_* functions */
-int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
+int session_send_io_evt_to_thread_custom (void *data,
+ clib_thread_index_t thread_index,
session_evt_type_t evt_type);
int session_program_tx_io_evt (session_handle_tu_t sh,
session_evt_type_t evt_type);
int session_program_rx_io_evt (session_handle_tu_t sh);
int session_program_transport_io_evt (session_handle_tu_t sh,
session_evt_type_t evt_type);
-void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
- void *rpc_args);
-void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
- void *rpc_args);
+void session_send_rpc_evt_to_thread (clib_thread_index_t thread_index,
+ void *fp, void *rpc_args);
+void session_send_rpc_evt_to_thread_force (clib_thread_index_t thread_index,
+ void *fp, void *rpc_args);
void session_add_self_custom_tx_evt (transport_connection_t * tc,
u8 has_prio);
void sesssion_reschedule_tx (transport_connection_t * tc);
@@ -517,20 +581,6 @@ uword unformat_transport_connection (unformat_input_t * input,
* Interface to transport protos
*/
-int session_enqueue_stream_connection (transport_connection_t * tc,
- vlib_buffer_t * b, u32 offset,
- u8 queue_event, u8 is_in_order);
-int session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto,
- u8 queue_event);
-int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event);
-int session_enqueue_dgram_connection_cl (session_t *s,
- session_dgram_hdr_t *hdr,
- vlib_buffer_t *b, u8 proto,
- u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -544,10 +594,10 @@ void session_half_open_migrate_notify (transport_connection_t *tc);
int session_half_open_migrated_notify (transport_connection_t *tc);
void session_transport_closed_notify (transport_connection_t * tc);
void session_transport_reset_notify (transport_connection_t * tc);
-int session_stream_accept (transport_connection_t * tc, u32 listener_index,
- u32 thread_index, u8 notify);
-int session_dgram_accept (transport_connection_t * tc, u32 listener_index,
- u32 thread_index);
+int session_stream_accept (transport_connection_t *tc, u32 listener_index,
+ clib_thread_index_t thread_index, u8 notify);
+int session_dgram_accept (transport_connection_t *tc, u32 listener_index,
+ clib_thread_index_t thread_index);
/**
* Initialize session layer for given transport proto and ip version
@@ -566,9 +616,279 @@ void session_register_transport (transport_proto_t transport_proto,
u32 output_node);
transport_proto_t session_add_transport_proto (void);
void session_register_update_time_fn (session_update_time_fn fn, u8 is_add);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ clib_thread_index_t thread_index);
+void session_queue_run_on_main_thread (vlib_main_t *vm);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+void session_fifo_tuning (session_t *s, svm_fifo_t *f, session_ft_action_t act,
+ u32 len);
+
+/**
+ * Discards bytes from buffer chain
+ *
+ * It discards n_bytes_to_drop starting at first buffer after chain_b
+ */
+always_inline void
+session_enqueue_discard_chain_bytes (vlib_main_t *vm, vlib_buffer_t *b,
+ vlib_buffer_t **chain_b,
+ u32 n_bytes_to_drop)
+{
+ vlib_buffer_t *next = *chain_b;
+ u32 to_drop = n_bytes_to_drop;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ while (to_drop && (next->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ next = vlib_get_buffer (vm, next->next_buffer);
+ if (next->current_length > to_drop)
+ {
+ vlib_buffer_advance (next, to_drop);
+ to_drop = 0;
+ }
+ else
+ {
+ to_drop -= next->current_length;
+ next->current_length = 0;
+ }
+ }
+ *chain_b = next;
+
+ if (to_drop == 0)
+ b->total_length_not_including_first_buffer -= n_bytes_to_drop;
+}
+
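+/**
+ * Enqueue buffer chain tail
+ *
+ * Enqueues the buffers chained after b into the session's rx fifo.
+ *
+ * In-order data: if offset is non-zero, (offset - b->current_length) bytes
+ * are first discarded from the chain; if that exceeds the chain length
+ * nothing is enqueued and 0 is returned. The remaining chained buffers are
+ * collected into the worker's rx_segs vector and enqueued with a single
+ * svm_fifo_enqueue_segments () call that allows partial enqueue.
+ *
+ * Out-of-order data: every non-empty chained buffer is enqueued with
+ * svm_fifo_enqueue_with_offset (), starting at offset and advancing by the
+ * length of each buffer.
+ *
+ * @return In-order: value returned by svm_fifo_enqueue_segments (), or 0 if
+ *         the bytes to discard exceed the chain length. Out-of-order: 0 on
+ *         success, -1 if enqueueing one of the buffers failed.
+ */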
+always_inline int
+session_enqueue_chain_tail (session_t *s, vlib_buffer_t *b, u32 offset,
+ u8 is_in_order)
+{
+ vlib_buffer_t *chain_b;
+ u32 chain_bi;
+
+ if (is_in_order)
+ {
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ u32 diff, written = 0;
+
+ if (offset)
+ {
+ diff = offset - b->current_length;
+ if (diff > b->total_length_not_including_first_buffer)
+ return 0;
+ chain_b = b;
+ session_enqueue_discard_chain_bytes (wrk->vm, b, &chain_b, diff);
+ chain_bi = vlib_get_buffer_index (wrk->vm, chain_b);
+ }
+ else
+ {
+ chain_bi = b->next_buffer;
+ }
+
+ chain_b = vlib_get_buffer (wrk->vm, chain_bi);
+ svm_fifo_seg_t *seg;
+
+ while (chain_b)
+ {
+ vec_add2 (wrk->rx_segs, seg, 1);
+ seg->data = vlib_buffer_get_current (chain_b);
+ seg->len = chain_b->current_length;
+ chain_b = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ?
+ vlib_get_buffer (wrk->vm, chain_b->next_buffer) :
+ 0;
+ }
+
+ written = svm_fifo_enqueue_segments (s->rx_fifo, wrk->rx_segs,
+ vec_len (wrk->rx_segs),
+ 1 /* allow partial*/);
+
+ vec_reset_length (wrk->rx_segs);
+
+ return written;
+ }
+ else
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ u8 *data;
+ u32 len;
+
+ /* TODO svm_fifo_enqueue_segments with offset */
+ chain_bi = b->next_buffer;
+ do
+ {
+ chain_b = vlib_get_buffer (vm, chain_bi);
+ data = vlib_buffer_get_current (chain_b);
+ len = chain_b->current_length;
+ if (!len)
+ continue;
+
+ rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, len, data);
+ if (rv)
+ {
+ clib_warning ("failed to enqueue multi-buffer seg");
+ return -1;
+ }
+ offset += len;
+ }
+ while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT) ?
+ chain_b->next_buffer :
+ 0));
+
+ return 0;
+ }
+}
+
+/*
+ * Enqueue data for delivery to app. If requested, it queues an app
+ * notification event for later delivery.
+ *
+ * @param tc Transport connection whose session is to be enqueued data
+ * @param b Buffer, possibly the head of a buffer chain, to be enqueued
+ * @param offset Offset at which to start enqueueing if out-of-order
+ * @param queue_event Flag to indicate if an RX event is to be queued for
+ *                    in-order data. Queued events are only delivered once
+ *                    session_main_flush_enqueue_events () is called, which
+ *                    is useful when more data is enqueued and only one
+ *                    event is to be generated.
+ * @param is_in_order Flag to indicate if data is in order
+ * @return For in-order data, number of bytes enqueued or a negative value
+ *         if enqueueing failed. For out-of-order data, the value returned
+ *         by svm_fifo_enqueue_with_offset () for the first buffer.
+ */
+always_inline int
+session_enqueue_stream_connection (transport_connection_t *tc,
+ vlib_buffer_t *b, u32 offset,
+ u8 queue_event, u8 is_in_order)
+{
+ session_t *s;
+ int enqueued = 0, rv, in_order_off;
+
+ s = session_get (tc->s_index, tc->thread_index);
+
+ if (is_in_order)
+ {
+ enqueued = svm_fifo_enqueue (s->rx_fifo, b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) &&
+ enqueued >= 0))
+ {
+ in_order_off = enqueued > b->current_length ? enqueued : 0;
+ rv = session_enqueue_chain_tail (s, b, in_order_off, 1);
+ if (rv > 0)
+ enqueued += rv;
+ }
+ }
+ else
+ {
+ rv = svm_fifo_enqueue_with_offset (s->rx_fifo, offset, b->current_length,
+ vlib_buffer_get_current (b));
+ if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
+ session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
+ /* if something was enqueued, report even this as success for ooo
+ * segment handling */
+ return rv;
+ }
+
+ if (queue_event)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
+ if (!(s->flags & SESSION_F_RX_EVT))
+ {
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ s->flags |= SESSION_F_RX_EVT;
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
+ }
+
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
+ }
+
+ return enqueued;
+}
+
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
+{
+ int rv;
+
+ ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) >=
+ b->current_length + sizeof (*hdr));
+
+ if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ {
+ svm_fifo_seg_t segs[2] = { { (u8 *) hdr, sizeof (*hdr) },
+ { vlib_buffer_get_current (b),
+ b->current_length } };
+
+ rv =
+ svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, 0 /* allow_partial */);
+ }
+ else
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ svm_fifo_seg_t *segs = 0, *seg;
+ vlib_buffer_t *it = b;
+ u32 n_segs = 1;
+
+ vec_add2 (segs, seg, 1);
+ seg->data = (u8 *) hdr;
+ seg->len = sizeof (*hdr);
+ while (it)
+ {
+ vec_add2 (segs, seg, 1);
+ seg->data = vlib_buffer_get_current (it);
+ seg->len = it->current_length;
+ n_segs++;
+ if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ it = vlib_get_buffer (vm, it->next_buffer);
+ }
+ rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs,
+ 0 /* allow partial */);
+ vec_free (segs);
+ }
+
+ if (queue_event && rv > 0)
+ {
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
+ if (!(s->flags & SESSION_F_RX_EVT))
+ {
+ clib_thread_index_t thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
+ s->flags |= SESSION_F_RX_EVT;
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
+ }
+
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
+ }
+ return rv > 0 ? rv : 0;
+}
+
+always_inline int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+always_inline int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
always_inline void
session_set_state (session_t *s, session_state_t session_state)
@@ -640,19 +960,19 @@ transport_rx_fifo_req_deq_ntf (transport_connection_t *tc)
}
always_inline clib_time_type_t
-transport_time_now (u32 thread_index)
+transport_time_now (clib_thread_index_t thread_index)
{
return session_main.wrk[thread_index].last_vlib_time;
}
always_inline clib_us_time_t
-transport_us_time_now (u32 thread_index)
+transport_us_time_now (clib_thread_index_t thread_index)
{
return session_main.wrk[thread_index].last_vlib_us_time;
}
always_inline clib_time_type_t
-transport_seconds_per_loop (u32 thread_index)
+transport_seconds_per_loop (clib_thread_index_t thread_index)
{
return session_main.wrk[thread_index].vm->seconds_per_loop;
}
@@ -753,69 +1073,6 @@ ho_session_free (session_t *s)
transport_connection_t *listen_session_get_transport (session_t * s);
-/*
- * Session layer functions
- */
-
-always_inline session_main_t *
-vnet_get_session_main ()
-{
- return &session_main;
-}
-
-always_inline session_worker_t *
-session_main_get_worker (u32 thread_index)
-{
- return vec_elt_at_index (session_main.wrk, thread_index);
-}
-
-static inline session_worker_t *
-session_main_get_worker_if_valid (u32 thread_index)
-{
- if (thread_index > vec_len (session_main.wrk))
- return 0;
- return session_main_get_worker (thread_index);
-}
-
-always_inline svm_msg_q_t *
-session_main_get_vpp_event_queue (u32 thread_index)
-{
- return session_main_get_worker (thread_index)->vpp_event_queue;
-}
-
-always_inline u8
-session_main_is_enabled ()
-{
- return session_main.is_enabled == 1;
-}
-
-always_inline void
-session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
-{
- if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
- wrk->stats.errors[-error] += value;
- else
- SESSION_DBG ("unknown session counter");
-}
-
-always_inline void
-session_stat_error_inc (int error, int value)
-{
- session_worker_t *wrk;
- wrk = session_main_get_worker (vlib_get_thread_index ());
- session_worker_stat_error_inc (wrk, error, value);
-}
-
-#define session_cli_return_if_not_enabled() \
-do { \
- if (!session_main.is_enabled) \
- return clib_error_return (0, "session layer is not enabled"); \
-} while (0)
-
-void session_main_flush_enqueue_events (transport_proto_t transport_proto,
- u32 thread_index);
-void session_queue_run_on_main_thread (vlib_main_t * vm);
-
/**
* Add session node pending buffer with custom node
*
@@ -825,7 +1082,8 @@ void session_queue_run_on_main_thread (vlib_main_t * vm);
* must exist
*/
always_inline void
-session_add_pending_tx_buffer (u32 thread_index, u32 bi, u32 next_node)
+session_add_pending_tx_buffer (clib_thread_index_t thread_index, u32 bi,
+ u32 next_node)
{
session_worker_t *wrk = session_main_get_worker (thread_index);
vec_add1 (wrk->pending_tx_buffers, bi);
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 5ac21c4eb85..8192194ff34 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -517,7 +517,7 @@ mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
application_t *app;
- u32 thread_index;
+ clib_thread_index_t thread_index;
thread_index = session_thread_from_handle (new_sh);
app_wrk = app_worker_get (s->app_wrk_index);
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index aff102a6989..b223eff41b0 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -404,7 +404,7 @@ typedef struct session_cli_filter_
session_cli_endpt_flags_t endpt_flags;
session_state_t *states;
transport_proto_t transport_proto;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u32 verbose;
} session_cli_filter_t;
@@ -521,7 +521,8 @@ session_cli_show_session_filter (vlib_main_t *vm, session_cli_filter_t *sf)
}
void
-session_cli_show_events_thread (vlib_main_t * vm, u32 thread_index)
+session_cli_show_events_thread (vlib_main_t *vm,
+ clib_thread_index_t thread_index)
{
session_worker_t *wrk;
@@ -540,7 +541,7 @@ session_cli_show_events_thread (vlib_main_t * vm, u32 thread_index)
}
static void
-session_cli_show_events (vlib_main_t * vm, u32 thread_index)
+session_cli_show_events (vlib_main_t *vm, clib_thread_index_t thread_index)
{
session_main_t *smm = &session_main;
if (!thread_index)
@@ -824,7 +825,7 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
session_main_t *smm = &session_main;
- u32 thread_index = 0, clear_all = 0;
+ clib_thread_index_t thread_index = 0, clear_all = 0;
session_worker_t *wrk;
u32 session_index = ~0;
session_t *session;
diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c
index 01be281d4f7..dd3bde77058 100644
--- a/src/vnet/session/session_input.c
+++ b/src/vnet/session/session_input.c
@@ -4,6 +4,7 @@
#include <vnet/session/session.h>
#include <vnet/session/application.h>
+#include <vnet/session/application_local.h>
static inline int
mq_try_lock (svm_msg_q_t *mq)
@@ -34,7 +35,7 @@ app_worker_del_all_events (app_worker_t *app_wrk)
{
session_worker_t *wrk;
session_event_t *evt;
- u32 thread_index;
+ clib_thread_index_t thread_index;
session_t *s;
for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts);
@@ -72,7 +73,8 @@ app_worker_del_all_events (app_worker_t *app_wrk)
}
always_inline int
-app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
+app_worker_flush_events_inline (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index,
u8 is_builtin)
{
application_t *app = application_get (app_wrk->app_index);
@@ -166,6 +168,13 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
if (!(s->flags & SESSION_F_APP_CLOSED))
app->cb_fns.session_disconnect_callback (s);
}
+ else if (!session_has_transport (s))
+ {
+ /* Special handling for cut-through sessions for builtin apps
+ * similar to session_mq_accepted_reply_handler */
+ session_set_state (s, SESSION_STATE_READY);
+ ct_session_connect_notify (s, SESSION_E_NONE);
+ }
}
break;
case SESSION_CTRL_EVT_CONNECTED:
@@ -277,7 +286,8 @@ app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
}
int
-app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index)
+app_wrk_flush_wrk_events (app_worker_t *app_wrk,
+ clib_thread_index_t thread_index)
{
if (app_worker_application_is_builtin (app_wrk))
return app_worker_flush_events_inline (app_wrk, thread_index,
@@ -292,7 +302,7 @@ session_wrk_flush_events (session_worker_t *wrk)
{
app_worker_t *app_wrk;
uword app_wrk_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
thread_index = wrk->vm->thread_index;
app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf);
@@ -320,7 +330,7 @@ session_wrk_flush_events (session_worker_t *wrk)
VLIB_NODE_FN (session_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
session_worker_t *wrk;
wrk = session_main_get_worker (thread_index);
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 28a1feb1ed8..42b48a00d02 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -954,10 +954,10 @@ session_lookup_half_open_connection (u64 handle, u8 proto, u8 is_ip4)
* @return pointer to transport connection, if one is found, 0 otherwise
*/
transport_connection_t *
-session_lookup_connection_wt4 (u32 fib_index, ip4_address_t * lcl,
- ip4_address_t * rmt, u16 lcl_port,
- u16 rmt_port, u8 proto, u32 thread_index,
- u8 * result)
+session_lookup_connection_wt4 (u32 fib_index, ip4_address_t *lcl,
+ ip4_address_t *rmt, u16 lcl_port, u16 rmt_port,
+ u8 proto, clib_thread_index_t thread_index,
+ u8 *result)
{
session_table_t *st;
session_kv4_t kv4;
@@ -1185,10 +1185,10 @@ session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl, ip4_address_t * rmt,
* @return pointer to transport connection, if one is found, 0 otherwise
*/
transport_connection_t *
-session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl,
- ip6_address_t * rmt, u16 lcl_port,
- u16 rmt_port, u8 proto, u32 thread_index,
- u8 * result)
+session_lookup_connection_wt6 (u32 fib_index, ip6_address_t *lcl,
+ ip6_address_t *rmt, u16 lcl_port, u16 rmt_port,
+ u8 proto, clib_thread_index_t thread_index,
+ u8 *result)
{
session_table_t *st;
session_t *s;
@@ -1380,6 +1380,71 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
+/**
+ * Lookup exact match 6-tuple amongst established and half-open sessions
+ *
+ * Does not look into session rules table and does not try to find a listener.
+ */
+transport_connection_t *
+session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4)
+{
+ session_table_t *st;
+ session_t *s;
+ int rv;
+
+ if (is_ip4)
+ {
+ session_kv4_t kv4;
+
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (PREDICT_FALSE (!st))
+ return 0;
+
+ /*
+ * Lookup session amongst established ones
+ */
+ make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_16_8 (&st->v4_session_hash, &kv4);
+ if (rv == 0)
+ {
+ s = session_get_from_handle (kv4.value);
+ return transport_get_connection (proto, s->connection_index,
+ s->thread_index);
+ }
+
+ /*
+ * Try half-open connections
+ */
+ rv = clib_bihash_search_inline_16_8 (&st->v4_half_open_hash, &kv4);
+ if (rv == 0)
+ return transport_get_half_open (proto, kv4.value & 0xFFFFFFFF);
+ }
+ else
+ {
+ session_kv6_t kv6;
+
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (PREDICT_FALSE (!st))
+ return 0;
+
+ make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto);
+ rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6);
+ if (rv == 0)
+ {
+ s = session_get_from_handle (kv6.value);
+ return transport_get_connection (proto, s->connection_index,
+ s->thread_index);
+ }
+
+ /* Try half-open connections */
+ rv = clib_bihash_search_inline_48_8 (&st->v6_half_open_hash, &kv6);
+ if (rv == 0)
+ return transport_get_half_open (proto, kv6.value & 0xFFFFFFFF);
+ }
+ return 0;
+}
+
session_error_t
vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index 9f56af20a87..64016639190 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -43,25 +43,17 @@ session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
session_t *session_lookup_safe6 (u32 fib_index, ip6_address_t * lcl,
ip6_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto);
-transport_connection_t *session_lookup_connection_wt4 (u32 fib_index,
- ip4_address_t * lcl,
- ip4_address_t * rmt,
- u16 lcl_port,
- u16 rmt_port, u8 proto,
- u32 thread_index,
- u8 * is_filtered);
+transport_connection_t *session_lookup_connection_wt4 (
+ u32 fib_index, ip4_address_t *lcl, ip4_address_t *rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto, clib_thread_index_t thread_index, u8 *is_filtered);
transport_connection_t *session_lookup_connection4 (u32 fib_index,
ip4_address_t * lcl,
ip4_address_t * rmt,
u16 lcl_port,
u16 rmt_port, u8 proto);
-transport_connection_t *session_lookup_connection_wt6 (u32 fib_index,
- ip6_address_t * lcl,
- ip6_address_t * rmt,
- u16 lcl_port,
- u16 rmt_port, u8 proto,
- u32 thread_index,
- u8 * is_filtered);
+transport_connection_t *session_lookup_connection_wt6 (
+ u32 fib_index, ip6_address_t *lcl, ip6_address_t *rmt, u16 lcl_port,
+ u16 rmt_port, u8 proto, clib_thread_index_t thread_index, u8 *is_filtered);
transport_connection_t *session_lookup_connection6 (u32 fib_index,
ip6_address_t * lcl,
ip6_address_t * rmt,
@@ -72,6 +64,9 @@ transport_connection_t *session_lookup_connection (u32 fib_index,
ip46_address_t * rmt,
u16 lcl_port, u16 rmt_port,
u8 proto, u8 is_ip4);
+transport_connection_t *
+session_lookup_6tuple (u32 fib_index, ip46_address_t *lcl, ip46_address_t *rmt,
+ u16 lcl_port, u16 rmt_port, u8 proto, u8 is_ip4);
session_t *session_lookup_listener4 (u32 fib_index, ip4_address_t * lcl,
u16 lcl_port, u8 proto, u8 use_wildcard);
session_t *session_lookup_listener6 (u32 fib_index, ip6_address_t * lcl,
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index c0ff1de39bc..fb4c6252bb6 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -64,7 +64,8 @@ session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns)
}
always_inline u64
-session_wrk_tfd_timeout (session_wrk_state_t state, u32 thread_index)
+session_wrk_tfd_timeout (session_wrk_state_t state,
+ clib_thread_index_t thread_index)
{
if (state == SESSION_WRK_INTERRUPT)
return thread_index ? 1e6 : vlib_num_workers () ? 5e8 : 1e6;
@@ -282,7 +283,7 @@ session_mq_handle_connects_rpc (void *arg)
static void
session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- u32 thread_index = wrk - session_main.wrk;
+ clib_thread_index_t thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
@@ -778,7 +779,7 @@ session_wrk_handle_evts_main_rpc (void *args)
clib_llist_index_t ei, next_ei;
session_evt_elt_t *he, *elt;
session_worker_t *fwrk;
- u32 thread_index;
+ clib_thread_index_t thread_index;
vlib_worker_thread_barrier_sync (vm);
@@ -836,8 +837,7 @@ vlib_node_registration_t session_queue_node;
typedef struct
{
- u32 session_index;
- u32 server_thread_index;
+ clib_thread_index_t thread_index; /* thread index shown by the trace formatter as "thread index %d" */
} session_queue_trace_t;
/* packet trace format function */
@@ -848,8 +848,7 @@ format_session_queue_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *);
- s = format (s, "session index %d thread index %d",
- t->session_index, t->server_thread_index);
+ s = format (s, "thread index %d", t->thread_index);
return s;
}
@@ -880,25 +879,25 @@ enum
};
static void
-session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
- u32 next_index, vlib_buffer_t **bufs, u16 n_segs,
- session_t *s, u32 n_trace)
+session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *bis,
+ u16 *nexts, u16 n_bufs)
{
- vlib_buffer_t **b = bufs;
+ u32 n_trace = vlib_get_trace_count (vm, node), *bi = bis;
+ u16 *next = nexts;
+ vlib_buffer_t *b;
- while (n_trace && n_segs)
+ while (n_trace && n_bufs)
{
- if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0],
- 1 /* follow_chain */)))
+ b = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_TRUE (
+ vlib_trace_buffer (vm, node, next[0], b, 1 /* follow_chain */)))
{
- session_queue_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->session_index = s->session_index;
- t->server_thread_index = s->thread_index;
+ session_queue_trace_t *t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->thread_index = vm->thread_index;
n_trace--;
}
- b++;
- n_segs--;
+ bi++;
+ n_bufs--;
}
vlib_set_trace_count (vm, node, n_trace);
}
@@ -1194,7 +1193,7 @@ session_tx_not_ready (session_t * s, u8 peek_data)
}
else
{
- if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED || !s->tx_fifo)
return 2;
}
return 0;
@@ -1402,7 +1401,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
session_evt_elt_t * elt,
int *n_tx_packets, u8 peek_data)
{
- u32 n_trace, n_left, pbi, next_index, max_burst;
+ u32 n_left, pbi, next_index, max_burst;
session_tx_context_t *ctx = &wrk->ctx;
session_main_t *smm = &session_main;
session_event_t *e = &elt->evt;
@@ -1576,10 +1575,6 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs,
ctx->n_segs_per_evt);
- if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0))
- session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs,
- ctx->n_segs_per_evt, ctx->s, n_trace);
-
if (PREDICT_FALSE (n_bufs))
vlib_buffer_free (vm, ctx->tx_buffers, n_bufs);
@@ -1851,7 +1846,7 @@ static const u32 session_evt_msg_sizes[] = {
always_inline void
session_update_time_subscribers (session_main_t *smm, clib_time_type_t now,
- u32 thread_index)
+ clib_thread_index_t thread_index)
{
session_update_time_fn *fn;
@@ -1959,7 +1954,7 @@ static uword
session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- u32 thread_index = vm->thread_index, __clib_unused n_evts;
+ clib_thread_index_t thread_index = vm->thread_index, __clib_unused n_evts;
session_evt_elt_t *elt, *ctrl_he, *new_he, *old_he;
session_main_t *smm = vnet_get_session_main ();
session_worker_t *wrk = &smm->wrk[thread_index];
@@ -2072,7 +2067,13 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))
- session_flush_pending_tx_buffers (wrk, node);
+ {
+ if (PREDICT_FALSE (vlib_get_trace_count (vm, node) > 0))
+ session_tx_trace_frame (vm, node, wrk->pending_tx_buffers,
+ wrk->pending_tx_nexts,
+ vec_len (wrk->pending_tx_nexts));
+ session_flush_pending_tx_buffers (wrk, node);
+ }
vlib_node_increment_counter (vm, session_queue_node.index,
SESSION_QUEUE_ERROR_TX, n_tx_packets);
@@ -2119,7 +2120,7 @@ session_wrk_tfd_write_ready (clib_file_t *cf)
void
session_wrk_enable_adaptive_mode (session_worker_t *wrk)
{
- u32 thread_index = wrk->vm->thread_index;
+ clib_thread_index_t thread_index = wrk->vm->thread_index;
clib_file_t template = { 0 };
if ((wrk->timerfd = timerfd_create (CLOCK_MONOTONIC, TFD_NONBLOCK)) < 0)
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 935f8f189ee..47a77449ba7 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -34,7 +34,7 @@ typedef union session_handle_tu_
struct
{
u32 session_index;
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
} __attribute__ ((__transparent_union__)) session_handle_tu_t;
@@ -49,7 +49,9 @@ typedef struct _session_endpoint
#undef _
} session_endpoint_t;
-#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener")
+#define foreach_session_endpoint_cfg_flags \
+ _ (PROXY_LISTEN, "proxy listener") \
+ _ (SECURE, "secure")
typedef enum session_endpoint_cfg_flags_bits_
{
@@ -218,7 +220,7 @@ typedef struct session_
u32 session_index;
/** Index of the thread that allocated the session */
- u32 thread_index;
+ clib_thread_index_t thread_index;
};
};
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index e8c9490decb..edec182541e 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -106,7 +106,7 @@ format_transport_connection (u8 * s, va_list * args)
{
u32 transport_proto = va_arg (*args, u32);
u32 conn_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
transport_proto_vft_t *tp_vft;
transport_connection_t *tc;
@@ -247,15 +247,15 @@ format_transport_state (u8 *s, va_list *args)
}
u32
-transport_endpoint_lookup (transport_endpoint_table_t * ht, u8 proto,
- ip46_address_t * ip, u16 port)
+transport_endpoint_lookup (transport_endpoint_table_t *ht, u8 proto,
+ u32 fib_index, ip46_address_t *ip, u16 port)
{
clib_bihash_kv_24_8_t kv;
int rv;
kv.key[0] = ip->as_u64[0];
kv.key[1] = ip->as_u64[1];
- kv.key[2] = (u64) port << 8 | (u64) proto;
+ kv.key[2] = (u64) fib_index << 32 | (u64) port << 8 | (u64) proto;
rv = clib_bihash_search_inline_24_8 (ht, &kv);
if (rv == 0)
@@ -272,7 +272,7 @@ transport_endpoint_table_add (transport_endpoint_table_t * ht, u8 proto,
kv.key[0] = te->ip.as_u64[0];
kv.key[1] = te->ip.as_u64[1];
- kv.key[2] = (u64) te->port << 8 | (u64) proto;
+ kv.key[2] = (u64) te->fib_index << 32 | (u64) te->port << 8 | (u64) proto;
kv.value = value;
clib_bihash_add_del_24_8 (ht, &kv, 1);
@@ -286,7 +286,7 @@ transport_endpoint_table_del (transport_endpoint_table_t * ht, u8 proto,
kv.key[0] = te->ip.as_u64[0];
kv.key[1] = te->ip.as_u64[1];
- kv.key[2] = (u64) te->port << 8 | (u64) proto;
+ kv.key[2] = (u64) te->fib_index << 32 | (u64) te->port << 8 | (u64) proto;
clib_bihash_add_del_24_8 (ht, &kv, 0);
}
@@ -431,8 +431,8 @@ default_get_transport_endpoint (transport_connection_t * tc,
void
transport_get_endpoint (transport_proto_t tp, u32 conn_index,
- u32 thread_index, transport_endpoint_t * tep,
- u8 is_lcl)
+ clib_thread_index_t thread_index,
+ transport_endpoint_t *tep, u8 is_lcl)
{
if (tp_vfts[tp].get_transport_endpoint)
tp_vfts[tp].get_transport_endpoint (conn_index, thread_index, tep,
@@ -547,14 +547,15 @@ transport_program_endpoint_cleanup (u32 lepi)
}
int
-transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
+transport_release_local_endpoint (u8 proto, u32 fib_index,
+ ip46_address_t *lcl_ip, u16 port)
{
transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
- port);
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto,
+ fib_index, lcl_ip, port);
if (lepi == ENDPOINT_INVALID_INDEX)
return -1;
@@ -574,7 +575,8 @@ transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
}
static int
-transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
+transport_endpoint_mark_used (u8 proto, u32 fib_index, ip46_address_t *ip,
+ u16 port)
{
transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
@@ -582,14 +584,15 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
- tei =
- transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port);
+ tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto,
+ fib_index, ip, port);
if (tei != ENDPOINT_INVALID_INDEX)
return SESSION_E_PORTINUSE;
/* Pool reallocs with worker barrier */
lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
+ lep->ep.fib_index = fib_index;
lep->ep.port = port;
lep->proto = proto;
lep->refcnt = 1;
@@ -601,7 +604,8 @@ transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
}
void
-transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
+transport_share_local_endpoint (u8 proto, u32 fib_index,
+ ip46_address_t *lcl_ip, u16 port)
{
transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
@@ -610,8 +614,8 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
/* Active opens should call this only from a control thread, which are also
* used to allocate and free ports. So, pool has only one writer and
* potentially many readers. Listeners are allocated with barrier */
- lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
- port);
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto,
+ fib_index, lcl_ip, port);
if (lepi != ENDPOINT_INVALID_INDEX)
{
lep = pool_elt_at_index (tm->local_endpoints, lepi);
@@ -653,16 +657,17 @@ transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
}
}
- if (!transport_endpoint_mark_used (proto, lcl_addr, port))
+ if (!transport_endpoint_mark_used (proto, rmt->fib_index, lcl_addr,
+ port))
break;
/* IP:port pair already in use, check if 6-tuple available */
- if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
- rmt->port, proto, rmt->is_ip4))
+ if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
continue;
/* 6-tuple is available so increment lcl endpoint refcount */
- transport_share_local_endpoint (proto, lcl_addr, port);
+ transport_share_local_endpoint (proto, rmt->fib_index, lcl_addr, port);
break;
}
@@ -679,6 +684,13 @@ transport_port_alloc_max_tries ()
return tm->port_alloc_max_tries;
}
+u32
+transport_port_local_in_use ()
+{ /* Report how many local endpoints are counted as in use */
+ transport_main_t *tm = &tp_main;
+ return pool_elts (tm->local_endpoints) - vec_len (tm->lcl_endpts_freelist); /* pool element count minus freelist length; presumably freelist entries are still pool members awaiting release - TODO confirm */
+}
+
void
transport_clear_stats ()
{
@@ -783,17 +795,19 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
{
*lcl_port = rmt_cfg->peer.port;
- if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port))
+ if (!transport_endpoint_mark_used (proto, rmt->fib_index, lcl_addr,
+ rmt_cfg->peer.port))
return 0;
/* IP:port pair already in use, check if 6-tuple available */
- if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
- rmt_cfg->peer.port, rmt->port, proto,
- rmt->is_ip4))
+ if (session_lookup_6tuple (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
return SESSION_E_PORTINUSE;
/* 6-tuple is available so increment lcl endpoint refcount */
- transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port);
+ transport_share_local_endpoint (proto, rmt->fib_index, lcl_addr,
+ rmt_cfg->peer.port);
return 0;
}
@@ -816,7 +830,7 @@ u8 *
format_transport_pacer (u8 * s, va_list * args)
{
spacer_t *pacer = va_arg (*args, spacer_t *);
- u32 thread_index = va_arg (*args, int);
+ clib_thread_index_t thread_index = va_arg (*args, int);
clib_us_time_t now, diff;
now = transport_us_time_now (thread_index);
@@ -952,7 +966,8 @@ transport_connection_tx_pacer_update_bytes (transport_connection_t * tc,
}
void
-transport_update_pacer_time (u32 thread_index, clib_time_type_t now)
+transport_update_pacer_time (clib_thread_index_t thread_index,
+ clib_time_type_t now)
{ /* Hand the new time to the session worker that belongs to thread_index */
session_wrk_update_time (session_main_get_worker (thread_index), now);
}
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 289bf471af0..31ad36bdc67 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -74,10 +74,10 @@ typedef struct _transport_proto_vft
u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl);
u32 (*stop_listen) (u32 conn_index);
int (*connect) (transport_endpoint_cfg_t * rmt);
- void (*half_close) (u32 conn_index, u32 thread_index);
- void (*close) (u32 conn_index, u32 thread_index);
- void (*reset) (u32 conn_index, u32 thread_index);
- void (*cleanup) (u32 conn_index, u32 thread_index);
+ void (*half_close) (u32 conn_index, clib_thread_index_t thread_index);
+ void (*close) (u32 conn_index, clib_thread_index_t thread_index);
+ void (*reset) (u32 conn_index, clib_thread_index_t thread_index);
+ void (*cleanup) (u32 conn_index, clib_thread_index_t thread_index);
void (*cleanup_ho) (u32 conn_index);
clib_error_t *(*enable) (vlib_main_t * vm, u8 is_en);
@@ -97,7 +97,8 @@ typedef struct _transport_proto_vft
/*
* Connection retrieval
*/
- transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx);
+ transport_connection_t *(*get_connection) (u32 conn_idx,
+ clib_thread_index_t thread_idx);
transport_connection_t *(*get_listener) (u32 conn_index);
transport_connection_t *(*get_half_open) (u32 conn_index);
@@ -111,13 +112,14 @@ typedef struct _transport_proto_vft
/*
* Properties retrieval/setting
*/
- void (*get_transport_endpoint) (u32 conn_index, u32 thread_index,
+ void (*get_transport_endpoint) (u32 conn_index,
+ clib_thread_index_t thread_index,
transport_endpoint_t *tep, u8 is_lcl);
void (*get_transport_listener_endpoint) (u32 conn_index,
transport_endpoint_t *tep,
u8 is_lcl);
- int (*attribute) (u32 conn_index, u32 thread_index, u8 is_get,
- transport_endpt_attr_t *attr);
+ int (*attribute) (u32 conn_index, clib_thread_index_t thread_index,
+ u8 is_get, transport_endpt_attr_t *attr);
/*
* Properties
@@ -144,8 +146,8 @@ void transport_cleanup (transport_proto_t tp, u32 conn_index,
u8 thread_index);
void transport_cleanup_half_open (transport_proto_t tp, u32 conn_index);
void transport_get_endpoint (transport_proto_t tp, u32 conn_index,
- u32 thread_index, transport_endpoint_t * tep,
- u8 is_lcl);
+ clib_thread_index_t thread_index,
+ transport_endpoint_t *tep, u8 is_lcl);
void transport_get_listener_endpoint (transport_proto_t tp, u32 conn_index,
transport_endpoint_t * tep, u8 is_lcl);
int transport_connection_attribute (transport_proto_t tp, u32 conn_index,
@@ -179,7 +181,8 @@ transport_custom_tx (transport_proto_t tp, void *s,
}
static inline int
-transport_app_rx_evt (transport_proto_t tp, u32 conn_index, u32 thread_index)
+transport_app_rx_evt (transport_proto_t tp, u32 conn_index,
+ clib_thread_index_t thread_index)
{
transport_connection_t *tc;
if (!tp_vfts[tp].app_rx_evt)
@@ -248,11 +251,12 @@ int transport_alloc_local_port (u8 proto, ip46_address_t *ip,
transport_endpoint_cfg_t *rmt);
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt,
ip46_address_t *lcl_addr, u16 *lcl_port);
-void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip,
- u16 port);
-int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip,
- u16 port);
+void transport_share_local_endpoint (u8 proto, u32 fib_index,
+ ip46_address_t *lcl_ip, u16 port);
+int transport_release_local_endpoint (u8 proto, u32 fib_index,
+ ip46_address_t *lcl_ip, u16 port);
u16 transport_port_alloc_max_tries ();
+u32 transport_port_local_in_use ();
void transport_clear_stats ();
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
@@ -367,7 +371,8 @@ transport_connection_tx_pacer_update_bytes (transport_connection_t * tc,
* @param thread_index thread for which time is updated
* @param now time now
*/
-void transport_update_pacer_time (u32 thread_index, clib_time_type_t now);
+void transport_update_pacer_time (clib_thread_index_t thread_index,
+ clib_time_type_t now);
#endif /* SRC_VNET_SESSION_TRANSPORT_H_ */
diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h
index 4a2f861814f..55cb1206e6b 100644
--- a/src/vnet/session/transport_types.h
+++ b/src/vnet/session/transport_types.h
@@ -113,7 +113,7 @@ typedef struct _transport_connection
u32 s_index; /**< Parent session index */
u32 c_index; /**< Connection index in transport pool */
- u32 thread_index; /**< Worker-thread index */
+ clib_thread_index_t thread_index; /**< Worker-thread index */
u8 flags; /**< Transport specific flags */
u8 dscp; /**< Differentiated Services Code Point */
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
index e546e1db0e7..5e022ebd637 100644
--- a/src/vnet/srv6/sr_api.c
+++ b/src/vnet/srv6/sr_api.c
@@ -215,7 +215,7 @@ vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp)
ntohl (mp->sl_index), ntohl (mp->sids.weight));
vec_free (segments);
- REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_V2_REPLY);
}
static void
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
index 47082e9c96a..8bf0996bbe1 100644
--- a/src/vnet/srv6/sr_localsid.c
+++ b/src/vnet/srv6/sr_localsid.c
@@ -1196,7 +1196,7 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
@@ -1500,7 +1500,7 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
@@ -1809,7 +1809,7 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
@@ -2116,7 +2116,7 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
while (n_left_from > 0)
{
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index a9114628f95..92586669378 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -503,8 +503,9 @@ update_lb (ip6_sr_policy_t * sr_policy)
};
/* Add FIB entry for BSID */
- fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
- FIB_PROTOCOL_IP6);
+ fhc = fib_table_get_flow_hash_config (
+ fib_table_find (FIB_PROTOCOL_IP6, sr_policy->fib_table),
+ FIB_PROTOCOL_IP6);
dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
load_balance_create (0, DPO_PROTO_IP6, fhc));
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index aea49558882..ed8c514ae8e 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -242,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc)
/* Cleanup local endpoint if this was an active connect */
if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT))
- transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
- tc->c_lcl_port);
+ transport_release_local_endpoint (TRANSPORT_PROTO_TCP, tc->c_fib_index,
+ &tc->c_lcl_ip, tc->c_lcl_port);
/* Check if connection is not yet fully established */
if (tc->state == TCP_STATE_SYN_SENT)
@@ -432,7 +432,7 @@ tcp_connection_close (tcp_connection_t * tc)
}
static void
-tcp_session_half_close (u32 conn_index, u32 thread_index)
+tcp_session_half_close (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_worker_ctx_t *wrk;
tcp_connection_t *tc;
@@ -456,7 +456,7 @@ tcp_session_half_close (u32 conn_index, u32 thread_index)
}
static void
-tcp_session_close (u32 conn_index, u32 thread_index)
+tcp_session_close (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
@@ -464,7 +464,7 @@ tcp_session_close (u32 conn_index, u32 thread_index)
}
static void
-tcp_session_cleanup (u32 conn_index, u32 thread_index)
+tcp_session_cleanup (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
@@ -487,7 +487,7 @@ tcp_session_cleanup_ho (u32 conn_index)
}
static void
-tcp_session_reset (u32 conn_index, u32 thread_index)
+tcp_session_reset (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
@@ -856,7 +856,7 @@ static u8 *
format_tcp_session (u8 * s, va_list * args)
{
u32 tci = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
tcp_connection_t *tc;
@@ -906,7 +906,7 @@ format_tcp_half_open_session (u8 * s, va_list * args)
}
static transport_connection_t *
-tcp_session_get_transport (u32 conn_index, u32 thread_index)
+tcp_session_get_transport (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
if (PREDICT_FALSE (!tc))
@@ -1016,8 +1016,8 @@ tcp_get_attribute (tcp_connection_t *tc, transport_endpt_attr_t *attr)
}
static int
-tcp_session_attribute (u32 conn_index, u32 thread_index, u8 is_get,
- transport_endpt_attr_t *attr)
+tcp_session_attribute (u32 conn_index, clib_thread_index_t thread_index,
+ u8 is_get, transport_endpt_attr_t *attr)
{
tcp_connection_t *tc = tcp_connection_get (conn_index, thread_index);
@@ -1279,7 +1279,7 @@ tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
static void
tcp_handle_cleanups (tcp_worker_ctx_t * wrk, clib_time_type_t now)
{
- u32 thread_index = wrk->vm->thread_index;
+ clib_thread_index_t thread_index = wrk->vm->thread_index;
tcp_cleanup_req_t *req;
tcp_connection_t *tc;
@@ -1404,7 +1404,8 @@ tcp_reschedule (tcp_connection_t * tc)
static void
tcp_expired_timers_dispatch (u32 * expired_timers)
{
- u32 thread_index = vlib_get_thread_index (), n_left, max_per_loop;
+ clib_thread_index_t thread_index = vlib_get_thread_index (), n_left,
+ max_per_loop;
u32 connection_index, timer_id, n_expired, max_loops;
tcp_worker_ctx_t *wrk;
tcp_connection_t *tc;
@@ -1467,7 +1468,7 @@ tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
tcp_wrk_stats_t acc = {};
tcp_worker_ctx_t *wrk;
- vec_foreach (wrk, tm->wrk_ctx)
+ vec_foreach (wrk, tm->wrk)
{
#define _(name, type, str) acc.name += wrk->stats.name;
foreach_tcp_wrk_stat
@@ -1515,7 +1516,7 @@ tcp_main_enable (vlib_main_t * vm)
int thread;
/* Already initialized */
- if (tm->wrk_ctx)
+ if (tm->wrk)
return 0;
if ((error = vlib_call_init_function (vm, ip_main_init)))
@@ -1537,11 +1538,11 @@ tcp_main_enable (vlib_main_t * vm)
*/
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (tm->wrk_ctx, num_threads - 1);
+ vec_validate (tm->wrk, num_threads - 1);
n_workers = num_threads == 1 ? 1 : vtm->n_threads;
prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
- wrk = &tm->wrk_ctx[0];
+ wrk = &tm->wrk[0];
wrk->tco_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
tcp4_output_node.index);
wrk->tco_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
@@ -1549,7 +1550,7 @@ tcp_main_enable (vlib_main_t * vm)
for (thread = 0; thread < num_threads; thread++)
{
- wrk = &tm->wrk_ctx[thread];
+ wrk = &tm->wrk[thread];
vec_validate (wrk->pending_deq_acked, 255);
vec_validate (wrk->pending_disconnects, 255);
@@ -1562,8 +1563,8 @@ tcp_main_enable (vlib_main_t * vm)
if (thread > 0)
{
- wrk->tco_next_node[0] = tm->wrk_ctx[0].tco_next_node[0];
- wrk->tco_next_node[1] = tm->wrk_ctx[0].tco_next_node[1];
+ wrk->tco_next_node[0] = tm->wrk[0].tco_next_node[0];
+ wrk->tco_next_node[1] = tm->wrk[0].tco_next_node[1];
}
/*
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 8feac807d59..67dc7407e91 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -220,7 +220,7 @@ typedef struct tcp_configuration_
typedef struct _tcp_main
{
/** per-worker context */
- tcp_worker_ctx_t *wrk_ctx;
+ tcp_worker_ctx_t *wrk;
/* Pool of listeners. */
tcp_connection_t *listener_pool;
@@ -299,10 +299,10 @@ vnet_get_tcp_main ()
}
always_inline tcp_worker_ctx_t *
-tcp_get_worker (u32 thread_index)
+tcp_get_worker (clib_thread_index_t thread_index)
{ /* Return a pointer to the tcp worker context stored at thread_index */
- ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
- return &tcp_main.wrk_ctx[thread_index];
+ ASSERT (thread_index < vec_len (tcp_main.wrk)); /* index must fall inside the worker vector */
+ return &tcp_main.wrk[thread_index];
}
tcp_connection_t *tcp_connection_alloc (u8 thread_index);
@@ -314,8 +314,8 @@ void tcp_connection_cleanup (tcp_connection_t * tc);
void tcp_connection_del (tcp_connection_t * tc);
int tcp_half_open_connection_cleanup (tcp_connection_t * tc);
-void tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
- u32 thread_index, u8 is_ip4);
+void tcp_send_reset_w_pkt (tcp_connection_t *tc, vlib_buffer_t *pkt,
+ clib_thread_index_t thread_index, u8 is_ip4);
void tcp_send_reset (tcp_connection_t * tc);
void tcp_send_syn (tcp_connection_t * tc);
void tcp_send_synack (tcp_connection_t * tc);
diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c
index 3cb57a550de..3624cd8158a 100644
--- a/src/vnet/tcp/tcp_bt.c
+++ b/src/vnet/tcp/tcp_bt.c
@@ -635,6 +635,8 @@ tcp_bt_flush_samples (tcp_connection_t * tc)
tcp_bt_sample_t *bts;
u32 *samples = 0, *si;
+ ASSERT (pool_elts (bt->samples) != 0);
+
vec_validate (samples, pool_elts (bt->samples) - 1);
vec_reset_length (samples);
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index 55bc5764df2..c14994aa440 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -919,7 +919,7 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
return clib_error_return (0, "unknown input `%U'", format_unformat_error,
input);
- for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
+ for (thread = 0; thread < vec_len (tm->wrk); thread++)
{
wrk = tcp_get_worker (thread);
vlib_cli_output (vm, "Thread %u:\n", thread);
@@ -957,7 +957,7 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return clib_error_return (0, "unknown input `%U'", format_unformat_error,
input);
- for (thread = 0; thread < vec_len (tm->wrk_ctx); thread++)
+ for (thread = 0; thread < vec_len (tm->wrk); thread++)
{
wrk = tcp_get_worker (thread);
clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
diff --git a/src/vnet/tcp/tcp_cubic.c b/src/vnet/tcp/tcp_cubic.c
index cf2b9a17d18..63abcd1312d 100644
--- a/src/vnet/tcp/tcp_cubic.c
+++ b/src/vnet/tcp/tcp_cubic.c
@@ -49,7 +49,7 @@ typedef struct cubic_data_
STATIC_ASSERT (sizeof (cubic_data_t) <= TCP_CC_DATA_SZ, "cubic data len");
static inline f64
-cubic_time (u32 thread_index)
+cubic_time (clib_thread_index_t thread_index)
{ /* Time source used by cubic: whatever tcp_time_now_us reports for the thread */
return tcp_time_now_us (thread_index);
}
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index ccd0e3fe3ee..6ab467d759b 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -56,7 +56,7 @@ tcp_buffer_hdr (vlib_buffer_t * b)
}
always_inline tcp_connection_t *
-tcp_connection_get (u32 conn_index, u32 thread_index)
+tcp_connection_get (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
if (PREDICT_FALSE (pool_is_free_index (wrk->connections, conn_index)))
@@ -65,10 +65,10 @@ tcp_connection_get (u32 conn_index, u32 thread_index)
}
always_inline tcp_connection_t *
-tcp_connection_get_if_valid (u32 conn_index, u32 thread_index)
+tcp_connection_get_if_valid (u32 conn_index, clib_thread_index_t thread_index)
{
tcp_worker_ctx_t *wrk;
- if (thread_index >= vec_len (tcp_main.wrk_ctx))
+ if (thread_index >= vec_len (tcp_main.wrk))
return 0;
wrk = tcp_get_worker (thread_index);
if (pool_is_free_index (wrk->connections, conn_index))
@@ -215,9 +215,9 @@ tcp_is_lost_fin (tcp_connection_t * tc)
* Time used to generate timestamps, not the timestamp
*/
always_inline u32
-tcp_time_tstamp (u32 thread_index)
+tcp_time_tstamp (clib_thread_index_t thread_index)
{ /* Read the time_tstamp value held in the worker context of thread_index */
- return tcp_main.wrk_ctx[thread_index].time_tstamp;
+ return tcp_main.wrk[thread_index].time_tstamp;
}
/**
@@ -226,14 +226,13 @@ tcp_time_tstamp (u32 thread_index)
always_inline u32
tcp_tstamp (tcp_connection_t * tc)
{ /* Worker time_tstamp of the connection's thread minus the connection's timestamp_delta */
- return (tcp_main.wrk_ctx[tc->c_thread_index].time_tstamp -
- tc->timestamp_delta);
+ return (tcp_main.wrk[tc->c_thread_index].time_tstamp - tc->timestamp_delta);
}
always_inline f64
-tcp_time_now_us (u32 thread_index)
+tcp_time_now_us (clib_thread_index_t thread_index)
{ /* Read the time_us value held in the worker context of thread_index */
- return tcp_main.wrk_ctx[thread_index].time_us;
+ return tcp_main.wrk[thread_index].time_us;
}
always_inline void
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index cd3e4b7700c..47ae8513f62 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -217,20 +217,6 @@ static int
tcp_segment_validate (tcp_worker_ctx_t * wrk, tcp_connection_t * tc0,
vlib_buffer_t * b0, tcp_header_t * th0, u32 * error0)
{
- /* We could get a burst of RSTs interleaved with acks */
- if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED))
- {
- tcp_send_reset (tc0);
- *error0 = TCP_ERROR_CONNECTION_CLOSED;
- goto error;
- }
-
- if (PREDICT_FALSE (!tcp_ack (th0) && !tcp_rst (th0) && !tcp_syn (th0)))
- {
- *error0 = TCP_ERROR_SEGMENT_INVALID;
- goto error;
- }
-
if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts, 0)))
{
*error0 = TCP_ERROR_OPTIONS;
@@ -512,7 +498,7 @@ tcp_estimate_initial_rtt (tcp_connection_t * tc)
static void
tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk)
{
- u32 thread_index = wrk->vm->thread_index;
+ clib_thread_index_t thread_index = wrk->vm->thread_index;
u32 *pending_deq_acked;
tcp_connection_t *tc;
int i;
@@ -1025,7 +1011,8 @@ tcp_program_disconnect (tcp_worker_ctx_t * wrk, tcp_connection_t * tc)
static void
tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
{
- u32 thread_index, *pending_disconnects, *pending_resets;
+ clib_thread_index_t thread_index;
+ u32 *pending_disconnects, *pending_resets;
tcp_connection_t *tc;
int i;
@@ -1372,11 +1359,47 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
+always_inline int
+tcp_segment_is_exception (tcp_connection_t *tc, tcp_header_t *th)
+{
+  /* Exception: no connection, closed connection, or none of ACK/RST/SYN set.
+   * tcp-input allows through segments without ack, e.g., fin without ack */
+  if (!tc || tc->state == TCP_STATE_CLOSED)
+    return 1;
+  return !(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN));
+}
+
+always_inline void
+tcp_segment_handle_exception (tcp_connection_t *tc, tcp_header_t *th,
+                              u32 *error)
+{
+  /* Record in *error why the segment is an exception. Conditions are
+   * checked in order (no connection, closed connection, none of
+   * ACK/RST/SYN set) and only the first match is acted upon; *error is
+   * left untouched if none matches */
+
+  if (!tc)
+    {
+      *error = TCP_ERROR_INVALID_CONNECTION;
+    }
+  else if (tc->state == TCP_STATE_CLOSED)
+    {
+      /* We could get a burst of RSTs interleaved with acks */
+      tcp_send_reset (tc);
+      *error = TCP_ERROR_CONNECTION_CLOSED;
+    }
+  else if (!(th->flags & (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN)))
+    {
+      *error = TCP_ERROR_SEGMENT_INVALID;
+    }
+}
+
always_inline uword
tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, n_left_from, *from;
+ clib_thread_index_t thread_index = vm->thread_index;
+ u32 n_left_from, *from;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[TCP_N_ERROR] = { 0 };
@@ -1404,15 +1427,14 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
thread_index);
+ th = tcp_buffer_hdr (b[0]);
- if (PREDICT_FALSE (tc == 0))
+ if (PREDICT_FALSE (tcp_segment_is_exception (tc, th)))
{
- error = TCP_ERROR_INVALID_CONNECTION;
+ tcp_segment_handle_exception (tc, th, &error);
goto done;
}
- th = tcp_buffer_hdr (b[0]);
-
/* TODO header prediction fast path */
/* 1-4: check SEQ, RST, SYN */
@@ -1862,8 +1884,8 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
new_tc->rcv_wscale = 0;
- new_tc->snd_wnd = clib_net_to_host_u16 (tcp->window)
- << new_tc->snd_wscale;
+ /* RFC7323 sec 2.2: Window field in a syn segment must not be scaled */
+ new_tc->snd_wnd = clib_net_to_host_u16 (tcp->window);
new_tc->snd_wl1 = seq;
new_tc->snd_wl2 = ack;
@@ -2005,7 +2027,7 @@ static void
tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 *from, u32 n_bufs)
{
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
tcp_connection_t *tc = 0;
tcp_rx_trace_t *t;
vlib_buffer_t *b;
@@ -2031,7 +2053,8 @@ always_inline uword
tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, n_left_from, *from, max_deq;
+ clib_thread_index_t thread_index = vm->thread_index;
+ u32 n_left_from, *from, max_deq;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -2524,7 +2547,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from, *from, n_syns = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
u32 tw_iss = 0;
from = vlib_frame_vector_args (frame);
@@ -2819,8 +2842,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
- next[0] = next[1] = TCP_INPUT_NEXT_DROP;
-
tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
is_nolookup);
tc1 = tcp_input_lookup_buffer (b[1], thread_index, &error1, is_ip4,
@@ -2881,7 +2902,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
}
- next[0] = TCP_INPUT_NEXT_DROP;
tc0 = tcp_input_lookup_buffer (b[0], thread_index, &error0, is_ip4,
is_nolookup);
if (PREDICT_TRUE (tc0 != 0))
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 2fd20acf241..120ad6c533e 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -299,7 +299,7 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts,
void
tcp_update_burst_snd_vars (tcp_connection_t * tc)
{
- tcp_main_t *tm = &tcp_main;
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
/* Compute options to be used for connection. These may be reused when
* sending data or to compute the effective mss (snd_mss) */
@@ -310,8 +310,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
ASSERT (tc->snd_mss > 0);
- tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
- &tc->snd_opts);
+ tcp_options_write (wrk->cached_opts, &tc->snd_opts);
tcp_update_rcv_wnd (tc);
@@ -647,8 +646,8 @@ tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
* It extracts connection info out of original packet
*/
void
-tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
- u32 thread_index, u8 is_ip4)
+tcp_send_reset_w_pkt (tcp_connection_t *tc, vlib_buffer_t *pkt,
+ clib_thread_index_t thread_index, u8 is_ip4)
{
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_main_t *vm = wrk->vm;
@@ -875,7 +874,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
{
u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
u32 advertise_wnd, data_len;
- tcp_main_t *tm = &tcp_main;
tcp_header_t *th;
data_len = b->current_length;
@@ -907,9 +905,8 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
if (maybe_burst)
{
- clib_memcpy_fast ((u8 *) (th + 1),
- tm->wrk_ctx[tc->c_thread_index].cached_opts,
- tc->snd_opts_len);
+ tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+ clib_memcpy_fast ((u8 *) (th + 1), wrk->cached_opts, tc->snd_opts_len);
}
else
{
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index 08809f70070..d409ee5f126 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -21,7 +21,7 @@
static tls_main_t tls_main;
tls_engine_vft_t *tls_vfts;
-void tls_disconnect (u32 ctx_handle, u32 thread_index);
+void tls_disconnect (u32 ctx_handle, clib_thread_index_t thread_index);
void
tls_disconnect_transport (tls_ctx_t * ctx)
@@ -684,7 +684,7 @@ tls_connect (transport_endpoint_cfg_t * tep)
}
void
-tls_disconnect (u32 ctx_handle, u32 thread_index)
+tls_disconnect (u32 ctx_handle, clib_thread_index_t thread_index)
{
tls_ctx_t *ctx;
@@ -820,7 +820,7 @@ tls_stop_listen (u32 lctx_index)
}
transport_connection_t *
-tls_connection_get (u32 ctx_index, u32 thread_index)
+tls_connection_get (u32 ctx_index, clib_thread_index_t thread_index)
{
tls_ctx_t *ctx;
ctx = tls_ctx_get_w_thread (ctx_index, thread_index);
@@ -959,7 +959,7 @@ u8 *
format_tls_connection (u8 * s, va_list * args)
{
u32 ctx_index = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
tls_ctx_t *ctx;
@@ -1013,8 +1013,8 @@ format_tls_half_open (u8 * s, va_list * args)
}
static void
-tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index,
- transport_endpoint_t * tep, u8 is_lcl)
+tls_transport_endpoint_get (u32 ctx_handle, clib_thread_index_t thread_index,
+ transport_endpoint_t *tep, u8 is_lcl)
{
tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index);
session_t *ts;
@@ -1179,7 +1179,7 @@ dtls_half_open_get (u32 ho_index)
}
static void
-dtls_cleanup_callback (u32 ctx_index, u32 thread_index)
+dtls_cleanup_callback (u32 ctx_index, clib_thread_index_t thread_index)
{
/* No op */
}
diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h
index 244e2042f11..7e69432512e 100644
--- a/src/vnet/tls/tls.h
+++ b/src/vnet/tls/tls.h
@@ -146,10 +146,10 @@ typedef struct tls_main_
typedef struct tls_engine_vft_
{
u32 (*ctx_alloc) (void);
- u32 (*ctx_alloc_w_thread) (u32 thread_index);
+ u32 (*ctx_alloc_w_thread) (clib_thread_index_t thread_index);
void (*ctx_free) (tls_ctx_t * ctx);
void *(*ctx_detach) (tls_ctx_t *ctx);
- u32 (*ctx_attach) (u32 thread_index, void *ctx);
+ u32 (*ctx_attach) (clib_thread_index_t thread_index, void *ctx);
tls_ctx_t *(*ctx_get) (u32 ctx_index);
tls_ctx_t *(*ctx_get_w_thread) (u32 ctx_index, u8 thread_index);
int (*ctx_init_client) (tls_ctx_t * ctx);
diff --git a/src/vnet/tls/tls_inlines.h b/src/vnet/tls/tls_inlines.h
index 3e3f59fcf51..2f12a779102 100644
--- a/src/vnet/tls/tls_inlines.h
+++ b/src/vnet/tls/tls_inlines.h
@@ -23,7 +23,8 @@ tls_ctx_alloc (crypto_engine_type_t engine_type)
}
static inline u32
-tls_ctx_alloc_w_thread (crypto_engine_type_t engine_type, u32 thread_index)
+tls_ctx_alloc_w_thread (crypto_engine_type_t engine_type,
+ clib_thread_index_t thread_index)
{
u32 ctx_index;
ctx_index = tls_vfts[engine_type].ctx_alloc_w_thread (thread_index);
@@ -65,7 +66,8 @@ tls_ctx_init_client (tls_ctx_t *ctx)
}
static inline u32
-tls_ctx_attach (crypto_engine_type_t engine_type, u32 thread_index, void *ctx)
+tls_ctx_attach (crypto_engine_type_t engine_type,
+ clib_thread_index_t thread_index, void *ctx)
{
u32 ctx_index;
ctx_index = tls_vfts[engine_type].ctx_attach (thread_index, ctx);
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 1fc055f8d50..4ed5a68fa02 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -71,7 +71,7 @@ udp_connection_unregister_port (u16 lcl_port, u8 is_ip4)
}
udp_connection_t *
-udp_connection_alloc (u32 thread_index)
+udp_connection_alloc (clib_thread_index_t thread_index)
{
udp_worker_t *wrk = udp_worker_get (thread_index);
udp_connection_t *uc;
@@ -99,8 +99,8 @@ udp_connection_free (udp_connection_t * uc)
static void
udp_connection_cleanup (udp_connection_t * uc)
{
- transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
- uc->c_lcl_port);
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, uc->c_fib_index,
+ &uc->c_lcl_ip, uc->c_lcl_port);
udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4);
udp_connection_free (uc);
}
@@ -115,7 +115,7 @@ udp_connection_delete (udp_connection_t * uc)
static void
udp_handle_cleanups (void *args)
{
- u32 thread_index = (u32) pointer_to_uword (args);
+ clib_thread_index_t thread_index = (u32) pointer_to_uword (args);
udp_connection_t *uc;
udp_worker_t *wrk;
u32 *uc_index;
@@ -205,6 +205,7 @@ udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
clib_spinlock_init (&listener->rx_lock);
if (!um->csum_offload)
listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
+ listener->start_ts = transport_time_now (listener->c_thread_index);
udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4);
return listener->c_c_index;
@@ -303,6 +304,8 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b,
vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN;
}
+ uc->bytes_out += vlib_buffer_length_in_chain (vm, b);
+ uc->dgrams_out += 1;
uh->checksum =
udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4);
@@ -359,7 +362,7 @@ udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
}
static transport_connection_t *
-udp_session_get (u32 connection_index, u32 thread_index)
+udp_session_get (u32 connection_index, clib_thread_index_t thread_index)
{
udp_connection_t *uc;
uc = udp_connection_get (connection_index, thread_index);
@@ -369,7 +372,7 @@ udp_session_get (u32 connection_index, u32 thread_index)
}
static void
-udp_session_close (u32 connection_index, u32 thread_index)
+udp_session_close (u32 connection_index, clib_thread_index_t thread_index)
{
udp_connection_t *uc;
@@ -384,7 +387,7 @@ udp_session_close (u32 connection_index, u32 thread_index)
}
static void
-udp_session_cleanup (u32 connection_index, u32 thread_index)
+udp_session_cleanup (u32 connection_index, clib_thread_index_t thread_index)
{
udp_connection_t *uc;
uc = udp_connection_get (connection_index, thread_index);
@@ -419,7 +422,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
udp_main_t *um = &udp_main;
ip46_address_t lcl_addr;
udp_connection_t *uc;
- u32 thread_index;
+ clib_thread_index_t thread_index;
u16 lcl_port;
int rv;
@@ -434,8 +437,8 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
/* If specific source port was requested abort */
if (rmt->peer.port)
{
- transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
+ transport_release_local_endpoint (
+ TRANSPORT_PROTO_UDP, rmt->fib_index, &lcl_addr, lcl_port);
return SESSION_E_PORTINUSE;
}
@@ -443,8 +446,8 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
rmt->is_ip4))
{
- transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
+ transport_release_local_endpoint (
+ TRANSPORT_PROTO_UDP, rmt->fib_index, &lcl_addr, lcl_port);
lcl_port =
transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt);
if ((int) lcl_port < 1)
@@ -472,6 +475,7 @@ udp_open_connection (transport_endpoint_cfg_t * rmt)
uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
uc->next_node_index = rmt->next_node_index;
uc->next_node_opaque = rmt->next_node_opaque;
+ uc->start_ts = transport_time_now (thread_index);
udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4);
@@ -482,7 +486,7 @@ static transport_connection_t *
udp_session_get_half_open (u32 conn_index)
{
udp_connection_t *uc;
- u32 thread_index;
+ clib_thread_index_t thread_index;
/* We don't poll main thread if we have workers */
thread_index = transport_cl_thread ();
@@ -496,7 +500,7 @@ static u8 *
format_udp_session (u8 * s, va_list * args)
{
u32 uci = va_arg (*args, u32);
- u32 thread_index = va_arg (*args, u32);
+ clib_thread_index_t thread_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
udp_connection_t *uc;
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index c6f867500e0..6ff22009a4c 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -87,6 +87,12 @@ typedef struct
u32 sw_if_index; /**< connection sw_if_index */
u32 next_node_index; /**< Can be used to control next node in output */
u32 next_node_opaque; /**< Opaque to pass to next node */
+ u64 bytes_in; /**< bytes received */
+ u64 dgrams_in; /**< rfc4113 dgrams received */
+ u64 bytes_out; /**< bytes sent */
+ u64 dgrams_out; /**< rfc4113 dgrams sent */
+ u32 errors_in; /**< rfc4113 dgrams in errors */
+ clib_time_type_t start_ts; /**< time stamp when connection was created */
} udp_connection_t;
#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD))
@@ -171,13 +177,13 @@ void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port,
char *dst_port_name, u8 is_ip4);
always_inline udp_worker_t *
-udp_worker_get (u32 thread_index)
+udp_worker_get (clib_thread_index_t thread_index)
{
return vec_elt_at_index (udp_main.wrk, thread_index);
}
always_inline udp_connection_t *
-udp_connection_get (u32 conn_index, u32 thread_index)
+udp_connection_get (u32 conn_index, clib_thread_index_t thread_index)
{
udp_worker_t *wrk = udp_worker_get (thread_index);
@@ -205,11 +211,12 @@ udp_connection_from_transport (transport_connection_t * tc)
}
void udp_connection_free (udp_connection_t * uc);
-udp_connection_t *udp_connection_alloc (u32 thread_index);
+udp_connection_t *udp_connection_alloc (clib_thread_index_t thread_index);
void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
always_inline udp_connection_t *
-udp_connection_clone_safe (u32 connection_index, u32 thread_index)
+udp_connection_clone_safe (u32 connection_index,
+ clib_thread_index_t thread_index)
{
u32 current_thread_index = vlib_get_thread_index (), new_index;
udp_connection_t *old_c, *new_c;
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 6c8992cd0de..c910b508933 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -18,6 +18,7 @@
#include <vppinfra/format_table.h>
#include <vnet/udp/udp.h>
#include <vnet/session/session_types.h>
+#include <vnet/session/session.h>
u8 *
format_udp_connection_id (u8 * s, va_list * args)
@@ -91,17 +92,36 @@ format_udp_connection_flags (u8 * s, va_list * args)
}
static u8 *
+format_udp_stats (u8 *s, va_list *args)
+{
+  /* Print uc's rx/tx counters; errors_in is u32, so it takes %u not %lu */
+  udp_connection_t *uc = va_arg (*args, udp_connection_t *);
+  u32 indent = format_get_indent (s);
+  s = format (s, "in dgrams %lu bytes %lu err %u\n", uc->dgrams_in,
+              uc->bytes_in, uc->errors_in);
+  return format (s, "%Uout dgrams %lu bytes %lu", format_white_space, indent,
+                 uc->dgrams_out, uc->bytes_out);
+}
+
+static u8 *
format_udp_vars (u8 * s, va_list * args)
{
udp_connection_t *uc = va_arg (*args, udp_connection_t *);
- s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
+ s = format (s, " index %u cfg: %U flags: %U\n", uc->c_c_index,
format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
- s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index);
- if (!(uc->flags & UDP_CONN_F_LISTEN))
- s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
- else
- s = format (s, "\n");
+ s = format (s, " fib_index %u next_node %u opaque %u", uc->c_fib_index,
+ uc->next_node_index, uc->next_node_opaque);
+
+ if (uc->flags & UDP_CONN_F_LISTEN)
+ {
+ s = format (s, "\n");
+ return s;
+ }
+
+ s = format (s, " sw_if_index %d mss %u duration %.3f\n", uc->sw_if_index,
+ uc->mss, transport_time_now (uc->c_thread_index) - uc->start_ts);
+ s = format (s, " stats: %U\n", format_udp_stats, uc);
return s;
}
diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c
index a86614f5475..99658ef6d03 100644
--- a/src/vnet/udp/udp_encap_node.c
+++ b/src/vnet/udp/udp_encap_node.c
@@ -78,7 +78,7 @@ udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_combined_counter_main_t *cm = &udp_encap_counters;
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, next_index;
- u32 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index 693824f9628..e4aaa0c7218 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -36,7 +36,7 @@ typedef struct
{
u32 connection;
u32 disposition;
- u32 thread_index;
+ clib_thread_index_t thread_index;
} udp_input_trace_t;
/* packet trace format function */
@@ -101,8 +101,8 @@ udp_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node,
}
static udp_connection_t *
-udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
- u32 thread_index)
+udp_connection_accept (udp_connection_t *listener, session_dgram_hdr_t *hdr,
+ clib_thread_index_t thread_index)
{
udp_connection_t *uc;
@@ -129,9 +129,10 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
}
static void
-udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
- session_dgram_hdr_t * hdr0, u32 thread_index,
- vlib_buffer_t * b, u8 queue_event, u32 * error0)
+udp_connection_enqueue (udp_connection_t *uc0, session_t *s0,
+ session_dgram_hdr_t *hdr0,
+ clib_thread_index_t thread_index, vlib_buffer_t *b,
+ u8 queue_event, u32 *error0)
{
int wrote0;
@@ -146,7 +147,15 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
/* Expect cl udp enqueue to fail because fifo enqueue */
if (PREDICT_FALSE (wrote0 == 0))
- *error0 = UDP_ERROR_FIFO_FULL;
+ {
+ *error0 = UDP_ERROR_FIFO_FULL;
+ uc0->errors_in += 1;
+ }
+ else
+ {
+ uc0->bytes_in += wrote0;
+ uc0->dgrams_in += 1;
+ }
return;
}
@@ -155,6 +164,7 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
< hdr0->data_length + sizeof (session_dgram_hdr_t))
{
*error0 = UDP_ERROR_FIFO_FULL;
+ uc0->errors_in += 1;
return;
}
@@ -175,7 +185,15 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
/* In some rare cases, session_enqueue_dgram_connection can fail because a
* chunk cannot be allocated in the RX FIFO */
if (PREDICT_FALSE (wrote0 == 0))
- *error0 = UDP_ERROR_FIFO_NOMEM;
+ {
+ *error0 = UDP_ERROR_FIFO_NOMEM;
+ uc0->errors_in += 1;
+ }
+ else
+ {
+ uc0->bytes_in += wrote0;
+ uc0->dgrams_in += 1;
+ }
}
always_inline session_t *
@@ -241,7 +259,8 @@ always_inline uword
udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, u8 is_ip4)
{
- u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer;
+ clib_thread_index_t thread_index = vm->thread_index;
+ u32 n_left_from, *from, *first_buffer;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[UDP_N_ERROR] = { 0 };
diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c
index 22b94141365..87bb150e403 100644
--- a/src/vnet/udp/udp_output.c
+++ b/src/vnet/udp/udp_output.c
@@ -52,7 +52,7 @@ format_udp_tx_trace (u8 *s, va_list *args)
}
always_inline udp_connection_t *
-udp_output_get_connection (vlib_buffer_t *b, u32 thread_index)
+udp_output_get_connection (vlib_buffer_t *b, clib_thread_index_t thread_index)
{
if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN))
return udp_listener_get (vnet_buffer (b)->tcp.connection_index);
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index f1102dc321e..9013f4bf878 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -42,7 +42,7 @@
#include <linux/if_tun.h>
#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
+#include <vlib/file.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_table.h>
@@ -153,7 +153,7 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_interface_main_t *im = &vnm->interface_main;
u32 n_bytes = 0;
int i;
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
for (i = 0; i < n_packets; i++)
{
@@ -242,7 +242,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_buffer_t *b;
u32 bi;
const uword buffer_size = vlib_buffer_get_default_data_size (vm);
- u16 thread_index = vm->thread_index;
+ clib_thread_index_t thread_index = vm->thread_index;
/** Make sure we have some RX buffers. */
{
diff --git a/src/vnet/util/refcount.c b/src/vnet/util/refcount.c
index a7b525d67be..dcb29841262 100644
--- a/src/vnet/util/refcount.c
+++ b/src/vnet/util/refcount.c
@@ -32,7 +32,7 @@ u64 vlib_refcount_get(vlib_refcount_t *r, u32 index)
{
u64 count = 0;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 thread_index;
+ clib_thread_index_t thread_index;
for (thread_index = 0; thread_index < tm->n_vlib_mains; thread_index++) {
vlib_refcount_lock(r->per_cpu[thread_index].counter_lock);
if (index < vec_len(r->per_cpu[thread_index].counters))
diff --git a/src/vnet/util/refcount.h b/src/vnet/util/refcount.h
index 4c7d7bdbdd5..63bc80d72be 100644
--- a/src/vnet/util/refcount.h
+++ b/src/vnet/util/refcount.h
@@ -64,8 +64,9 @@ void vlib_refcount_unlock (clib_spinlock_t counter_lock)
void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
-static_always_inline
-void vlib_refcount_add(vlib_refcount_t *r, u32 thread_index, u32 counter_index, i32 v)
+static_always_inline void
+vlib_refcount_add (vlib_refcount_t *r, clib_thread_index_t thread_index,
+ u32 counter_index, i32 v)
{
vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[thread_index];
if (PREDICT_FALSE(counter_index >= vec_len(per_cpu->counters)))
@@ -80,7 +81,7 @@ static_always_inline
void vlib_refcount_init(vlib_refcount_t *r)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 thread_index;
+ clib_thread_index_t thread_index;
r->per_cpu = 0;
vec_validate (r->per_cpu, tm->n_vlib_mains - 1);
diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h
index 53435c4a359..4fd1619935e 100644
--- a/src/vnet/util/throttle.h
+++ b/src/vnet/util/throttle.h
@@ -40,7 +40,7 @@ extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets,
f64 time);
always_inline u64
-throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
+throttle_seed (throttle_t *t, clib_thread_index_t thread_index, f64 time_now)
{
if (time_now - t->last_seed_change_time[thread_index] > t->time)
{
@@ -53,7 +53,8 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
}
always_inline int
-throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed)
+throttle_check (throttle_t *t, clib_thread_index_t thread_index, u64 hash,
+ u64 seed)
{
ASSERT (is_pow2 (t->buckets));
diff --git a/src/vpp-api/python/vpp_papi/vpp_papi_async.py b/src/vpp-api/python/vpp_papi/vpp_papi_async.py
index d9a4fabb69e..44e2a78eeea 100644
--- a/src/vpp-api/python/vpp_papi/vpp_papi_async.py
+++ b/src/vpp-api/python/vpp_papi/vpp_papi_async.py
@@ -451,7 +451,8 @@ class VPPApiClient:
for m in r.message_table:
n = m.name
self.message_table[n] = m.index
- self.vpp_dictionary_maxid = len(self.message_table)
+ # Find the maximum index of the message table
+ self.vpp_dictionary_maxid = max(self.message_table.values() or [0])
# self.worker_task = asyncio.create_task(self.message_handler(event_queue))
requests = {}
diff --git a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
index 174ab74d0b8..1ba365ad6e1 100644
--- a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
+++ b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
@@ -177,7 +177,8 @@ class VppTransport:
return 0
def msg_table_max_index(self):
- return len(self.message_table)
+ """Return the maximum index of the message table."""
+ return max(self.message_table.values() or [0])
def write(self, buf):
"""Send a binary-packed message to VPP."""
diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c
index 9e5101bd9f9..e9fd346fbbb 100644
--- a/src/vpp-api/vapi/vapi.c
+++ b/src/vpp-api/vapi/vapi.c
@@ -507,6 +507,10 @@ vapi_sock_recv_internal (vapi_ctx_t ctx, u8 **vec_msg, u32 timeout)
vec_validate (sock->rx_buffer, sizeof (*mbp) - 1);
n = recv (sock->fd, sock->rx_buffer + current_rx_index,
sizeof (*mbp) - current_rx_index, MSG_DONTWAIT);
+
+ if (n == 0)
+ return VAPI_ECONNRESET;
+
if (n < 0)
{
if (errno == EAGAIN && clib_time_now (&ctx->time) >= deadline)
@@ -776,15 +780,19 @@ vapi_sock_client_connect (vapi_ctx_t ctx, char *path, const char *name)
{
qstatus = vapi_sock_recv_internal (ctx, &msg, 0);
- if (qstatus == 0)
+ if (qstatus == VAPI_OK)
goto read_one_msg;
+
+ if (qstatus != VAPI_EAGAIN)
+ return VAPI_ECON_FAIL;
+
ts.tv_sec = 0;
ts.tv_nsec = 10000 * 1000; /* 10 ms */
while (nanosleep (&ts, &tsrem) < 0)
ts = tsrem;
}
/* Timeout... */
- return -1;
+ return VAPI_ECON_FAIL;
read_one_msg:
if (vec_len (msg) == 0)
@@ -1338,9 +1346,14 @@ vapi_sock_disconnect (vapi_ctx_t ctx)
rv = VAPI_ENORESP;
goto fail;
}
- if (vapi_sock_recv_internal (ctx, &msg, 0) < 0)
+
+ rv = vapi_sock_recv_internal (ctx, &msg, 0);
+ if (rv == VAPI_EAGAIN)
continue;
+ if (rv != VAPI_OK)
+ goto fail;
+
if (vec_len (msg) == 0)
continue;
diff --git a/src/vpp/CMakeLists.txt b/src/vpp/CMakeLists.txt
index 84144e4d059..88766021ba9 100644
--- a/src/vpp/CMakeLists.txt
+++ b/src/vpp/CMakeLists.txt
@@ -20,7 +20,7 @@ add_custom_command(
COMMAND mkdir
ARGS -p ${CMAKE_CURRENT_BINARY_DIR}/app
COMMAND scripts/generate_version_h
- ARGS ${CMAKE_CURRENT_BINARY_DIR}/app/version.h
+ ARGS ${CMAKE_CURRENT_BINARY_DIR}/app/version.h ${VPP_PLATFORM}
COMMENT "Generating VPP version.h"
)
diff --git a/src/vpp/conf/80-vpp.conf b/src/vpp/conf/80-vpp.conf
index 2207e2e3824..33230236eb4 100644
--- a/src/vpp/conf/80-vpp.conf
+++ b/src/vpp/conf/80-vpp.conf
@@ -1,8 +1,13 @@
# Number of 2MB hugepages desired
vm.nr_hugepages=1024
-# Must be greater than or equal to (2 * vm.nr_hugepages).
-vm.max_map_count=3096
+# The vm max_map_count must be greater than or equal to (2 * vm.nr_hugepages).
+
+# The system default is often an order of magnitude greater than the
+# value below. If you uncomment this stanza and reboot as-is, watch
+# out for seemingly "random" severe application failures; known to
+# occur in Brave, Firefox, and VirtualBox to name but a few.
+# vm.max_map_count=3096
# All groups allowed to access hugepages
vm.hugetlb_shm_group=0
@@ -12,4 +17,6 @@ vm.hugetlb_shm_group=0
# If the existing kernel.shmmax setting (cat /proc/sys/kernel/shmmax)
# is greater than the calculated TotalHugepageSize then set this parameter
# to current shmmax value.
-kernel.shmmax=2147483648
+# The Linux default is 4278190079; you don't need to change it unless you
+# configure more than 2039 2MB hugepages.
+# kernel.shmmax=2147483648
diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf
index a30a15ab2b1..8e7aebd8271 100644
--- a/src/vpp/conf/startup.conf
+++ b/src/vpp/conf/startup.conf
@@ -231,6 +231,18 @@ cpu {
# update-interval <f64-seconds>, sets the segment scrape / update interval
# }
+## L3 FIB
+# l3fib {
+ ## load balance pool size preallocation (expected number of objects)
+ # load-balance-pool-size 1M
+
+ ## fib entry pool size preallocation (expected number of objects)
+ # fib-entry-pool-size 1M
+
+ ## ip4 mtrie pool size preallocation (expected number of mtries)
+ # ip4-mtrie-pool-size 1K
+# }
+
## L2 FIB
# l2fib {
## l2fib hash table size.
diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c
index dd4f4cc3353..2808265ffb6 100644
--- a/src/vpp/vnet/main.c
+++ b/src/vpp/vnet/main.c
@@ -123,6 +123,7 @@ main (int argc, char *argv[])
unformat_input_t input, sub_input;
u8 *s = 0, *v = 0;
int main_core = ~0;
+ int cpu_translate = 0;
cpu_set_t cpuset;
void *main_heap;
@@ -282,6 +283,8 @@ main (int argc, char *argv[])
unix_main.flags |= UNIX_FLAG_INTERACTIVE;
else if (!strncmp (argv[i], "nosyslog", 8))
unix_main.flags |= UNIX_FLAG_NOSYSLOG;
+ else if (!strncmp (argv[i], "relative", 8))
+ cpu_translate = 1;
}
defaulted:
@@ -329,6 +332,17 @@ defaulted:
unformat_free (&input);
+ int translate_main_core = os_translate_cpu_to_affinity_bitmap (main_core);
+
+ if (cpu_translate && main_core != ~0)
+ {
+ if (translate_main_core == -1)
+ clib_error ("cpu %u is not available to be used"
+ " for the main thread in relative mode",
+ main_core);
+ main_core = translate_main_core;
+ }
+
/* if main thread affinity is unspecified, set to current running cpu */
if (main_core == ~0)
main_core = sched_getcpu ();
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index 83a8b2a7e57..08a5fa213ab 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -194,7 +194,6 @@ set(VPPINFRA_HEADERS
random_isaac.h
rbtree.h
serialize.h
- smp.h
socket.h
sparse_vec.h
stack.h
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index d488b1a659c..975607d904e 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -544,7 +544,7 @@ BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
BVT (clib_bihash_value) * v;
BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
BVT (clib_bihash_value) * working_copy;
- u32 thread_index = os_get_thread_index ();
+ clib_thread_index_t thread_index = os_get_thread_index ();
int log2_working_copy_length;
ASSERT (h->alloc_lock[0]);
@@ -696,7 +696,7 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash) (
int i, limit;
u64 new_hash;
u32 new_log2_pages, old_log2_pages;
- u32 thread_index = os_get_thread_index ();
+ clib_thread_index_t thread_index = os_get_thread_index ();
int mark_bucket_linear;
int resplit_once;
diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h
index 822f1bcc51f..1532103e9c1 100644
--- a/src/vppinfra/bihash_vec8_8.h
+++ b/src/vppinfra/bihash_vec8_8.h
@@ -46,6 +46,7 @@ static inline void
clib_bihash_mark_free_vec8_8 (clib_bihash_kv_vec8_8_t *v)
{
v->value = 0xFEEDFACE8BADF00DULL;
+ v->key = ~0ULL;
}
/** Decide if a clib_bihash_kv_vec8_8_t instance is free
diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h
index 4ab7bcf7a7c..e4badb7f27d 100644
--- a/src/vppinfra/bitmap.h
+++ b/src/vppinfra/bitmap.h
@@ -381,11 +381,12 @@ clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
@param ai - the bitmap
@param body - the expression to evaluate for each set bit
*/
-#define clib_bitmap_foreach(i,ai) \
- if (ai) \
- for (i = clib_bitmap_first_set (ai); \
- i != ~0; \
- i = clib_bitmap_next_set (ai, i + 1))
+/* Implementation note: the loop walks a private uword cursor (__index) and
+ * copies it into i before every iteration, so i only needs to be assignable
+ * from uword.  __dummy exists solely to perform the first assignment to i
+ * inside the for-init declaration.  The loop stops when the cursor is ~0. */
+#define clib_bitmap_foreach(i, ai) \
+ if (ai) \
+ for (uword __index = clib_bitmap_first_set (ai), \
+ __clib_unused __dummy = (i) = __index; \
+ __index != ~0; \
+ __index = clib_bitmap_next_set (ai, __index + 1), (i) = __index)
/** Return the lowest numbered set bit in a bitmap
@param ai - pointer to the bitmap
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index c1122f59ff6..bf73bd95a84 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -195,6 +195,13 @@ next_with_same_number_of_set_bits (uword x)
return ripple | ones;
}
+/* Zero the first n_uwords words of the bitmap stored at bmp. */
+static_always_inline void
+uword_bitmap_clear (uword *bmp, uword n_uwords)
+{
+  for (uword w = 0; w < n_uwords; w++)
+    bmp[w] = 0;
+}
+
#define foreach_set_bit_index(i, v) \
for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v)); \
_tmp; \
@@ -273,6 +280,34 @@ uword_bitmap_find_first_set (uword *bmp)
return (b - bmp) * uword_bits + get_lowest_set_bit_index (b[0]);
}
+/* Return the n_bits-wide field that starts at bit index i of bmp,
+ * right-aligned in the result.  The field may straddle two adjacent words,
+ * in which case the following word is read as well.
+ * NOTE(review): assumes n_bits <= uword_bits - confirm against callers. */
+always_inline uword
+uword_bitmap_get_multiple (uword *bmp, uword i, uword n_bits)
+{
+  uword *word = bmp + i / uword_bits;
+  uword offset = i % uword_bits;
+  uword field = (word[0] >> offset) & pow2_mask (n_bits);
+
+  if (offset + n_bits > uword_bits)
+    {
+      /* bits available in the first word; the rest come from word[1] */
+      uword n_low = uword_bits - offset;
+      field |= (word[1] & pow2_mask (n_bits - n_low)) << n_low;
+    }
+
+  return field;
+}
+
+/* Return up to n_bits bits starting at bit index i of bmp, reading only the
+ * single word that contains bit i; bits that would fall in the following
+ * word are not fetched. */
+always_inline uword
+uword_bitmap_get_multiple_no_check (uword *bmp, uword i, uword n_bits)
+{
+  uword word = bmp[i / uword_bits];
+  uword offset = i % uword_bits;
+
+  return (word >> offset) & pow2_mask (n_bits);
+}
+
static_always_inline u32
bit_extract_u32 (u32 v, u32 mask)
{
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
index 5348738ec6a..cb90da5c1e0 100644
--- a/src/vppinfra/clib.h
+++ b/src/vppinfra/clib.h
@@ -39,6 +39,7 @@
#define included_clib_h
#include <stddef.h>
+#include <stdalign.h>
#if __has_include(<vppinfra/config.h>)
#include <vppinfra/config.h>
diff --git a/src/vppinfra/clib_error.h b/src/vppinfra/clib_error.h
index 45f18eb1fe4..5db1a5e3440 100644
--- a/src/vppinfra/clib_error.h
+++ b/src/vppinfra/clib_error.h
@@ -23,7 +23,7 @@ typedef struct
/* Error message. */
u8 *what;
- /* Where error occurred (e.g. __FUNCTION__ __LINE__) */
+ /* Where error occurred (e.g. __func__ __LINE__) */
const u8 *where;
uword flags;
diff --git a/src/vppinfra/devicetree.c b/src/vppinfra/devicetree.c
index df5a24f198e..309308c926e 100644
--- a/src/vppinfra/devicetree.c
+++ b/src/vppinfra/devicetree.c
@@ -4,6 +4,7 @@
#include <vppinfra/clib.h>
#include <vppinfra/devicetree.h>
+#include <vppinfra/hash.h>
#ifdef __linux
#include <sys/types.h>
diff --git a/src/vppinfra/devicetree.h b/src/vppinfra/devicetree.h
index db7d8411a11..be94c1487ee 100644
--- a/src/vppinfra/devicetree.h
+++ b/src/vppinfra/devicetree.h
@@ -6,7 +6,8 @@
#define CLIB_DEVICETREE_H_
#include <vppinfra/clib.h>
-#include <vlib/vlib.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
#ifdef __linux
#define CLIB_DT_LINUX_PREFIX "/sys/firmware/devicetree/base"
diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h
index d0825bdd5b2..6a66319148d 100644
--- a/src/vppinfra/elog.h
+++ b/src/vppinfra/elog.h
@@ -444,21 +444,21 @@ elog_data_inline (elog_main_t * em, elog_event_type_t * type,
#define ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,func) \
{ .format = fmt, .function = func, }
-#define ELOG_TYPE_INIT(fmt) \
- ELOG_TYPE_INIT_FORMAT_AND_FUNCTION(fmt,(char *) __FUNCTION__)
+#define ELOG_TYPE_INIT(fmt) \
+ ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, (char *) __func__)
#define ELOG_TYPE_DECLARE_HELPER(f,fmt,func) \
static elog_event_type_t __ELOG_TYPE_VAR(f) = \
ELOG_TYPE_INIT_FORMAT_AND_FUNCTION (fmt, func)
-#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt) \
- ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __FUNCTION__)
+#define ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f, fmt) \
+ ELOG_TYPE_DECLARE_HELPER (f, fmt, (char *) __func__)
#define ELOG_TYPE_DECLARE_FORMAT(f,fmt) \
ELOG_TYPE_DECLARE_HELPER (f, fmt, 0)
-/* Shorthands with and without __FUNCTION__.
- D for decimal; X for hex. F for __FUNCTION__. */
+/* Shorthands with and without __func__.
+ D for decimal; X for hex. F for __func__. */
#define ELOG_TYPE(f,fmt) ELOG_TYPE_DECLARE_FORMAT_AND_FUNCTION(f,fmt)
#define ELOG_TYPE_D(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " %d")
#define ELOG_TYPE_X(f) ELOG_TYPE_DECLARE_FORMAT (f, #f " 0x%x")
diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h
index ae23d1bcca8..d3eed1b83ae 100644
--- a/src/vppinfra/error_bootstrap.h
+++ b/src/vppinfra/error_bootstrap.h
@@ -53,7 +53,7 @@ enum
};
/* Current function name. Need (char *) cast to silence gcc4 pointer signedness warning. */
-#define clib_error_function ((char *) __FUNCTION__)
+#define clib_error_function ((char *) __func__)
#ifndef CLIB_ASSERT_ENABLE
#define CLIB_ASSERT_ENABLE (CLIB_DEBUG > 0)
diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h
index 71956137665..99a1e97e1aa 100644
--- a/src/vppinfra/file.h
+++ b/src/vppinfra/file.h
@@ -42,6 +42,7 @@
#include <vppinfra/socket.h>
#include <vppinfra/pool.h>
+#include <vppinfra/lock.h>
#include <termios.h>
@@ -53,13 +54,18 @@ typedef struct clib_file
/* Unix file descriptor from open/socket. */
u32 file_descriptor;
- u32 flags;
+ u16 flags;
#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0)
#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1)
+ u16 active : 1;
+ u16 dont_close : 1;
+
/* polling thread index */
u32 polling_thread_index;
+ u32 index;
+
/* Data available for function's use. */
u64 private_data;
@@ -85,77 +91,116 @@ typedef enum
typedef struct
{
/* Pool of files to poll for input/output. */
- clib_file_t *file_pool;
+ clib_file_t **file_pool;
+ clib_file_t **pending_free;
+
+ u8 lock;
void (*file_update) (clib_file_t * file,
clib_file_update_type_t update_type);
} clib_file_main_t;
+/* Look up a file by pool index.  Returns the clib_file_t pointer stored in
+ * that pool slot, or 0 when the slot is free. */
+always_inline clib_file_t *
+clib_file_get (clib_file_main_t *fm, u32 file_index)
+{
+  return pool_is_free_index (fm->file_pool, file_index) ?
+           0 :
+           *pool_elt_at_index (fm->file_pool, file_index);
+}
+
+/* Register a new file: allocate a clib_file_t, store its pointer in a pool
+ * slot, copy the caller's template into it, clear the event counters and
+ * notify fm->file_update with UNIX_FILE_UPDATE_ADD.  Returns the pool index
+ * of the new file. */
always_inline uword
-clib_file_add (clib_file_main_t * um, clib_file_t * template)
+clib_file_add (clib_file_main_t *fm, clib_file_t *template)
{
- clib_file_t *f;
- pool_get (um->file_pool, f);
+ clib_file_t *f, **fp;
+ u32 index;
+
+ f = clib_mem_alloc_aligned (sizeof (clib_file_t), CLIB_CACHE_LINE_BYTES);
+
+ /* only the pool manipulation is done under fm->lock */
+ CLIB_SPINLOCK_LOCK (fm->lock);
+ pool_get (fm->file_pool, fp);
+ index = fp - fm->file_pool;
+ fp[0] = f;
+ CLIB_SPINLOCK_UNLOCK (fm->lock);
+
+ /* NOTE(review): the slot already points at f before f is initialised
+ * below - confirm no other thread can look this index up yet */
f[0] = template[0];
f->read_events = 0;
f->write_events = 0;
f->error_events = 0;
- um->file_update (f, UNIX_FILE_UPDATE_ADD);
- return f - um->file_pool;
+ f->index = index;
+ fm->file_update (f, UNIX_FILE_UPDATE_ADD);
+ /* marked active only after file_update has been called */
+ f->active = 1;
+ return index;
+}
+
+/* Unregister file f: notify fm->file_update with UNIX_FILE_UPDATE_DELETE,
+ * close the descriptor unless f->dont_close is set, then release the pool
+ * slot.  The clib_file_t itself is not freed here; it is appended to
+ * fm->pending_free (see clib_file_free_deleted). */
+always_inline void
+clib_file_del (clib_file_main_t *fm, clib_file_t *f)
+{
+ fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ if (f->dont_close == 0)
+ close ((int) f->file_descriptor);
+
+ /* pool and pending_free are modified under fm->lock */
+ CLIB_SPINLOCK_LOCK (fm->lock);
+ f->active = 0;
+ vec_add1 (fm->pending_free, f);
+ pool_put_index (fm->file_pool, f->index);
+ CLIB_SPINLOCK_UNLOCK (fm->lock);
}
+/* Delete the file stored at pool index `index`.  Does nothing when the index
+ * refers to a free pool slot (clib_file_get returns 0 in that case). */
always_inline void
-clib_file_del (clib_file_main_t * um, clib_file_t * f)
+clib_file_del_by_index (clib_file_main_t *fm, uword index)
{
- um->file_update (f, UNIX_FILE_UPDATE_DELETE);
- close (f->file_descriptor);
- f->file_descriptor = ~0;
- vec_free (f->description);
- pool_put (um->file_pool, f);
+  clib_file_t *f = clib_file_get (fm, index);
+
+  /* a stale index must not be dereferenced by clib_file_del */
+  if (f == 0)
+    return;
+
+  clib_file_del (fm, f);
}
+/* Free the files queued on fm->pending_free whose polling_thread_index equals
+ * thread_index (description vector and the clib_file_t itself).  Entries
+ * belonging to other threads are kept and compacted to the front of the
+ * vector. */
always_inline void
-clib_file_del_by_index (clib_file_main_t * um, uword index)
+clib_file_free_deleted (clib_file_main_t *fm, clib_thread_index_t thread_index)
{
- clib_file_t *uf;
- uf = pool_elt_at_index (um->file_pool, index);
- clib_file_del (um, uf);
+ u32 n_keep = 0;
+
+ /* early exit on an empty queue; this check is made without taking fm->lock */
+ if (vec_len (fm->pending_free) == 0)
+ return;
+
+ CLIB_SPINLOCK_LOCK (fm->lock);
+ vec_foreach_pointer (f, fm->pending_free)
+ {
+ if (f->polling_thread_index == thread_index)
+ {
+ vec_free (f->description);
+ clib_mem_free (f);
+ }
+ else
+ /* keep: n_keep never runs ahead of the element being visited */
+ fm->pending_free[n_keep++] = f;
+ }
+ vec_set_len (fm->pending_free, n_keep);
+ CLIB_SPINLOCK_UNLOCK (fm->lock);
}
+/* Move the file at pool index `index` to another polling thread: the file is
+ * removed via file_update (DELETE), its polling_thread_index is changed and
+ * it is added again via file_update (ADD).  Does nothing when the index
+ * refers to a free pool slot. */
always_inline void
-clib_file_set_polling_thread (clib_file_main_t * um, uword index,
- u32 thread_index)
+clib_file_set_polling_thread (clib_file_main_t *fm, uword index,
+                              clib_thread_index_t thread_index)
{
- clib_file_t *f = pool_elt_at_index (um->file_pool, index);
- um->file_update (f, UNIX_FILE_UPDATE_DELETE);
+  clib_file_t *f = clib_file_get (fm, index);
+
+  /* clib_file_get returns 0 for a free slot; do not dereference it */
+  if (f == 0)
+    return;
+
+  fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
f->polling_thread_index = thread_index;
- um->file_update (f, UNIX_FILE_UPDATE_ADD);
+  fm->file_update (f, UNIX_FILE_UPDATE_ADD);
}
+/* Set or clear UNIX_FILE_DATA_AVAILABLE_TO_WRITE on the file at pool index
+ * clib_file_index and, when the flag actually changes, notify file_update
+ * with UNIX_FILE_UPDATE_MODIFY.  Returns 1 if the flag was set before the
+ * call, 0 otherwise (also 0 when the index refers to a free pool slot). */
always_inline uword
-clib_file_set_data_available_to_write (clib_file_main_t * um,
- u32 clib_file_index,
- uword is_available)
+clib_file_set_data_available_to_write (clib_file_main_t *fm,
+                                       u32 clib_file_index, uword is_available)
{
- clib_file_t *uf = pool_elt_at_index (um->file_pool, clib_file_index);
- uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+  clib_file_t *f = clib_file_get (fm, clib_file_index);
+  uword was_available;
+
+  /* clib_file_get returns 0 for a free slot; do not dereference it */
+  if (f == 0)
+    return 0;
+
+  was_available = (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
if ((was_available != 0) != (is_available != 0))
{
- uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
- um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+      f->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      fm->file_update (f, UNIX_FILE_UPDATE_MODIFY);
}
return was_available != 0;
}
-always_inline clib_file_t *
-clib_file_get (clib_file_main_t * fm, u32 file_index)
-{
- if (pool_is_free_index (fm->file_pool, file_index))
- return 0;
- return pool_elt_at_index (fm->file_pool, file_index);
-}
-
always_inline clib_error_t *
clib_file_write (clib_file_t * f)
{
@@ -166,11 +211,3 @@ clib_file_write (clib_file_t * f)
u8 *clib_file_get_resolved_basename (char *fmt, ...);
#endif /* included_clib_file_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/lock.h b/src/vppinfra/lock.h
index b7b3d00a905..0fa9aaa6e83 100644
--- a/src/vppinfra/lock.h
+++ b/src/vppinfra/lock.h
@@ -78,18 +78,24 @@ clib_spinlock_free (clib_spinlock_t * p)
}
}
+/* Acquire a raw spinlock word: spin until x can be changed atomically from
+ * 0 to 1 (acquire ordering on success).  x must be an lvalue.  Between
+ * compare-exchange attempts the lock is polled with relaxed loads, and
+ * __free is reset because a failed compare-exchange overwrites it with the
+ * observed value.  Wrapped in do/while (0) so the macro behaves as a single
+ * statement, e.g. in an unbraced if/else. */
+#define CLIB_SPINLOCK_LOCK(x)                                                 \
+  do                                                                          \
+    {                                                                         \
+      typeof (x) __free = 0;                                                  \
+      while (!__atomic_compare_exchange_n (&(x), &__free, 1, 0,               \
+                                           __ATOMIC_ACQUIRE,                  \
+                                           __ATOMIC_RELAXED))                 \
+        {                                                                     \
+          while (__atomic_load_n (&(x), __ATOMIC_RELAXED))                    \
+            CLIB_PAUSE ();                                                    \
+          __free = 0;                                                         \
+        }                                                                     \
+    }                                                                         \
+  while (0)
+
+/* Release a raw spinlock word: store 0 with release ordering. */
+#define CLIB_SPINLOCK_UNLOCK(x) __atomic_store_n (&(x), 0, __ATOMIC_RELEASE)
+
static_always_inline void
clib_spinlock_lock (clib_spinlock_t * p)
{
- u32 free = 0;
- while (!clib_atomic_cmp_and_swap_acq_relax_n (&(*p)->lock, &free, 1, 0))
- {
- /* atomic load limits number of compare_exchange executions */
- while (clib_atomic_load_relax_n (&(*p)->lock))
- CLIB_PAUSE ();
- /* on failure, compare_exchange writes (*p)->lock into free */
- free = 0;
- }
+ CLIB_SPINLOCK_LOCK ((*p)->lock);
CLIB_LOCK_DBG (p);
}
@@ -122,7 +128,7 @@ clib_spinlock_unlock (clib_spinlock_t * p)
{
CLIB_LOCK_DBG_CLEAR (p);
/* Make sure all reads/writes are complete before releasing the lock */
- clib_atomic_release (&(*p)->lock);
+ CLIB_SPINLOCK_UNLOCK ((*p)->lock);
}
static_always_inline void
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
index 6211bb51f0a..893978081d0 100644
--- a/src/vppinfra/mem.h
+++ b/src/vppinfra/mem.h
@@ -259,7 +259,7 @@ uword clib_mem_size (void *p);
void clib_mem_free_s (void *p);
/* Memory allocator which panics when it fails.
- Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */
+ Use macro so that clib_panic macro can expand __func__ and __LINE__. */
#define clib_mem_alloc_aligned_no_fail(size,align) \
({ \
uword _clib_mem_alloc_size = (size); \
diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h
index cd3b4289da6..229ed375e74 100644
--- a/src/vppinfra/os.h
+++ b/src/vppinfra/os.h
@@ -56,29 +56,29 @@ void os_out_of_memory (void);
/* Estimate, measure or divine CPU timestamp clock frequency. */
f64 os_cpu_clock_frequency (void);
-extern __thread uword __os_thread_index;
-extern __thread uword __os_numa_index;
+extern __thread clib_thread_index_t __os_thread_index;
+extern __thread clib_numa_node_index_t __os_numa_index;
-static_always_inline uword
+static_always_inline clib_thread_index_t
os_get_thread_index (void)
{
return __os_thread_index;
}
static_always_inline void
-os_set_thread_index (uword thread_index)
+os_set_thread_index (clib_thread_index_t thread_index)
{
__os_thread_index = thread_index;
}
-static_always_inline uword
+static_always_inline clib_numa_node_index_t
os_get_numa_index (void)
{
return __os_numa_index;
}
static_always_inline void
-os_set_numa_index (uword numa_index)
+os_set_numa_index (clib_numa_node_index_t numa_index)
{
__os_numa_index = numa_index;
}
@@ -94,14 +94,6 @@ os_get_cpu_number (void)
uword os_get_nthreads (void);
-#include <vppinfra/smp.h>
+#include <vppinfra/cache.h>
#endif /* included_os_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h
index 07c9269c6d8..d73a9783ba8 100644
--- a/src/vppinfra/pool.h
+++ b/src/vppinfra/pool.h
@@ -421,18 +421,20 @@ _pool_free (void **v)
#define pool_free(p) _pool_free ((void **) &(p))
static_always_inline uword
-pool_get_first_index (void *pool)
+_pool_get_first_index (void *pool)
{
pool_header_t *h = pool_header (pool);
return clib_bitmap_first_clear (h->free_bitmap);
}
+#define pool_get_first_index(p) _pool_get_first_index ((void *) (p))
static_always_inline uword
-pool_get_next_index (void *pool, uword last)
+_pool_get_next_index (void *pool, uword last)
{
pool_header_t *h = pool_header (pool);
return clib_bitmap_next_clear (h->free_bitmap, last + 1);
}
+#define pool_get_next_index(p, l) _pool_get_next_index ((void *) (p), l)
/** Optimized iteration through pool.
diff --git a/src/vppinfra/smp.h b/src/vppinfra/smp.h
deleted file mode 100644
index 2b3ed548dfa..00000000000
--- a/src/vppinfra/smp.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- Copyright (c) 2001-2005 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef included_clib_smp_h
-#define included_clib_smp_h
-
-#include <vppinfra/cache.h>
-#include <vppinfra/os.h> /* for os_panic */
-
-#if defined (i386) || defined (__x86_64__)
-#define clib_smp_pause() do { asm volatile ("pause"); } while (0)
-#elif defined (__aarch64__) || defined (__arm__)
-#define clib_smp_pause() do { asm volatile ("isb" ::: "memory"); } while (0)
-#endif
-
-#ifndef clib_smp_pause
-#define clib_smp_pause() do { } while (0)
-#endif
-
-#ifdef CLIB_UNIX
-#include <sched.h>
-
-always_inline void
-os_sched_yield (void)
-{
- sched_yield ();
-}
-#else
-always_inline void
-os_sched_yield (void)
-{
- clib_smp_pause ();
-}
-#endif
-
-
-#endif /* included_clib_smp_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c
index 2abf2b244cd..f4dad844d33 100644
--- a/src/vppinfra/socket.c
+++ b/src/vppinfra/socket.c
@@ -48,6 +48,7 @@
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sched.h>
#include <vppinfra/mem.h>
#include <vppinfra/vec.h>
diff --git a/src/vppinfra/string.c b/src/vppinfra/string.c
index ea9480875a5..aedaf428a31 100644
--- a/src/vppinfra/string.c
+++ b/src/vppinfra/string.c
@@ -94,7 +94,7 @@ clib_memswap (void *_a, void *_b, uword bytes)
__clib_export void
clib_c11_violation (const char *s)
{
- _clib_error (CLIB_ERROR_WARNING, (char *) __FUNCTION__, 0, (char *) s);
+ _clib_error (CLIB_ERROR_WARNING, (char *) __func__, 0, (char *) s);
}
/**
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c
index f1736499a0a..7c0ea44b481 100644
--- a/src/vppinfra/time.c
+++ b/src/vppinfra/time.c
@@ -332,6 +332,7 @@ format_clib_time (u8 * s, va_list * args)
clib_time_t *c = va_arg (*args, clib_time_t *);
int verbose = va_arg (*args, int);
f64 now, reftime, delta_reftime_in_seconds, error;
+ u32 indent = format_get_indent (s);
/* Compute vpp elapsed time from the CPU clock */
reftime = unix_time_now ();
@@ -346,8 +347,14 @@ format_clib_time (u8 * s, va_list * args)
error = now - delta_reftime_in_seconds;
- s = format (s, ", reftime %.6f, error %.6f, clocks/sec %.6f",
- delta_reftime_in_seconds, error, c->clocks_per_second);
+ s = format (s, "\n%Ucpu time %.6f now %lu last %lu since start %lu \n",
+ format_white_space, indent, now, clib_cpu_time_now (),
+ c->last_cpu_time, c->total_cpu_time);
+ s = format (s, "%Ureftime %.6f now %.6f last %.6f init %.6f\n",
+ format_white_space, indent, delta_reftime_in_seconds, reftime,
+ c->last_verify_reference_time, c->init_reference_time);
+ s = format (s, "%Uerror %.6f, clocks/sec %.6f", format_white_space, indent,
+ error, c->clocks_per_second);
return (s);
}
diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h
index ad85af35ac9..9ed1754fbff 100644
--- a/src/vppinfra/types.h
+++ b/src/vppinfra/types.h
@@ -211,6 +211,10 @@ typedef uword uwordu __attribute__ ((aligned (1), __may_alias__));
__ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array; \
__var = *++__ptr_ptr)
+typedef u16 clib_thread_index_t;
+typedef u8 clib_numa_node_index_t;
+#define CLIB_INVALID_THREAD_INDEX CLIB_U16_MAX
+
#endif /* included_clib_types_h */
/*
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
index 05ca2f901c6..2255cc7cc3a 100644
--- a/src/vppinfra/unix-misc.c
+++ b/src/vppinfra/unix-misc.c
@@ -64,10 +64,10 @@
#include <stdio.h> /* for sprintf */
#include <limits.h>
-__clib_export __thread uword __os_thread_index = 0;
-__clib_export __thread uword __os_numa_index = 0;
-
-__clib_export clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid);
+__clib_export __thread clib_thread_index_t __os_thread_index = 0;
+__clib_export __thread clib_numa_node_index_t __os_numa_index = 0;
+__clib_export cpu_set_t __os_affinity_cpu_set;
+__clib_export clib_bitmap_t *os_get_cpu_affinity_bitmap ();
clib_error_t *
clib_file_n_bytes (char *file, uword * result)
@@ -285,29 +285,31 @@ os_get_online_cpu_core_bitmap ()
}
__clib_export clib_bitmap_t *
-os_get_cpu_affinity_bitmap (int pid)
+os_get_cpu_affinity_bitmap ()
{
#if __linux
- int index, ret;
- cpu_set_t cpuset;
+ int cpu;
uword *affinity_cpus;
- clib_bitmap_alloc (affinity_cpus, sizeof (cpu_set_t));
+ clib_bitmap_alloc (affinity_cpus, __CPU_SETSIZE);
clib_bitmap_zero (affinity_cpus);
- CPU_ZERO_S (sizeof (cpu_set_t), &cpuset);
-
- ret = sched_getaffinity (0, sizeof (cpu_set_t), &cpuset);
-
- if (ret < 0)
+ /* populate __os_affinity_cpu_set once, on the first call to
+ * os_get_cpu_affinity_bitmap() */
+ if (__CPU_COUNT_S (sizeof (cpu_set_t), &__os_affinity_cpu_set) == 0)
{
- clib_bitmap_free (affinity_cpus);
- return 0;
+ int ret;
+ ret = sched_getaffinity (0, sizeof (cpu_set_t), &__os_affinity_cpu_set);
+ if (ret < 0)
+ {
+ clib_bitmap_free (affinity_cpus);
+ return NULL;
+ }
}
- for (index = 0; index < sizeof (cpu_set_t); index++)
- if (CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset))
- clib_bitmap_set (affinity_cpus, index, 1);
+ for (cpu = 0; cpu < __CPU_SETSIZE; cpu++)
+ if (__CPU_ISSET_S (cpu, sizeof (cpu_set_t), &__os_affinity_cpu_set))
+ clib_bitmap_set (affinity_cpus, cpu, 1);
return affinity_cpus;
#elif defined(__FreeBSD__)
cpuset_t mask;
@@ -332,6 +334,100 @@ os_get_cpu_affinity_bitmap (int pid)
#endif
}
+/* Translate a relative cpu index into an absolute cpu id: returns the id of
+ * the cpu-th (0-based) cpu set in the affinity bitmap, or -1 when cpu is ~0,
+ * the affinity bitmap is unavailable, or it holds fewer cpus than requested. */
+__clib_export int
+os_translate_cpu_to_affinity_bitmap (int cpu)
+{
+  uword *affinity_bmp = os_get_cpu_affinity_bitmap ();
+  int absolute = -1;
+  int rank = 0;
+  int abs_cpu = 0;
+
+  if (!affinity_bmp)
+    return -1;
+
+  if (cpu != ~0)
+    {
+      clib_bitmap_foreach (abs_cpu, affinity_bmp)
+        {
+          /* rank counts the set bits visited so far */
+          if (rank == cpu)
+            {
+              absolute = abs_cpu;
+              break;
+            }
+          rank++;
+        }
+    }
+
+  clib_bitmap_free (affinity_bmp);
+  return absolute;
+}
+
+/* Translate an absolute cpu id into its relative index: returns the 0-based
+ * rank of cpu_translated among the cpus set in the affinity bitmap, or -1
+ * when cpu_translated is ~0, the affinity bitmap is unavailable, or the cpu
+ * is not set in it. */
+__clib_export int
+os_translate_cpu_from_affinity_bitmap (int cpu_translated)
+{
+  uword *affinity_bmp = os_get_cpu_affinity_bitmap ();
+  int relative = -1;
+  int rank = 0;
+  int abs_cpu = 0;
+
+  if (!affinity_bmp)
+    return -1;
+
+  if (cpu_translated != ~0)
+    {
+      clib_bitmap_foreach (abs_cpu, affinity_bmp)
+        {
+          if (abs_cpu == cpu_translated)
+            {
+              relative = rank;
+              break;
+            }
+          rank++;
+        }
+    }
+
+  clib_bitmap_free (affinity_bmp);
+  return relative;
+}
+
+/* Translate a bitmap of relative cpu indices into a bitmap of absolute cpu
+ * ids: bit n of cpu_bmp selects the n-th (0-based) cpu set in the affinity
+ * bitmap.  Returns a newly allocated bitmap, or NULL when the affinity
+ * bitmap is unavailable or cpu_bmp refers to an index beyond the number of
+ * cpus in it.  The temporary affinity bitmap is released on every path. */
+__clib_export clib_bitmap_t *
+os_translate_cpu_bmp_to_affinity_bitmap (clib_bitmap_t *cpu_bmp)
+{
+  uword *affinity_bmp = os_get_cpu_affinity_bitmap ();
+  uword *translated_cpulist = NULL;
+  uword cpu_it;
+  uword cpu_translate_it = 0;
+
+  if (!affinity_bmp)
+    return NULL;
+
+  u32 cpu_count_relative = clib_bitmap_count_set_bits (affinity_bmp);
+  u32 cpu_max_corelist = clib_bitmap_last_set (cpu_bmp);
+
+  /* the highest requested relative index must exist in the affinity set */
+  if (cpu_count_relative <= cpu_max_corelist)
+    goto done;
+
+  clib_bitmap_alloc (translated_cpulist, __CPU_SETSIZE);
+  clib_bitmap_zero (translated_cpulist);
+
+  clib_bitmap_foreach (cpu_it, affinity_bmp)
+    {
+      /* cpu_translate_it is the rank of cpu_it within the affinity set */
+      if (clib_bitmap_get (cpu_bmp, cpu_translate_it))
+        clib_bitmap_set (translated_cpulist, cpu_it, 1);
+
+      cpu_translate_it++;
+    }
+
+done:
+  clib_bitmap_free (affinity_bmp);
+  return translated_cpulist;
+}
+
__clib_export clib_bitmap_t *
os_get_online_cpu_node_bitmap ()
{
diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h
index d0ddb93a46f..db3102e4fee 100644
--- a/src/vppinfra/unix.h
+++ b/src/vppinfra/unix.h
@@ -56,6 +56,19 @@ clib_error_t *unix_proc_file_contents (char *file, u8 ** result);
/* Retrieve bitmap of online cpu cures */
clib_bitmap_t *os_get_online_cpu_core_bitmap ();
+/* Retrieve bitmap of cpu affinity */
+clib_bitmap_t *os_get_cpu_affinity_bitmap ();
+
+/* Translate a relative cpu index (n-th cpu set in the cpu affinity bitmap)
+ * to the absolute cpu id; returns -1 if no such cpu is available */
+int os_translate_cpu_to_affinity_bitmap (int cpu);
+
+/* Translate an absolute cpu id to its relative index (rank among the cpus
+ * set in the cpu affinity bitmap); returns -1 if the cpu is not in it */
+int os_translate_cpu_from_affinity_bitmap (int cpu_translated);
+
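+/* Translate a bitmap of relative cpu indices to a newly allocated bitmap of
+ * absolute cpu ids, based on the cpu affinity bitmap; returns NULL on failure */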
+clib_bitmap_t *
+os_translate_cpu_bmp_to_affinity_bitmap (clib_bitmap_t *cpu_bmp);
+
/* Retrieve bitmap of online cpu nodes (sockets) */
clib_bitmap_t *os_get_online_cpu_node_bitmap ();
diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h
index 1a64a69a1e6..053c5b07aa2 100644
--- a/src/vppinfra/vec.h
+++ b/src/vppinfra/vec.h
@@ -446,8 +446,8 @@ _vec_dup (void *v, uword hdr_size, uword align, uword elt_sz)
@param DST destination
@param SRC source
*/
-#define vec_copy(DST,SRC) clib_memcpy_fast (DST, SRC, vec_len (DST) * \
- sizeof ((DST)[0]))
+#define vec_copy(DST, SRC) \
+ clib_memcpy_fast (DST, SRC, vec_len (DST) * _vec_elt_sz (DST))
/** \brief Clone a vector. Make a new vector with the
same size as a given vector but possibly with a different type.
@@ -480,7 +480,7 @@ _vec_zero_elts (void *v, uword first, uword count, uword elt_sz)
{
clib_memset_u8 (v + (first * elt_sz), 0, count * elt_sz);
}
-#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, sizeof ((V)[0]))
+#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, _vec_elt_sz (V))
static_always_inline void
_vec_validate (void **vp, uword index, uword header_size, uword align,
@@ -518,7 +518,7 @@ _vec_validate (void **vp, uword index, uword header_size, uword align,
}
#define vec_validate_hap(V, I, H, A, P) \
- _vec_validate ((void **) &(V), I, H, _vec_align (V, A), 0, sizeof ((V)[0]))
+ _vec_validate ((void **) &(V), I, H, _vec_align (V, A), 0, _vec_elt_sz (V))
/** \brief Make sure vector is long enough for given index
(no header, unspecified alignment)
@@ -1228,11 +1228,13 @@ _vec_is_equal (void *v1, void *v2, uword v1_elt_sz, uword v2_elt_sz)
@param vec vector to sort
@param f comparison function
*/
-#define vec_sort_with_function(vec,f) \
-do { \
- if (vec_len (vec) > 1) \
- qsort (vec, vec_len (vec), sizeof (vec[0]), (void *) (f)); \
-} while (0)
+#define vec_sort_with_function(vec, f) \
+ do \
+ { \
+ if (vec_len (vec) > 1) \
+ qsort (vec, vec_len (vec), _vec_elt_sz (vec), (void *) (f)); \
+ } \
+ while (0)
/** \brief Make a vector containing a NULL terminated c-string.
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
index 5d386b1eaad..a4e07511426 100644
--- a/src/vppinfra/vec_bootstrap.h
+++ b/src/vppinfra/vec_bootstrap.h
@@ -83,7 +83,8 @@ always_inline uword __vec_elt_sz (uword elt_sz, int is_void);
(((s) + sizeof (uword) - 1) &~ (sizeof (uword) - 1))
#define _vec_is_void(P) \
__builtin_types_compatible_p (__typeof__ ((P)[0]), void)
-#define _vec_elt_sz(V) __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V))
+#define _vec_elt_sz(V) \
+ __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V)) /* NOLINT */
#define _vec_align(V, A) __vec_align (__alignof__((V)[0]), A)
always_inline __clib_nosanitize_addr uword
@@ -136,7 +137,7 @@ u32 vec_len_not_inline (void *v);
/** \brief Number of data bytes in vector. */
-#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
+#define vec_bytes(v) (vec_len (v) * _vec_elt_sz (v))
/**
* Return size of memory allocated for the vector